diff --git a/processinghistory/history.py b/processinghistory/history.py index e7e8520..a492e07 100644 --- a/processinghistory/history.py +++ b/processinghistory/history.py @@ -30,6 +30,15 @@ value being a list of keys of the parents of that file. This dictionary stores all the ancestry relationships for the whole lineage. +History in VRT files +-------------------- +A GDAL VRT file is handled as a somewhat special case. The component files +of the VRT are treated as parents of the VRT (and there can be no other parents), +and the history of those files is read directly from them, rather than being +copied into the VRT. This is handled transparently, so that when history +is read from the VRT, it appears to have all come from there. This allows the +history of the components to be as dynamic as the data itself. + """ import sys import os @@ -54,6 +63,7 @@ PARENTS_BY_KEY = "parentsByKey" AUTOENVVARSLIST_NAME = "HISTORY_ENVVARS_TO_AUTOINCLUDE" NO_TIMESTAMP = "UnknownTimestamp" +TIMESTAMP = "timestamp" # These GDAL drivers are known to have limits on the size of metadata which # can be stored, and so we need to keep below these, or we lose everything. @@ -70,9 +80,36 @@ def __init__(self): self.metadataByKey = {} self.parentsByKey = {} + def addParentHistory(self, parentfile): + """ + Add history from parent file to self + """ + parentHist = readHistoryFromFile(filename=parentfile) + + if parentHist is not None: + key = (os.path.basename(parentfile), + parentHist.metadataByKey[CURRENTFILE_KEY][TIMESTAMP]) + + # Convert parent's "currentfile" metadata and parentage to normal key entries + self.metadataByKey[key] = parentHist.metadataByKey[CURRENTFILE_KEY] + self.parentsByKey[key] = parentHist.parentsByKey[CURRENTFILE_KEY] + + # Remove those from parentHist + parentHist.metadataByKey.pop(CURRENTFILE_KEY) + parentHist.parentsByKey.pop(CURRENTFILE_KEY) + + # Copy over all the other ancestor metadata and parentage + self.metadataByKey.update(parentHist.metadataByKey) + self.parentsByKey.update(parentHist.parentsByKey) + else: + key = (os.path.basename(parentfile), NO_TIMESTAMP) + + # Add this parent as parent of current file + self.parentsByKey[CURRENTFILE_KEY].append(key) + def toJSON(self): """ - Return a JSON representation of the given ProcessingHistory + Return a JSON representation of the current ProcessingHistory """ d = { METADATA_BY_KEY: {}, @@ -131,7 +168,7 @@ def makeAutomaticFields(): dictn = {} # Time stamp formatted as per ISO 8601 standard, including time zone offset - dictn['timestamp'] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.localtime()) + dictn[TIMESTAMP] = time.strftime("%Y-%m-%d %H:%M:%S%z", time.localtime()) dictn['login'] = getpass.getuser() @@ -239,8 +276,6 @@ def writeHistoryToFile(userDict={}, parents=[], *, filename=None, gdalDS=None): File can be specified as either a filename string or an open GDAL Dataset """ - procHist = makeProcessingHistory(userDict, parents) - if filename is not None: ds = gdal.Open(filename, gdal.GA_Update) else: @@ -250,6 +285,12 @@ def writeHistoryToFile(userDict={}, parents=[], *, filename=None, gdalDS=None): raise ProcessingHistoryError("Must supply either filename or gdalDS") drvrName = ds.GetDriver().ShortName + isVRT = (drvrName == "VRT") + if isVRT and len(parents) > 0: + msg = "History for VRT files should not have parents" + raise ProcessingHistoryError(msg) + + procHist = makeProcessingHistory(userDict, parents) # Convert to JSON procHistJSON = procHist.toJSON() @@ -295,28 +336,7 @@ def makeProcessingHistory(userDict, parents): # Now add history from each parent file procHist.parentsByKey[CURRENTFILE_KEY] = [] for parentfile in parents: - parentHist = readHistoryFromFile(filename=parentfile) - - if parentHist is not None: - key = (os.path.basename(parentfile), - parentHist.metadataByKey[CURRENTFILE_KEY]['timestamp']) - - # Convert parent's "currentfile" metadata and parentage to normal key entries - procHist.metadataByKey[key] = parentHist.metadataByKey[CURRENTFILE_KEY] - procHist.parentsByKey[key] = parentHist.parentsByKey[CURRENTFILE_KEY] - - # Remove those from parentHist - parentHist.metadataByKey.pop(CURRENTFILE_KEY) - parentHist.parentsByKey.pop(CURRENTFILE_KEY) - - # Copy over all the other ancestor metadata and parentage - procHist.metadataByKey.update(parentHist.metadataByKey) - procHist.parentsByKey.update(parentHist.parentsByKey) - else: - key = (os.path.basename(parentfile), NO_TIMESTAMP) - - # Add this parent as parent of current file - procHist.parentsByKey[CURRENTFILE_KEY].append(key) + procHist.addParentHistory(parentfile) return procHist @@ -342,6 +362,20 @@ def readHistoryFromFile(filename=None, gdalDS=None): if procHistJSON is not None: procHist = ProcessingHistory.fromJSON(procHistJSON) + + # If this is a VRT, then read the component files as though they were + # parent files + isVRT = (ds.GetDriver().ShortName == "VRT") + if isVRT: + vrtFile = ds.GetDescription() + componentList = [fn for fn in ds.GetFileList() if fn != vrtFile] + for componentFile in componentList: + if not os.path.exists(componentFile): + msg = (f"VRT file '{vrtFile}' missing component " + + f"'{componentFile}'") + raise ProcessingHistoryError(msg) + + procHist.addParentHistory(componentFile) else: procHist = None