From 61e16733c882f5c6dca9c8e98aaf54b8596045df Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 15:12:17 -0700 Subject: [PATCH 1/6] Capture diffs of dist tarballs vs auto-generated source archives from GitHub --- autospec/git.py | 1 + autospec/tarball.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/autospec/git.py b/autospec/git.py index dfe3933d..2a558cd1 100644 --- a/autospec/git.py +++ b/autospec/git.py @@ -113,6 +113,7 @@ def commit_to_git(config, name, success): call("git add whatrequires", check=False, stderr=subprocess.DEVNULL, cwd=path) call("git add description", check=False, stderr=subprocess.DEVNULL, cwd=path) call("git add attrs", check=False, stderr=subprocess.DEVNULL, cwd=path) + call("git add archive.diff", check=False, stderr=subprocess.DEVNULL, cwd=path) # remove deprecated config files call("git rm make_install_append", check=False, stderr=subprocess.DEVNULL, cwd=path) diff --git a/autospec/tarball.py b/autospec/tarball.py index fe50acce..a9e32cf8 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -26,7 +26,7 @@ import zipfile import download -from util import do_regex, get_sha1sum, print_fatal, write_out +from util import call, do_regex, get_sha1sum, print_fatal, write_out class Source(): @@ -184,6 +184,7 @@ def __init__(self, url, name, version, archives, config, base_path): self.prefixes = dict() self.config = config self.base_path = base_path + self.autogenerated_tarball = None def write_upstream(self, sha, tarfile, mode="w"): """Write the upstream hash to the upstream file.""" @@ -213,6 +214,12 @@ def process_main_source(self, url): main_src = Source(url, '', src_path, self.config.default_pattern) return main_src + def process_autogenerated_source(self, url): + """Download any autogenerated source tarball for comparison.""" + src_path = self.check_or_get_file(url, os.path.basename(url)) + autogenerated_src = Source(url, '../autogenerated', src_path, self.config.default_pattern) + return autogenerated_src + def print_header(self): """Print header for autospec run.""" print("\n") @@ -295,6 +302,9 @@ def name_and_version(self, filemanager): name = re.sub(r"release-", '', name) name = re.sub(r"\d*$", '', name) self.rawname = name + # Identify the auto-generated tarball URL for comparison + if "/releases/download/" in self.url: + self.autogenerated_tarball = "https://github.com/" + match.group(1).strip() + "/" + self.repo + "/archive/refs/tags/" + match.group(3).strip() + ".tar.gz" version = match.group(3).replace(name, '') if "/archive/" not in self.url: version = re.sub(r"^[-_.a-zA-Z]+", "", version) @@ -427,3 +437,10 @@ def process(self, filemanager): archives_src = self.process_archives() # Extract all sources self.extract_sources(main_src, archives_src) + # Download and process any auto-generated source-tree archive for comparison + autogenerated_src = self.process_autogenerated_source(self.autogenerated_tarball) + # Extract autogenerated source for comparison + if autogenerated_src: + autogenerated_src.extract(os.path.join(self.base_path, 'autogenerated')) + call(f"diff -u -r autogenerated/{autogenerated_src.prefix} {main_src.prefix}", + logfile="archive.diff", check=False, cwd=self.base_path) From 6174188098f65444e03f4beb4cafecd121aa446f Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 15:25:16 -0700 Subject: [PATCH 2/6] Fix failure on packages that don't have an autogenerated source tarball --- autospec/tarball.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/autospec/tarball.py b/autospec/tarball.py index a9e32cf8..650b8449 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -216,8 +216,10 @@ def process_main_source(self, url): def process_autogenerated_source(self, url): """Download any autogenerated source tarball for comparison.""" - src_path = self.check_or_get_file(url, os.path.basename(url)) - autogenerated_src = Source(url, '../autogenerated', src_path, self.config.default_pattern) + autogenerated_src = None + if url: + src_path = self.check_or_get_file(url, os.path.basename(url)) + autogenerated_src = Source(url, '../autogenerated', src_path, self.config.default_pattern) return autogenerated_src def print_header(self): From 2cf51ddfb9d1612f32fdeb994f2ca428767df7cb Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 15:52:45 -0700 Subject: [PATCH 3/6] Move autogenerated source tree to a versionless directory --- autospec/tarball.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/autospec/tarball.py b/autospec/tarball.py index 650b8449..973c31d9 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -219,7 +219,7 @@ def process_autogenerated_source(self, url): autogenerated_src = None if url: src_path = self.check_or_get_file(url, os.path.basename(url)) - autogenerated_src = Source(url, '../autogenerated', src_path, self.config.default_pattern) + autogenerated_src = Source(url, '../autogenerated-tmp', src_path, self.config.default_pattern) return autogenerated_src def print_header(self): @@ -443,6 +443,8 @@ def process(self, filemanager): autogenerated_src = self.process_autogenerated_source(self.autogenerated_tarball) # Extract autogenerated source for comparison if autogenerated_src: - autogenerated_src.extract(os.path.join(self.base_path, 'autogenerated')) - call(f"diff -u -r autogenerated/{autogenerated_src.prefix} {main_src.prefix}", - logfile="archive.diff", check=False, cwd=self.base_path) + autogenerated_src.extract(os.path.join(self.base_path, 'autogenerated-tmp')) + # Move the autogenerated source to a non-version-named directory for consistent diffs + call(f"mv autogenerated-tmp/{autogenerated_src.prefix} autogenerated", check=True, cwd=self.base_path) + call(f"diff -u -r ../autogenerated ./", + logfile="archive.diff", check=False, cwd=os.path.join(self.base_path, main_src.prefix)) From 617c67c388fe5b8e4350968fb135f77b464df0ae Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 16:00:38 -0700 Subject: [PATCH 4/6] Make flake8 happy --- autospec/tarball.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autospec/tarball.py b/autospec/tarball.py index 973c31d9..1fee539b 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -446,5 +446,5 @@ def process(self, filemanager): autogenerated_src.extract(os.path.join(self.base_path, 'autogenerated-tmp')) # Move the autogenerated source to a non-version-named directory for consistent diffs call(f"mv autogenerated-tmp/{autogenerated_src.prefix} autogenerated", check=True, cwd=self.base_path) - call(f"diff -u -r ../autogenerated ./", + call("diff -u -r ../autogenerated ./", logfile="archive.diff", check=False, cwd=os.path.join(self.base_path, main_src.prefix)) From 16ee88cb20500a7b31671962f9554a0e07404178 Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 16:59:54 -0700 Subject: [PATCH 5/6] Don't write source archive comparison tarball to upstreams file We only use this for source tree comparison, not part of the build, so don't add it to the upstreams file. --- autospec/tarball.py | 2 +- autospec/util.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/autospec/tarball.py b/autospec/tarball.py index 1fee539b..65d8d784 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -218,7 +218,7 @@ def process_autogenerated_source(self, url): """Download any autogenerated source tarball for comparison.""" autogenerated_src = None if url: - src_path = self.check_or_get_file(url, os.path.basename(url)) + src_path = self.check_or_get_file(url, os.path.basename(url), mode="") autogenerated_src = Source(url, '../autogenerated-tmp', src_path, self.config.default_pattern) return autogenerated_src diff --git a/autospec/util.py b/autospec/util.py index b89552b8..aa924351 100644 --- a/autospec/util.py +++ b/autospec/util.py @@ -150,8 +150,9 @@ def binary_in_path(binary): def write_out(filename, content, mode="w"): """File.write convenience wrapper.""" - with open_auto(filename, mode) as require_f: - require_f.write(content) + if mode: + with open_auto(filename, mode) as require_f: + require_f.write(content) def open_auto(*args, **kwargs): From 239d8cd6189be2cacd46219dfb0d61c8dd9cc743 Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 15 Apr 2024 17:13:34 -0700 Subject: [PATCH 6/6] Limit diff to ~10 lines Also include content of added files. --- autospec/tarball.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/autospec/tarball.py b/autospec/tarball.py index 65d8d784..38092a20 100644 --- a/autospec/tarball.py +++ b/autospec/tarball.py @@ -446,5 +446,8 @@ def process(self, filemanager): autogenerated_src.extract(os.path.join(self.base_path, 'autogenerated-tmp')) # Move the autogenerated source to a non-version-named directory for consistent diffs call(f"mv autogenerated-tmp/{autogenerated_src.prefix} autogenerated", check=True, cwd=self.base_path) - call("diff -u -r ../autogenerated ./", - logfile="archive.diff", check=False, cwd=os.path.join(self.base_path, main_src.prefix)) + call("diff -u -r --unidirectional-new-file ../autogenerated ./", + logfile="archive.diff.in", check=False, cwd=os.path.join(self.base_path, main_src.prefix)) + call("grep -A14 -E '^(diff|Only in)' archive.diff.in", + logfile="archive.diff", check=False, cwd=os.getcwd()) + call("rm archive.diff.in", check=False, cwd=os.getcwd())