Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
dbbd5b3
Add support for rosdistro cache to hold the README and CHANGELOG cont…
tfoote Jun 13, 2025
7673ee5
get everything working
tfoote Jun 13, 2025
c8f0247
add support for caching other content than package.xml
tfoote Jun 18, 2025
5051dcb
Add timestamp of last update
tfoote Jun 19, 2025
69ab352
Add rate limiting backoff on url fetches
tfoote Jun 19, 2025
ce2be60
use filepath in git plugin
tfoote Jun 19, 2025
32efafc
fix clone logic for source packages and add a little debug
tfoote Jun 19, 2025
667443e
add source cache linking logic for README.md and CHANGELOG.rst
tfoote Jun 19, 2025
4fbaf6d
Add support for changelog and readme to github source provider
tfoote Jun 19, 2025
9998ad2
Fixup remove debug
tfoote Jun 19, 2025
0a5235e
add debug instrumentation
tfoote Jun 20, 2025
2f085b8
filepaths for all source manifest providers
tfoote Jun 20, 2025
cd9669f
fix bug in compression logic
tfoote Jun 20, 2025
886c3b1
debug for clearing content visibilty
tfoote Jun 20, 2025
be0a62e
better variable naming
tfoote Jun 20, 2025
25f8cce
remove debug statement
tfoote Jun 20, 2025
372e2fb
Differentiate which fetch is running
tfoote Jun 20, 2025
d673ca6
proof of concept truncating extra content
tfoote Jun 23, 2025
86fbcde
Only update timestamp if something is crawled
tfoote Jun 24, 2025
d85c93d
improve readability of status messages, todo magic number
tfoote Jun 24, 2025
70c9934
remove magic number
tfoote Jun 24, 2025
8b8e32b
extend truncation message
tfoote Jun 24, 2025
23e9b86
Make docs truncation idempotent
tfoote Jun 24, 2025
6228ae2
simplify deduplication logic
tfoote Jun 24, 2025
9bd8a78
Allow the collapse of any long string >300 right now
tfoote Jun 24, 2025
b2cb88f
truncate on initial insertion of docs
tfoote Jun 24, 2025
56407dd
clearer cache loading message
tfoote Jun 24, 2025
9657a2e
add debugging config option for faster development cycles
tfoote Jun 25, 2025
2982685
fix loud load typo
tfoote Jun 25, 2025
63ba475
restore items with new API
tfoote Jun 25, 2025
ff86598
refactor to generic _resources from package_xmls
tfoote Jul 4, 2025
f60979e
remove debugging truncate, it's in the sanitizer now
tfoote Jul 4, 2025
94c5fd1
multi resource support for git
tfoote Jul 7, 2025
47049ea
Print out url string not Result address
tfoote Dec 27, 2025
6e184ca
First draft at a schema
tfoote Dec 27, 2025
9b19db3
schema running for intermediate state
tfoote Dec 27, 2025
f432591
catch last_update_time and update SourceRepositoryCache docs
tfoote Dec 27, 2025
b7e0c2b
add environment variable to not wait as long for timeouts
tfoote Dec 27, 2025
b18a733
fix exception variable scopes
tfoote Dec 27, 2025
e730cc4
Switch release cache to release_resources as a dict avoid special cas…
tfoote Dec 27, 2025
93ab5cf
improve visibility of progress on release updates
tfoote Dec 27, 2025
a69bdf5
Update the loading process and self declaration
tfoote Dec 27, 2025
66cc8aa
backwards compatibility for self.package_xmls
tfoote Dec 27, 2025
c3c2cc6
deduplication logic is updated for the new structure
tfoote Dec 27, 2025
03c2293
clean up last update time logic
tfoote Dec 27, 2025
454afa1
Store the release_resources in the ReleaseCache
tfoote Jan 4, 2026
33e6af5
fix backwards compat insertion
tfoote Jan 4, 2026
813e7b6
add extra resource initializaiton to test mock
tfoote Jan 4, 2026
d6b5548
add missing import
tfoote Jan 4, 2026
a86fe10
Iterate filepaths correctly
tfoote Jan 4, 2026
ef1ee32
update tests for new storage format
tfoote Jan 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions dev_test/rosdistro_cache_3.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/ros-infrastructure/rosdistro/TODO/rosdistro_cache_3.schema.json",
"title": "ROSDistro Cache Format 3 Schema",
"description": "Cache Format 3 for rosdistro",
"type": "object",
"properties": {
  "version": {
    "description": "The version of the rosdistro cache",
    "type": "integer",
    "minimum": 3,
    "maximum": 3
  },
  "type": {
    "description": "Clarifying the file type",
    "type": "string"
  },
  "source_repo_resources": {
    "type": "object",
    "patternProperties": {
      "^.*$": {"$ref": "#/$defs/repository_resources"}
    }
  },
  "release_resources": {
    "type": "object",
    "additionalProperties": {"$ref": "#/$defs/package_resources"},
    "properties": {
      "_last_update_time": {
        "type": "string"
      },
      "version": {
        "type": "string"
      }
    },
    "required": ["version"]
  },
  "distribution_file": {
    "type": "object",
    "description": "rosdistro distribution_file"
  },
  "name": {
    "type": "string",
    "description": "The name of the distribution being cached"
  }
},
"additionalProperties": false,
"required": ["source_repo_resources", "distribution_file", "name"],
"$defs": {
"repository_resources": {
"type": "object",
"additionalProperties": {"$ref": "#/$defs/package_resources"},
"properties": {
"_last_update_time": {
"type": "string"
},
"_ref": {
"type": "string"
}
},
"required": ["_ref"]
},
"package_resources": {
"type": "object",
"properties": {
"CHANGELOG.rst": {
"type": "string",
"description": "Contents of the CHANGELOG.rst if it's available"
},
"package.xml": {
"type": "string",
"description": "Contents of the package.xml if it's available"
},
"package_path": {
"type": "string",
"description": "The package_path if it's available"
},
Comment on lines +73 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

"README.md": {
"type": "string",
"description": "Contents of the README.md if it's available"
}
}
}
}
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# - src/rosdistro/__init__.py
# - stdeb.cfg
'version': '1.0.1',
'install_requires': ['PyYAML', 'setuptools'],
'install_requires': ['PyYAML', 'setuptools', 'jsonschema'],
'python_requires': '>=3.6',
'packages': find_packages('src'),
'package_dir': {'': 'src'},
Expand Down
45 changes: 30 additions & 15 deletions src/rosdistro/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,17 @@ def __init__(self, distribution_file, manifest_providers=None, source_manifest_p
if source_manifest_providers is not None:
self._source_manifest_providers = source_manifest_providers

self._release_package_xmls = {}
self._source_repo_package_xmls = {}
self._release_resources = {}
self._source_repo_resources = {}

def __getattr__(self, name):
    """Forward an attribute lookup to the wrapped distribution file object."""
    wrapped = self._distribution_file
    return getattr(wrapped, name)

def get_release_package_xml(self, pkg_name):
if pkg_name not in self._release_package_xmls:

def get_release_resource(self, pkg_name, filepath):
if pkg_name not in self._release_resources:
self._release_resources[pkg_name] = {}
if not self._release_resources[pkg_name].get(filepath, None):
pkg = self._distribution_file.release_packages[pkg_name]
repo_name = pkg.repository_name
repo = self._distribution_file.repositories[repo_name]
Expand All @@ -73,33 +76,45 @@ def get_release_package_xml(self, pkg_name):
repo = repo.release_repository
if repo.version is None:
return None
package_xml = None
for mp in self._manifest_providers:
package_xml = mp(self._distribution_file.name, repo, pkg_name)
if package_xml is not None:
content = mp(self._distribution_file.name, repo, pkg_name, filepath)
if content is not None:
break
self._release_package_xmls[pkg_name] = package_xml
return self._release_package_xmls[pkg_name]
self._release_resources[pkg_name][filepath] = content
return self._release_resources.get(pkg_name, {}).get(filepath, None)


def get_release_package_xml(self, pkg_name):
    """Return the release resource stored as 'package.xml' for ``pkg_name``.

    Delegates to ``get_release_resource``; whatever that call returns is
    passed back unchanged.
    """
    # TODO(tfoote) deprecated
    resource_path = 'package.xml'
    return self.get_release_resource(pkg_name, resource_path)

def get_release_readme(self, pkg_name):
    """Return the release resource stored as 'README.md' for ``pkg_name``.

    Delegates to ``get_release_resource``; whatever that call returns is
    passed back unchanged.
    """
    # TODO(tfoote) deprecated
    resource_path = 'README.md'
    return self.get_release_resource(pkg_name, resource_path)

def get_release_changelog(self, pkg_name):
    """Return the release resource stored as 'CHANGELOG.rst' for ``pkg_name``.

    Delegates to ``get_release_resource``; whatever that call returns is
    passed back unchanged.
    """
    # TODO(tfoote) deprecated
    resource_path = 'CHANGELOG.rst'
    return self.get_release_resource(pkg_name, resource_path)

def get_source_package_xml(self, pkg_name):
    """Return the source package.xml content for ``pkg_name``, or None.

    The owning repository is looked up in the distribution file's
    ``source_packages`` map and its resources are obtained through
    ``get_source_repo_resources``. None is returned when that call yields
    nothing (None or an empty/falsy cache).
    """
    source_pkg = self._distribution_file.source_packages[pkg_name]
    resources = self.get_source_repo_resources(source_pkg.repository_name)
    if not resources:
        return None
    # NOTE(review): index 1 presumes each entry is a (path, xml)-like
    # sequence; confirm this still holds for dict-shaped format-3 entries.
    return resources[pkg_name][1]

def get_source_repo_resources(self, repo_name):
    """Return the resource cache of a source repository, fetching it on demand.

    A previously stored entry is returned as-is. Otherwise each source
    manifest provider is asked in order; the first non-None result is
    stored, every contained name that does not start with an underscore is
    registered as a source package of ``repo_name``, and the result is
    returned. None is returned when no provider yields a result.
    """
    known = self._source_repo_resources
    if repo_name in known:
        return known[repo_name]

    for provider in self._source_manifest_providers:
        fetched = provider(self.repositories[repo_name].source_repository)
        if fetched is None:
            continue
        # Update map of package resources, and also list of known package names.
        known[repo_name] = fetched
        for entry_name in fetched:
            # Underscore-prefixed keys are skipped; only the rest are packages.
            if entry_name[0] != '_':
                self._distribution_file.source_packages[entry_name] = Package(entry_name, repo_name)
        return known[repo_name]
    return None
107 changes: 78 additions & 29 deletions src/rosdistro/distribution_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import datetime
import sys
import time

from . import logger
from .distribution_file import create_distribution_file
Expand All @@ -46,38 +48,57 @@ class DistributionCache(object):

def __init__(self, name, data=None, distribution_file_data=None):
    """Build a cache from serialized cache data or from distribution file data.

    :param name: name of the distribution; must equal ``data['name']`` when
      ``data`` is given.
    :param data: previously serialized cache dictionary (format version 2
      or 3), or None.
    :param distribution_file_data: distribution file data, used when
      ``data`` is not given.
    :raises AssertionError: if neither input is given, or if ``data`` has
      an unexpected type, version or name.

    Format 2 ``release_package_xmls`` entries are migrated into
    ``self.release_resources`` under the ``'package.xml'`` key, so the
    in-memory state (and any re-export) is always format version 3.
    """
    assert data or distribution_file_data

    # 0 means "no serialized cache was supplied".
    inbound_version = 0
    if data:
        assert 'type' in data, "Expected file type is '%s'" % DistributionCache._type
        assert data['type'] == DistributionCache._type, "Expected file type is '%s', not '%s'" % (DistributionCache._type, data['type'])

        assert 'version' in data, "Distribution cache file for '%s' lacks required version information" % name
        inbound_version = int(data['version'])
        assert inbound_version > 1, "Unable to handle '%s' format version '%d' anymore, please update your '%s' file to version '2'" % (DistributionCache._type, inbound_version, DistributionCache._type)
        assert inbound_version <= 3, "Unable to handle '%s' format version '%d', please update rosdistro (e.g. on Ubuntu/Debian use: sudo apt-get update && sudo apt-get install --only-upgrade python-rosdistro)" % (DistributionCache._type, inbound_version)

        assert 'name' in data, "Distribution cache file for '%s' lacks required name information" % name
        assert data['name'] == name, "Distribution cache file for '%s' does not match the name '%s'" % (name, data['name'])

    # All data is migrated forward on import; any re-export is version 3.
    self.version = 3

    self._distribution_file_data = data['distribution_file'] if data else distribution_file_data
    self.distribution_file = create_distribution_file(name, self._distribution_file_data)

    self.release_resources = data['release_resources'] if data and 'release_resources' in data else {}

    # Format 2 backwards compatibility: the flat {pkg_name: package_xml}
    # mapping becomes one 'package.xml' resource per package. The entries
    # are written into self.release_resources itself; writing them into
    # ``data`` would lose them whenever ``data`` has no 'release_resources'
    # key, which is the normal case for a version 2 file.
    if inbound_version == 2 and 'release_package_xmls' in data:
        for pkg_name, pkg_xml in data['release_package_xmls'].items():
            self.release_resources.setdefault(pkg_name, {})['package.xml'] = pkg_xml

    # NOTE(review): only the 'source_repo_resources' key is read here; a
    # format 2 'source_repo_package_xmls' section is not migrated -- confirm
    # that dropping it on import is intended.
    self.source_repo_resources = {}
    if data and 'source_repo_resources' in data:
        for repo_name, repo_data in data['source_repo_resources'].items():
            self.source_repo_resources[repo_name] = SourceRepositoryCache(repo_data)
    self.distribution_file.source_packages = self.get_source_packages()

def get_data(self):
    """Serialize the cache into a plain dictionary using format version 3.

    Each source repository cache is serialized through its own
    ``get_data()``; the release resources and the distribution file data
    are referenced directly, not copied.
    """
    return {
        'type': 'cache',
        'version': 3,
        'name': self.distribution_file.name,
        'distribution_file': self._distribution_file_data,
        'release_resources': self.release_resources,
        'source_repo_resources': {
            repo_name: repo_cache.get_data()
            for repo_name, repo_cache in self.source_repo_resources.items()
        },
    }

def update_distribution(self, distribution_file_data):
Expand Down Expand Up @@ -105,19 +126,32 @@ def update_distribution(self, distribution_file_data):
dist_file = create_distribution_file(self.distribution_file.name, self._distribution_file_data)

# remove all release package xmls where the package version has changed.
print("- removing invalid release package cache entries.")
print(f"- checking [{len(dist_file.release_packages.keys())}] release package cache entries for different versions")
dropped_count = 0
skipped_count = 0
for pkg_name in sorted(dist_file.release_packages.keys()):
if pkg_name not in self.distribution_file.release_packages:
logger.debug("Skipping %s because not in the distro." % pkg_name)
skipped_count += 1
continue
if pkg_name in self.release_package_xmls and self._get_repo_info(dist_file, pkg_name) != self._get_repo_info(self.distribution_file, pkg_name):
logger.debug("Dropping release package XML cache for %s" % pkg_name)
del self.release_package_xmls[pkg_name]
if pkg_name in self.release_resources and self._get_repo_info(dist_file, pkg_name) != self._get_repo_info(self.distribution_file, pkg_name):
logger.debug("Dropping release resources package cache for %s" % pkg_name)
dropped_count += 1
del self.release_resources[pkg_name]


sys.stdout.write('\n')
sys.stdout.write(f'Dropped {dropped_count} repositories\n')
sys.stdout.write(f'Skippted {skipped_count} repositories\n')

# Remove all source package xmls where the devel branch is pointing to a different commit than
# the one we have associated with our cache. This requires calling git ls-remote on all affected repos.
if self.source_repo_package_xmls:
print("- checking invalid source repo cache entries.")
for repo in sorted(self.source_repo_package_xmls.keys()):
if self.source_repo_resources:
start_time = time.perf_counter()
dropped_count = 0
skipped_count = 0
print(f"- checking [{len(self.source_repo_resources.keys())}] source repo cache entries without source entries, requires ls-remote")
for repo in sorted(self.source_repo_resources.keys()):
sys.stdout.write('.')
sys.stdout.flush()
try:
Expand All @@ -126,27 +160,42 @@ def update_distribution(self, distribution_file_data):
# The repo entry has been dropped, or the source stanza from it has been dropped,
# either way, remove the cache entries associated with this repository.
logger.debug('Unable to find source repository info for repo "%s".' % repo)
del self.source_repo_package_xmls[repo]
del self.source_repo_resources[repo]
continue

min_update_delta = 1 * 60 * 60 # TOOD(tfoote) magic number make into a parameter
if '_last_update_time' in self.source_repo_resources[repo]:
now = datetime.datetime.now()
entry_age = (now - self.source_repo_resources[repo]['_last_update_time']).total_seconds()
if entry_age < min_update_delta:
logger.debug(f'Skipping check of {repo} because it was last updated only {entry_age} seconds ago less than {min_update_delta}')
skipped_count += 1
continue

if ref_is_hash(source_repository.version):
source_hash = source_repository.version
else:
result = Git().command('ls-remote', source_repository.url, source_repository.version)
if result['returncode'] != 0 or not result['output']:
# Error checking remote, or unable to find remote reference. Drop the cache entry.
logger.debug("Unable to check hash for branch %s of %s, dropping cache entry." % (source_repository.version, source_repository.url))
del self.source_repo_package_xmls[repo]
del self.source_repo_resources[repo]
dropped_count += 1
continue
# Split by line first and take the last line, to squelch any preamble output, for example
# a known host key validation notice.
source_hash = result['output'].split('\n')[-1].split('\t')[0]

cached_hash = self.source_repo_package_xmls[repo].ref()
cached_hash = self.source_repo_resources[repo].ref()
if source_hash != cached_hash:
logger.debug('Repo "%s" has moved from %s to %s, dropping cache.' % (repo, cached_hash, source_hash))
del self.source_repo_package_xmls[repo]
del self.source_repo_resources[repo]
dropped_count += 1
sys.stdout.write('\n')
sys.stdout.write(f'Dropped {dropped_count} repositories\n')
sys.stdout.write(f'Skippted {skipped_count} repositories\n')
end_time = time.perf_counter()
logger.debug(f'Check of invalid source repo cache entries took {(end_time - start_time):.1f} seconds')

self.distribution_file = dist_file
self.distribution_file.source_packages = self.get_source_packages()
Expand All @@ -157,7 +206,7 @@ def update_distribution(self, distribution_file_data):
def get_source_packages(self):
    """ Returns dictionary mapping source package names to Package() objects. """
    # Every name yielded by iterating a cached source repo becomes a
    # Package bound to that repo; a name seen in several repos keeps the
    # last repo iterated.
    return {
        pkg_name: Package(pkg_name, repo_name)
        for repo_name, repo_entries in self.source_repo_resources.items()
        for pkg_name in repo_entries
    }
Expand All @@ -168,7 +217,7 @@ def _get_repo_info(self, dist_file, pkg_name):
return (repo.version, repo.url)

def _remove_obsolete_entries(self):
    """Drop release resources of packages absent from the distribution file.

    One '- REMOVE Release Resources for: ' line is printed per dropped
    package.
    """
    # Iterate over a snapshot because entries are deleted while looping.
    for cached_name in tuple(self.release_resources):
        if cached_name in self.distribution_file.release_packages:
            continue
        print('- REMOVE Release Resources for: ', cached_name)
        del self.release_resources[cached_name]
Loading
Loading