From fe9aed62895ea0f9885126af99a81554c8d4229f Mon Sep 17 00:00:00 2001 From: Florian Roth Date: Mon, 26 Jan 2026 15:40:54 +0100 Subject: [PATCH 1/3] fix: fix empty repos --- main/rule_collector.py | 9 +++++++-- yara-forge-config.yml | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/main/rule_collector.py b/main/rule_collector.py index b141510..e1bb010 100644 --- a/main/rule_collector.py +++ b/main/rule_collector.py @@ -5,6 +5,7 @@ import shutil import datetime import logging +from urllib.parse import unquote #from pprint import pprint import plyara from git import Repo @@ -89,7 +90,9 @@ def retrieve_yara_rule_sets(repo_staging_dir, yara_repos): # If a sub-path is configured, restrict checkout to that path to skip large folders if 'path' in repo: repo_obj.git.sparse_checkout('init', '--cone') - repo_obj.git.sparse_checkout('set', repo['path']) + # URL-decode the path before using it with git sparse-checkout + decoded_path = unquote(repo['path']) + repo_obj.git.sparse_checkout('set', decoded_path) repo['commit_hash'] = repo_obj.head.commit.hexsha else: # Get the latest commit hash @@ -121,7 +124,9 @@ def retrieve_yara_rule_sets(repo_staging_dir, yara_repos): # Walk a sub folder if one is set in the config walk_folder = repo_folder if 'path' in repo: - walk_folder = os.path.join(repo_folder, repo['path']) + # URL-decode the path before using it + decoded_path = unquote(repo['path']) + walk_folder = os.path.join(repo_folder, decoded_path) # Print the processed folder logging.debug("Processing folder: %s", walk_folder) diff --git a/yara-forge-config.yml b/yara-forge-config.yml index 733c97e..1b84948 100644 --- a/yara-forge-config.yml +++ b/yara-forge-config.yml @@ -124,7 +124,7 @@ yara_repositories: author: "BlackBerry Threat Research Team" quality: 85 branch: "master" - path: "BlackBerry" + path: "Blackberry" - name: "Cluster25" url: "https://github.com/mikesxrs/Open-Source-YARA-rules" author: "Cluster25" From dad39d323dd3870f695eb38fc0125a9a164f6aff Mon Sep 17 00:00:00 2001 From: Florian Roth Date: Mon, 26 Jan 2026 18:24:32 +0100 Subject: [PATCH 2/3] chore: build-stats in workflows --- .github/workflows/run-yara-forge.yml | 24 ++++++++++++++++++++++++ .github/workflows/weekly-release.yml | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/.github/workflows/run-yara-forge.yml b/.github/workflows/run-yara-forge.yml index 7ce67bf..e18b4f4 100644 --- a/.github/workflows/run-yara-forge.yml +++ b/.github/workflows/run-yara-forge.yml @@ -47,3 +47,27 @@ jobs: - name: Run YARA-Forge run: | python yara-forge.py + + - name: Upload build statistics + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-statistics + path: build_stats.md + retention-days: 30 + + - name: Upload build log + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-log + path: yara-forge.log + retention-days: 30 + + - name: Upload rule issues + if: always() + uses: actions/upload-artifact@v4 + with: + name: rule-issues + path: yara-forge-rule-issues.yml + retention-days: 30 diff --git a/.github/workflows/weekly-release.yml b/.github/workflows/weekly-release.yml index bc5d63d..6c18480 100644 --- a/.github/workflows/weekly-release.yml +++ b/.github/workflows/weekly-release.yml @@ -43,6 +43,30 @@ jobs: run: | python yara-forge.py + - name: Upload build statistics + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-statistics + path: build_stats.md + retention-days: 90 + + - name: Upload build log + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-log + path: yara-forge.log + retention-days: 90 + + - name: Upload rule issues + if: always() + uses: actions/upload-artifact@v4 + with: + name: rule-issues + path: yara-forge-rule-issues.yml + retention-days: 90 + - name: Get current date run: echo "CURRENT_DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV shell: bash From 640799b0d59af9a5bcd9f83867919e34ac86a850 Mon Sep 17 00:00:00 2001 From: Florian Roth Date: Mon, 26 Jan 2026 20:47:24 +0100 Subject: [PATCH 3/3] fix: issue with paths --- main/rule_collector.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/main/rule_collector.py b/main/rule_collector.py index e1bb010..bffa50e 100644 --- a/main/rule_collector.py +++ b/main/rule_collector.py @@ -95,9 +95,15 @@ def retrieve_yara_rule_sets(repo_staging_dir, yara_repos): repo_obj.git.sparse_checkout('set', decoded_path) repo['commit_hash'] = repo_obj.head.commit.hexsha else: - # Get the latest commit hash + # Repository already cloned - reuse it repo_folder = os.path.join(repo_staging_dir, repo['owner'], repo['repo']) - repo['commit_hash'] = Repo(repo_folder).head.commit.hexsha + repo_obj = Repo(repo_folder) + repo['commit_hash'] = repo_obj.head.commit.hexsha + # If this repo config has a path, add it to sparse checkout + # (needed when multiple configs share the same git URL) + if 'path' in repo: + decoded_path = unquote(repo['path']) + repo_obj.git.sparse_checkout('add', decoded_path) # Walk through the extracted folders and find a LICENSE file # and save it into the repository object