From fb82c1a45ab6053331652fd14dad8938ef22effc Mon Sep 17 00:00:00 2001 From: kyle Date: Sun, 12 Oct 2025 11:47:20 -0700 Subject: [PATCH] feat: v0.3.1 workflow refresh from clean base; --source flag; selection manifest/symlink; precise filtering in Step 2; output/tmpdir options; GUI verbose; assets icons; packaging scripts; docs updated --- .DS_Store | Bin 0 -> 6148 bytes .github/workflows/ci.yml | 16 +--- .github/workflows/release.yml | 76 ++--------------- .gitignore | 22 ----- PCAPpuller.py | 108 ++++++++++++++++++------ README.md | 37 ++++++-- WORKFLOW_GUIDE.md | 36 ++++---- assets/PCAPpuller.icns | 7 ++ assets/PCAPpuller.ico | 7 ++ assets/PCAPpuller.png | 10 +++ docs/Analyst-Guide.md | 35 ++++---- gui_pcappuller.py | 98 ++++++++++++++++----- packaging/linux/install_desktop.sh | 43 ++++++++++ packaging/linux/uninstall_desktop.sh | 25 ++++++ packaging/macos/build_pyinstaller.sh | 19 +++++ packaging/windows/build_pyinstaller.ps1 | 21 +++++ pcappuller-gui.desktop | 6 +- pcappuller/gui.py | 99 +++++++++++++++++----- pcappuller/workflow.py | 98 +++++++++++++-------- pyproject.toml | 3 +- 20 files changed, 513 insertions(+), 253 deletions(-) create mode 100644 .DS_Store create mode 100644 assets/PCAPpuller.icns create mode 100644 assets/PCAPpuller.ico create mode 100644 assets/PCAPpuller.png create mode 100755 packaging/linux/install_desktop.sh create mode 100755 packaging/linux/uninstall_desktop.sh create mode 100755 packaging/macos/build_pyinstaller.sh create mode 100644 packaging/windows/build_pyinstaller.ps1 diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..39e36cbbbc07119b0cbc01ec40ef7d7853393760 GIT binary patch literal 6148 zcmeHKOHRWu5PefBXhBG2#|9}UsKgD@(kvXT&z97#GK48}PO=n-*t1!GqI8 zYg76?GCYx;;Tc(puWGFFd2410m;$E2Mk~NQTdlF@(MnUm6fgyr3h4JCqASJ$bC2%R z!NMH@h;@dYv93Re%83HT0dtRRp^2vwJypUHLp+`FDCETfbB~@535O2}BTG1;h#Q^# zM=l)_d$iINFa?SVZ28-k?*GZp=l>$f?o0tw;9n^q_1>V@<&?tSTA7^gwGsWAuBLgp 
m$0daow-qy2x8g&(GsYvW5aWQkM@DG&Bj9AP!W8&Z1-<|)6=dT8 literal 0 HcmV?d00001 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01cfea9..e0b98bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,22 +20,10 @@ jobs: run: | python -m pip install --upgrade pip pip install ruff mypy - # Install PySimpleGUI from their private index for gui dependencies - pip install --extra-index-url https://PySimpleGUI.net/install PySimpleGUI || true - pip install -e .[datetime] - # Install GUI dependencies only if PySimpleGUI succeeded - pip install -e .[gui] || echo "Skipping GUI dependencies" - - - name: Python syntax check - run: | - python -m py_compile PCAPpuller.py - python -m py_compile gui_pcappuller.py - python -m compileall pcappuller/ + pip install -e .[gui,datetime] - name: Ruff (E,F only) run: ruff check --select E,F --ignore E501 . - name: Mypy - run: | - # Run mypy with ignore-missing-imports for potential GUI dependency issues - mypy --ignore-missing-imports PCAPpuller.py pcappuller gui_pcappuller.py + run: mypy PCAPpuller.py pcappuller gui_pcappuller.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b44f681..d7a5b6b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,54 +31,20 @@ jobs: # PySimpleGUI requires private index pip install --extra-index-url https://PySimpleGUI.net/install PySimpleGUI - - name: Prepare icons - shell: bash - run: | - set -euxo pipefail - mkdir -p artifacts/icons - ICON_SRC="assets/icons/pcappuller.png" - if [ ! 
-f "$ICON_SRC" ] && [ -f assets/icons/pcap.png ]; then ICON_SRC="assets/icons/pcap.png"; fi - if [ -f "$ICON_SRC" ]; then - # Linux does not embed icon, but Windows/macOS will use .ico/.icns - if [ "$RUNNER_OS" = "Windows" ]; then - echo Using ImageMagick to create .ico from $ICON_SRC - magick convert "$ICON_SRC" -resize 256x256 artifacts/icons/pcappuller.ico - elif [ "$RUNNER_OS" = "macOS" ]; then - echo Building .icns from .iconset using $ICON_SRC - ICONSET=artifacts/icons/pcappuller.iconset - mkdir -p "$ICONSET" - sips -z 16 16 "$ICON_SRC" --out "$ICONSET/icon_16x16.png" - sips -z 32 32 "$ICON_SRC" --out "$ICONSET/icon_16x16@2x.png" - sips -z 32 32 "$ICON_SRC" --out "$ICONSET/icon_32x32.png" - sips -z 64 64 "$ICON_SRC" --out "$ICONSET/icon_32x32@2x.png" - sips -z 128 128 "$ICON_SRC" --out "$ICONSET/icon_128x128.png" - sips -z 256 256 "$ICON_SRC" --out "$ICONSET/icon_128x128@2x.png" - sips -z 256 256 "$ICON_SRC" --out "$ICONSET/icon_256x256.png" - sips -z 512 512 "$ICON_SRC" --out "$ICONSET/icon_256x256@2x.png" - sips -z 512 512 "$ICON_SRC" --out "$ICONSET/icon_512x512.png" - cp "$ICON_SRC" "$ICONSET/icon_512x512@2x.png" || true - iconutil -c icns "$ICONSET" -o artifacts/icons/pcappuller.icns - fi - fi - - name: Build GUI binary shell: bash run: | set -euxo pipefail mkdir -p release if [ "$RUNNER_OS" = "Windows" ]; then - if [ -f artifacts/icons/pcappuller.ico ]; then ICON="--icon artifacts/icons/pcappuller.ico"; else ICON=""; fi - pyinstaller --onefile --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py + pyinstaller --onefile --windowed --name PCAPpullerGUI gui_pcappuller.py mv dist/PCAPpullerGUI.exe "release/PCAPpullerGUI-windows.exe" elif [ "$RUNNER_OS" = "macOS" ]; then # Build a proper .app so Finder runs it correctly - if [ -f artifacts/icons/pcappuller.icns ]; then ICON="--icon artifacts/icons/pcappuller.icns"; else ICON=""; fi - pyinstaller --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py + pyinstaller --windowed --name PCAPpullerGUI 
gui_pcappuller.py (cd dist && zip -r ../release/PCAPpullerGUI-macos.zip PCAPpullerGUI.app) else - # Linux: try to use icon if available - if [ -f assets/icons/pcappuller.png ]; then ICON="--icon assets/icons/pcappuller.png"; else ICON=""; fi - pyinstaller --onefile --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py + pyinstaller --onefile --windowed --name PCAPpullerGUI gui_pcappuller.py mv dist/PCAPpullerGUI "release/PCAPpullerGUI-linux" fi @@ -92,53 +58,23 @@ jobs: sudo gem install --no-document fpm VERSION=$(grep -E '^version\s*=\s*"[0-9]+\.[0-9]+\.[0-9]+"' pyproject.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)"/\1/') STAGE=$(mktemp -d) - - # Install binary mkdir -p "$STAGE/usr/local/bin" install -m 0755 release/PCAPpullerGUI-linux "$STAGE/usr/local/bin/pcappuller-gui" - - # Install desktop file - mkdir -p "$STAGE/usr/share/applications" - install -m 0644 pcappuller-gui.desktop "$STAGE/usr/share/applications/" - - # Install icon - if [ -f assets/icons/pcappuller.png ]; then - mkdir -p "$STAGE/usr/share/icons/hicolor/256x256/apps" - install -m 0644 assets/icons/pcappuller.png "$STAGE/usr/share/icons/hicolor/256x256/apps/pcappuller.png" - # Also install in standard pixmaps location - mkdir -p "$STAGE/usr/share/pixmaps" - install -m 0644 assets/icons/pcappuller.png "$STAGE/usr/share/pixmaps/pcappuller.png" - fi - NAME=pcappuller-gui - DESC="PCAPpuller GUI: fast PCAP window selector, merger, trimmer, and cleaner" + DESC="PCAPpuller GUI: fast PCAP window selector, merger, trimmer" URL="https://github.com/ktalons/daPCAPpuller" LICENSE=MIT MAINTAINER="Kyle Versluis" - - # Create post-install script - echo '#!/bin/bash' > postinst.sh - echo 'if command -v update-desktop-database >/dev/null 2>&1; then' >> postinst.sh - echo ' update-desktop-database /usr/share/applications' >> postinst.sh - echo 'fi' >> postinst.sh - echo 'if command -v gtk-update-icon-cache >/dev/null 2>&1; then' >> postinst.sh - echo ' gtk-update-icon-cache -f -t /usr/share/icons/hicolor' >> 
postinst.sh - echo 'fi' >> postinst.sh - chmod +x postinst.sh - - # deb with post-install script + # deb fpm -s dir -t deb -n "$NAME" -v "$VERSION" \ --license "$LICENSE" --url "$URL" --maintainer "$MAINTAINER" \ --description "$DESC" \ - --after-install postinst.sh \ -C "$STAGE" --prefix / \ -p "release/${NAME}_${VERSION}_amd64.deb" - - # rpm with post-install script + # rpm fpm -s dir -t rpm -n "$NAME" -v "$VERSION" \ --license "$LICENSE" --url "$URL" --maintainer "$MAINTAINER" \ --description "$DESC" \ - --after-install postinst.sh \ -C "$STAGE" --prefix / \ -p "release/${NAME}-${VERSION}-1.x86_64.rpm" diff --git a/.gitignore b/.gitignore index 04cb2a3..2d1a8ea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,3 @@ -# Python build and env -__pycache__/ -*.pyc -.venv/ - -# PyInstaller -/build/ -/dist/ -/*.spec - -# Packaging outputs -packaging/artifacts/ -.debstage/ - -# OS/editor -.DS_Store -*.swp -*.swo - -# Logs -*.log - # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] diff --git a/PCAPpuller.py b/PCAPpuller.py index 8eedc1d..ab9ff69 100755 --- a/PCAPpuller.py +++ b/PCAPpuller.py @@ -47,17 +47,19 @@ def parse_args(): ap.add_argument("--resume", action="store_true", help="Resume from existing workflow state") ap.add_argument("--status", action="store_true", help="Show workflow status and exit") - # Step 1: Selection parameters + # Step 1: File Selection step1_group = ap.add_argument_group("Step 1: File Selection") - step1_group.add_argument("--root", nargs="+", help="Root directories to search (required for new workflow)") - step1_group.add_argument("--include-pattern", nargs="*", default=["*.chunk_*.pcap"], - help="Include files matching these patterns") - step1_group.add_argument("--exclude-pattern", nargs="*", default=["*.sorted.pcap", "*.s256.pcap"], - help="Exclude files matching these patterns") - step1_group.add_argument("--slop-min", type=int, default=120, help="Extra minutes around window for mtime prefilter") - 
step1_group.add_argument("--precise-filter", action="store_true", default=True, help="Use capinfos for precise filtering") - step1_group.add_argument("--no-precise-filter", action="store_false", dest="precise_filter", - help="Skip precise filtering, use mtime only") + # New preferred flag + step1_group.add_argument("--source", nargs="+", help="Source directories to search (required for new workflow)") + # Backward-compat alias (hidden) + step1_group.add_argument("--root", nargs="+", dest="source", help=argparse.SUPPRESS) + step1_group.add_argument("--include-pattern", nargs="*", default=["*.pcap", "*.pcapng"], + help="Include files matching these patterns (default: *.pcap, *.pcapng)") + step1_group.add_argument("--exclude-pattern", nargs="*", default=[], + help="Exclude files matching these patterns (optional)") + step1_group.add_argument("--slop-min", type=int, default=None, help="Extra minutes around window for mtime prefilter (auto by default)") + step1_group.add_argument("--selection-mode", choices=["manifest", "symlink"], default="manifest", + help="How to materialize Step 1 selections. 
'manifest' (default) avoids any data copy; 'symlink' creates symlinks in the workspace.") # Time window (required for new workflow) time_group = ap.add_argument_group("Time Window") @@ -68,12 +70,14 @@ def parse_args(): # Step 2: Processing parameters step2_group = ap.add_argument_group("Step 2: Processing") - step2_group.add_argument("--batch-size", type=int, default=500, help="Files per merge batch") + step2_group.add_argument("--batch-size", type=int, default=None, help="Files per merge batch (auto by default)") step2_group.add_argument("--out-format", choices=["pcap", "pcapng"], default="pcapng", help="Output format") step2_group.add_argument("--display-filter", help="Wireshark display filter") step2_group.add_argument("--trim-per-batch", action="store_true", help="Trim each batch before final merge") step2_group.add_argument("--no-trim-per-batch", action="store_false", dest="trim_per_batch", help="Only trim final merged file") + step2_group.add_argument("--out", help="Explicit output file path for Step 2 (e.g., /path/to/output.pcapng). 
If omitted, a timestamped file is written under the workspace.") + step2_group.add_argument("--no-precise-filter", action="store_true", help="Disable precise filtering in Step 2 (advanced)") # Step 3: Cleaning parameters step3_group = ap.add_argument_group("Step 3: Cleaning") @@ -101,8 +105,8 @@ def parse_args(): if not args.resume: # New workflow requires certain parameters - if not args.root: - ap.error("--root is required for new workflow (use --resume to continue existing)") + if not args.source: + ap.error("--source is required for new workflow (use --resume to continue existing)") if not args.start: ap.error("--start is required for new workflow") if not args.minutes and not args.end: @@ -135,9 +139,9 @@ def progress_callback(phase: str, current: int, total: int): def run_step1(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> WorkflowState: """Execute Step 1: File Selection.""" - print("๐Ÿ” Step 1: Selecting and copying PCAP files...") + print("๐Ÿ” Step 1: Selecting PCAP files...") - # Setup cache + # Setup cache (not strictly needed for Step 1 now, but keep for future-proofing) cache = None if not args.no_cache: cache_path = default_cache_path() if args.cache == "auto" else Path(args.cache) @@ -149,16 +153,37 @@ def run_step1(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl progress_cb, cleanup_pb = setup_progress_callback("Step 1: File Selection") try: + # Auto defaults: compute slop based on requested duration when not provided + try: + start, end = parse_start_and_window(args.start, args.minutes, args.end) + duration_minutes = int((end - start).total_seconds() // 60) + except Exception: + duration_minutes = 60 + if args.slop_min is None: + if duration_minutes <= 15: + slop_min = 120 + elif duration_minutes <= 60: + slop_min = 60 + elif duration_minutes <= 240: + slop_min = 30 + elif duration_minutes <= 720: + slop_min = 20 + else: + slop_min = 15 + else: + slop_min = args.slop_min + workers = parse_workers(args.workers, 
1000) # Estimate for auto calculation state = workflow.step1_select_and_move( state=state, - slop_min=args.slop_min, - precise_filter=args.precise_filter, + slop_min=slop_min, + precise_filter=False, # moved to Step 2 by default workers=workers, cache=cache, dry_run=args.dry_run, - progress_callback=progress_cb + progress_callback=progress_cb, + selection_mode=args.selection_mode ) if not args.dry_run: @@ -186,14 +211,48 @@ def run_step2(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl if args.trim_per_batch is not None: trim_per_batch = args.trim_per_batch + # Auto defaults for Step 2 if not provided + # Determine duration from state + duration_minutes = int((state.window.end - state.window.start).total_seconds() // 60) + if args.batch_size is None: + if duration_minutes <= 15: + batch_size = 500 + elif duration_minutes <= 60: + batch_size = 400 + elif duration_minutes <= 240: + batch_size = 300 + elif duration_minutes <= 720: + batch_size = 200 + else: + batch_size = 150 + else: + batch_size = int(args.batch_size) + if trim_per_batch is None: + trim_per_batch = duration_minutes > 60 + + # Setup cache for Step 2 precise filtering (default on) + cache = None + if not args.no_cache: + cache_path = default_cache_path() if args.cache == "auto" else Path(args.cache) + cache = CapinfosCache(cache_path) + if args.clear_cache: + cache.clear() + + workers = parse_workers(args.workers, total_files=1000) + state = workflow.step2_process( state=state, - batch_size=args.batch_size, + batch_size=batch_size, out_format=args.out_format, display_filter=args.display_filter, trim_per_batch=trim_per_batch, progress_callback=progress_cb, - verbose=args.verbose + verbose=args.verbose, + out_path=Path(args.out) if args.out else None, + tmpdir_parent=Path(args.tmpdir) if args.tmpdir else None, + precise_filter=not bool(getattr(args, "no_precise_filter", False)), + workers=workers, + cache=cache, ) print("โœ… Step 2 complete: Processed file saved") @@ -219,12 +278,9 @@ 
def run_step3(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl if args.gzip: clean_options['gzip'] = True + # If user did not specify options, apply safe defaults that do not truncate payloads if not clean_options: - print("โญ๏ธ Step 3: No cleaning options specified, skipping...") - state.step3_complete = True - state.cleaned_file = state.processed_file # Use processed file as final - state.save(workflow.state_file) - return state + clean_options = {"convert_to_pcap": True, "gzip": True} print("๐Ÿงน Step 3: Cleaning output (removing headers/metadata)...") @@ -305,7 +361,7 @@ def main(): window = Window(start=start, end=end) # Initialize new workflow - root_dirs = [Path(r) for r in args.root] + root_dirs = [Path(r) for r in args.source] state = workflow.initialize_workflow( root_dirs=root_dirs, window=window, diff --git a/README.md b/README.md index 0e2c681..e654270 100644 --- a/README.md +++ b/README.md @@ -140,11 +140,19 @@ ___ ### Three-Step Workflow (Recommended) ```bash # Complete workflow - solves size inflation issues! 
-pcap-puller --workspace /tmp/job --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 --snaplen 256 --gzip +pcap-puller --workspace /tmp/job \ + --source /mnt/dir \ + --start "YYYY-MM-DD HH:MM:SS" \ + --minutes 15 \ + --selection-mode symlink \ + --out /path/to/output.pcapng \ + --tmpdir /path/on/large/volume/tmp \ + --snaplen 256 \ + --gzip # Individual steps for more control -pcap-puller --workspace /tmp/job --step 1 --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 # Select -pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns" # Process +pcap-puller --workspace /tmp/job --step 1 --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 --selection-mode manifest # Select (no data copy) +pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns" --out /path/to/output.pcapng --tmpdir /big/tmp # Process pcap-puller --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip # Clean # Check status anytime @@ -172,10 +180,10 @@ pcap-puller --workspace /tmp/job --status ### Direct (without install) ```bash # New three-step workflow (recommended) -python3 PCAPpuller.py --workspace /tmp/job --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 --snaplen 256 --gzip +python3 PCAPpuller.py --workspace /tmp/job --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 --snaplen 256 --gzip # Individual steps -python3 PCAPpuller.py --workspace /tmp/job --step 1 --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 +python3 PCAPpuller.py --workspace /tmp/job --step 1 --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 python3 PCAPpuller.py --workspace /tmp/job --step 2 --resume --display-filter "dns" python3 PCAPpuller.py --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip @@ -186,7 +194,7 @@ ___ ## Arguments ๐Ÿ’ฅ ### Required โ— > `--workspace ` โ€” workspace directory for three-step workflow (NEW).
-> `--root ` — one or more directories to search.
+> `--source ` — one or more directories to search. (`--root` is still accepted as an alias.)
> `--start "YYYY-MM-DD HH:MM:SS"` — window start (local time).
> `--minutes <1–1440>` — duration; must stay within a single calendar day. Or use `--end` with same-day end time.
### Optional ❓ @@ -196,9 +204,10 @@ ___ > `--resume` — resume from existing workflow state.
> `--status` — show workflow status and exit.
-**Pattern Filtering (Step 1):** -> `--include-pattern [PATTERNS...]` — include files matching patterns (default: *.chunk_*.pcap).
-> `--exclude-pattern [PATTERNS...]` — exclude files matching patterns (default: *.sorted.pcap, *.s256.pcap).
+**Pattern Filtering (Step 1):** +> `--include-pattern [PATTERNS...]` — include files matching patterns (default: *.pcap, *.pcapng).
+> `--exclude-pattern [PATTERNS...]` — optional excludes (none by default).
+> `--selection-mode {manifest|symlink}` — how to materialize selections. Default: manifest. Use `symlink` to browse selections in a workspace folder.
**Processing Options:** > `--end ` — end time instead of `--minutes` (must be same day as `--start`).
@@ -208,6 +217,8 @@ ___ > `--workers ` — concurrency for precise filter (default: auto ≈ 2×CPU, gently capped).
> `--display-filter ""` — post-trim filter via tshark (e.g., "dns", "tcp.port==443").
> `--out-format {pcap|pcapng}` — final capture format (default: pcapng).
+> `--out ` — explicit output path for Step 2 (otherwise written under workspace).
+> `--tmpdir ` — directory for temporary files during Step 2 (overrides system/workspace tmp).
**Cleaning Options (Step 3):** > `--snaplen ` — truncate packets to N bytes.
@@ -242,6 +253,14 @@ ___ - Cleaning options in Step 3 can reduce final file size by 60-90% - Check status anytime: `--workspace /path --status` ___ +## App Icons ๐Ÿ–ผ๏ธ +- Place your icons under assets/ + - macOS: PCAPpuller.icns + - Linux: PCAPpuller.png (e.g., install to /usr/share/icons/hicolor/512x512/apps/PCAPpuller.png) + - Windows: PCAPpuller.ico +- During development, the GUI attempts to load assets/PCAPpuller.ico/.png/.icns and set the window icon automatically. +- The Linux desktop entry now uses Name=PCAPpuller and Exec=PCAPpuller with Icon=PCAPpuller. + ## Development ๐Ÿ› ๏ธ - Install tooling (in a virtualenv): - python3 -m pip install -e .[datetime] diff --git a/WORKFLOW_GUIDE.md b/WORKFLOW_GUIDE.md index 95660a4..d35bb6d 100644 --- a/WORKFLOW_GUIDE.md +++ b/WORKFLOW_GUIDE.md @@ -13,29 +13,33 @@ PCAPpuller has been enhanced with a three-step workflow that solves the file siz ```bash python3 PCAPpuller.py \ --workspace /tmp/my_workspace \ - --root /path/to/pcap/directory \ + --source /path/to/pcap/directory \ --start "2025-08-26 16:00:00" \ --minutes 30 \ - --slop-min 100000 \ + --selection-mode symlink \ + --out /path/to/output.pcapng \ + --tmpdir /path/on/large/volume/tmp \ --snaplen 128 \ --gzip ``` ### Individual Steps ```bash -# Step 1: Select files +# Step 1: Select files (no data copy using a manifest) python3 PCAPpuller.py \ --workspace /tmp/my_workspace \ - --root /path/to/pcap/directory \ + --source /path/to/pcap/directory \ --start "2025-08-26 16:00:00" \ --minutes 30 \ - --slop-min 100000 \ + --selection-mode manifest \ --step 1 -# Step 2: Process selected files +# Step 2: Process selected files to an explicit path python3 PCAPpuller.py \ --workspace /tmp/my_workspace \ --step 2 \ + --out /path/to/output.pcapng \ + --tmpdir /path/on/large/volume/tmp \ --resume # Step 3: Clean output @@ -56,15 +60,15 @@ python3 PCAPpuller.py \ ### File Pattern Filtering (Step 1) - **Include patterns**: Only process files matching these patterns - - Default: 
`*.chunk_*.pcap` (includes chunk files) -- **Exclude patterns**: Skip files matching these patterns - - Default: `*.sorted.pcap`, `*.s256.pcap` (excludes large consolidated files) + - Default: `*.pcap`, `*.pcapng` +- **Exclude patterns**: Optional. Add if needed. +- **Selection mode**: `--selection-mode {manifest|symlink}` controls how Step 1 materializes files in the workspace. Default is `manifest`; use `symlink` to create a browsable workspace. ### Example: Custom Patterns ```bash python3 PCAPpuller.py \ --workspace /tmp/workspace \ - --root /data/pcaps \ + --source /data/pcaps \ --include-pattern "*.chunk_*.pcap" "capture_*.pcap" \ --exclude-pattern "*.backup.pcap" "*.temp.*" \ --start "2025-08-26 16:00:00" \ @@ -76,6 +80,8 @@ python3 PCAPpuller.py \ - **Output format**: pcap or pcapng (default: pcapng) - **Display filter**: Wireshark filter to apply - **Trim per batch**: Trim each batch vs. final file only +- **Output path**: `--out /path/to/output.pcapng` +- **Temporary directory**: `--tmpdir /path/on/large/volume/tmp` ### Cleaning Options (Step 3) - **Snaplen**: Truncate packets to N bytes (saves space) @@ -135,7 +141,7 @@ python3 PCAPpuller.py --workspace /tmp/workspace --step 2 --resume # Process 6 hours of data with optimizations python3 PCAPpuller.py \ --workspace /tmp/large_job \ - --root /data/capture_2025_08_26 \ + --source /data/capture_2025_08_26 \ --start "2025-08-26 12:00:00" \ --minutes 360 \ --slop-min 100000 \ @@ -152,10 +158,10 @@ python3 PCAPpuller.py \ # See what files would be selected without processing python3 PCAPpuller.py \ --workspace /tmp/preview \ - --root /data/pcaps \ + --source /data/pcaps \ --start "2025-08-26 16:00:00" \ --minutes 60 \ - --step 1 \ + --step 1 \ --dry-run ``` @@ -164,7 +170,7 @@ python3 PCAPpuller.py \ # Step 1: Select HTTP traffic files python3 PCAPpuller.py \ --workspace /tmp/http_analysis \ - --root /data/network_logs \ + --source /data/network_logs \ --include-pattern "*http*" "*web*" \ --start "2025-08-26
16:00:00" \ --minutes 120 \ @@ -228,7 +234,7 @@ python3 PCAPpuller_legacy.py \ # New workflow (solves size inflation) python3 PCAPpuller.py \ --workspace /tmp/workspace \ - --root /data/pcaps \ + --source /data/pcaps \ --start "2025-08-26 16:00:00" \ --minutes 60 \ --slop-min 100000 \ diff --git a/assets/PCAPpuller.icns b/assets/PCAPpuller.icns new file mode 100644 index 0000000..d1bcd93 --- /dev/null +++ b/assets/PCAPpuller.icns @@ -0,0 +1,7 @@ +This is a placeholder for the PCAPpuller application icon (ICNS format). + +Replace this file with your real macOS .icns icon: +- Name: PCAPpuller.icns +- Place under assets/ for development window icon (best-effort on macOS) + +For distribution with a bundled app, configure your bundler (py2app, PyInstaller, Briefcase, etc.) to use this .icns file. diff --git a/assets/PCAPpuller.ico b/assets/PCAPpuller.ico new file mode 100644 index 0000000..f42bfd1 --- /dev/null +++ b/assets/PCAPpuller.ico @@ -0,0 +1,7 @@ +This is a placeholder for the PCAPpuller application icon (ICO format). + +Replace this file with your real Windows .ico icon: +- Name: PCAPpuller.ico +- Place under assets/ for development window icon on Windows + +For packaging MSI/EXE, configure your bundler to reference this .ico file. diff --git a/assets/PCAPpuller.png b/assets/PCAPpuller.png new file mode 100644 index 0000000..a430387 --- /dev/null +++ b/assets/PCAPpuller.png @@ -0,0 +1,10 @@ +This is a placeholder for the PCAPpuller application icon (PNG format). + +Replace this file with your real icon: +- Recommended sizes: 512x512 and 256x256 +- Name: PCAPpuller.png + +Packaging notes: +- Linux .desktop uses Icon=PCAPpuller; install this file to a theme path like: + /usr/share/icons/hicolor/512x512/apps/PCAPpuller.png +- During development, the GUI will attempt to load assets/PCAPpuller.png automatically for the window icon. 
diff --git a/docs/Analyst-Guide.md b/docs/Analyst-Guide.md index 1e53313..e638241 100644 --- a/docs/Analyst-Guide.md +++ b/docs/Analyst-Guide.md @@ -1,4 +1,4 @@ -# PCAPpuller Analyst Guide v0.3.0 +# PCAPpuller Analyst Guide v0.3.1 A comprehensive guide for SOC analysts to extract, clean, and analyze network traffic efficiently using the new **three-step workflow** that solves file size inflation issues. @@ -36,20 +36,20 @@ mergecap --version **Solves file size inflation issues!** **GUI**: Launch PCAPpuller GUI -1. Set **Root** directories containing PCAPs -2. Configure **Start time** and **Duration** +1. Set **Source Directory** containing PCAPs +2. Configure **Start time** and **Duration** (or use All Day) 3. Enable workflow steps: โ˜‘๏ธ Step 1, โ˜‘๏ธ Step 2, โ˜๏ธ Step 3 (optional) -4. Click **Pattern Settings** to configure file filtering +4. Click **Pattern Settings** to configure file filtering (defaults include only .pcap/.pcapng) 5. Optional: Apply **Display filter** (300+ filters available) 6. 
Click **Run Workflow** **CLI**: ```bash # Complete three-step workflow (recommended) -pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:30:00" --minutes 15 --snaplen 256 --gzip +pcap-puller --workspace /tmp/job --source /data --start "2025-10-10 14:30:00" --minutes 15 --snaplen 256 --gzip # Individual steps for better control -pcap-puller --workspace /tmp/job --step 1 --root /data --start "2025-10-10 14:30:00" --minutes 15 # Select & filter +pcap-puller --workspace /tmp/job --step 1 --source /data --start "2025-10-10 14:30:00" --minutes 15 # Select & filter pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns or http" # Process pcap-puller --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip # Clean @@ -60,10 +60,11 @@ pcap-puller --workspace /tmp/job --status #### Legacy Mode (May Cause Size Inflation) ```bash # Use legacy mode only if needed -pcap-puller --root /data --start "2025-10-10 14:30:00" --minutes 15 --out incident.pcapng +pcap-puller --source /data --start "2025-10-10 14:30:00" --minutes 15 --out incident.pcapng ``` -### B. PCAP Cleaning (Enhanced in v0.3.0) +### B. PCAP Cleaning (Enhanced in v0.3.1) +Note: If you leave Step 3 options blank in the 3-step workflow, defaults preserve payloads (convert to pcap when possible, gzip output). **GUI**: Click **"Clean..."** button 1. Select input PCAP/PCAPNG file 2. Configure cleaning options: @@ -90,8 +91,10 @@ pcap-clean --input capture.pcapng --start "2025-10-10 14:00:00" \ The new pattern filtering automatically prevents duplicate data processing. **Default Settings** (work for most cases): -- **Include**: `*.chunk_*.pcap` (individual time-based files) -- **Exclude**: `*.sorted.pcap`, `*.s256.pcap` (large consolidated files) +- **Include**: `*.pcap`, `*.pcapng` +- **Exclude**: (none by default) โ€” add excludes only if needed + +Tip: If your environment uses chunked filenames (e.g., `*.chunk_*.pcap`), add them via Advanced Options or Pattern Settings. 
**Custom Patterns** (GUI: Pattern Settings button): ```bash @@ -161,7 +164,7 @@ The new pattern filtering automatically prevents duplicate data processing. pcap-puller --root /data --start "2025-10-10 14:00:00" --minutes 30 --out result.pcap # NEW (solves size inflation) -pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:00:00" --minutes 30 --snaplen 256 --gzip +pcap-puller --workspace /tmp/job --source /data --start "2025-10-10 14:00:00" --minutes 30 --snaplen 256 --gzip ``` ## 5. Performance & Best Practices @@ -186,7 +189,7 @@ pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:00:00" --mi ### Audit & Validation ```bash # NEW: Validate three-step workflow with dry-run -pcap-puller --workspace /tmp/job --step 1 --root /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run +pcap-puller --workspace /tmp/job --step 1 --source /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run # Check workflow status pcap-puller --workspace /tmp/job --status @@ -243,12 +246,12 @@ pcap-puller --root /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run -- ### SOAR Integration ```bash # NEW: Automated incident response with three-step workflow -pcap-puller --workspace "/cases/$CASE_ID/workspace" --root "$PCAP_STORAGE" \ +pcap-puller --workspace "/cases/$CASE_ID/workspace" --source "$PCAP_STORAGE" \ --start "$INCIDENT_START" --minutes "$INCIDENT_DURATION" \ --display-filter "$IOC_FILTER" --snaplen 256 --gzip --verbose # Legacy method (if needed) -pcap-puller --root "$PCAP_STORAGE" --start "$INCIDENT_START" \ +pcap-puller --source "$PCAP_STORAGE" --start "$INCIDENT_START" \ --minutes "$INCIDENT_DURATION" --display-filter "$IOC_FILTER" \ --out "/cases/$CASE_ID/network_evidence.pcapng" --verbose ``` @@ -257,13 +260,13 @@ pcap-puller --root "$PCAP_STORAGE" --start "$INCIDENT_START" \ ```bash # NEW: Process multiple timeframes with three-step workflow for time in "14:00:00" "14:30:00" "15:00:00"; do - pcap-puller --workspace 
"/tmp/batch_${time//:}" --root /data \ + pcap-puller --workspace "/tmp/batch_${time//:}" --source /data \ --start "2025-10-10 $time" --minutes 15 --snaplen 256 --gzip done # Legacy batch processing (if needed) for time in "14:00:00" "14:30:00" "15:00:00"; do - pcap-puller --root /data --start "2025-10-10 $time" --minutes 15 \ + pcap-puller --source /data --start "2025-10-10 $time" --minutes 15 \ --out "analysis_${time//:}.pcapng" done ``` diff --git a/gui_pcappuller.py b/gui_pcappuller.py index 48a1136..add91fb 100755 --- a/gui_pcappuller.py +++ b/gui_pcappuller.py @@ -180,8 +180,8 @@ def _open_pattern_settings(parent: "sg.Window", current_include: list, current_e win.close() return None elif ev == "Reset to Defaults": - win["-INCLUDE-"].update("*.chunk_*.pcap") - win["-EXCLUDE-"].update("*.sorted.pcap\n*.s256.pcap") + win["-INCLUDE-"].update("*.pcap\n*.pcapng") + win["-EXCLUDE-"].update("") elif ev == "Save": include_text = vals.get("-INCLUDE-", "").strip() exclude_text = vals.get("-EXCLUDE-", "").strip() @@ -217,10 +217,10 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over desired_end = dt.datetime.combine(start.date(), dt.time(23, 59, 59, 999999)) window_obj = Window(start=start, end=desired_end) - roots = [Path(values["-ROOT-"])] if values["-ROOT-"] else [] + roots = [Path(values["-SOURCE-"])] if values.get("-SOURCE-") else [] if not roots: - raise PCAPPullerError("Root directory is required") + raise PCAPPullerError("Source directory is required") # Create workspace in temp directory workspace_name = f"pcappuller_{dt.datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -230,8 +230,8 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over workflow = ThreeStepWorkflow(workspace_dir) # Get pattern settings from values - include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.chunk_*.pcap"]) - exclude_patterns = values.get("-EXCLUDE-PATTERNS-", ["*.sorted.pcap", "*.s256.pcap"]) + include_patterns = 
values.get("-INCLUDE-PATTERNS-", ["*.pcap", "*.pcapng"]) + exclude_patterns = values.get("-EXCLUDE-PATTERNS-", []) state = workflow.initialize_workflow( root_dirs=roots, @@ -267,6 +267,15 @@ def progress_callback(phase: str, current: int, total: int): run_step3 = values.get("-RUN-STEP3-", False) try: + # Verbose: announce core settings + print("Configuration:") + print(f" Source: {roots[0]}") + print(f" Window: {window_obj.start} .. {window_obj.end}") + print(f" Selection: manifest (Step 1 uses mtime+pattern only)") + print(f" Output: {values.get('-OUT-', '(workspace default)')}") + print(f" Tmpdir: {values.get('-TMPDIR-', '(workspace tmp)')}") + print(f" Effective settings: workers={eff_settings['workers']}, batch={eff_settings['batch']}, slop={eff_settings['slop']}, trim_per_batch={eff_settings['trim_per_batch']}, precise_in_step2={eff_settings['precise_filter']}") + # Step 1: Select and Move if run_step1: window.write_event_value("-STEP-UPDATE-", ("Step 1: Selecting files...", 1)) @@ -275,7 +284,7 @@ def progress_callback(phase: str, current: int, total: int): state = workflow.step1_select_and_move( state=state, slop_min=eff_settings["slop"], - precise_filter=eff_settings["precise_filter"], + precise_filter=False, # moved to Step 2 workers=workers, cache=cache, dry_run=values.get("-DRYRUN-", False), @@ -291,12 +300,20 @@ def progress_callback(phase: str, current: int, total: int): return if not state.selected_files: + print("Step 1 selected 0 files.") window.write_event_value("-DONE-", "No files selected in Step 1") return + else: + total_size_mb = sum(f.stat().st_size for f in state.selected_files) / (1024*1024) + print(f"Step 1 selected {len(state.selected_files)} files ({total_size_mb:.1f} MB)") # Step 2: Process if run_step2: window.write_event_value("-STEP-UPDATE-", ("Step 2: Processing files...", 2)) + print("Step 2: Applying precise filter and processing...") + print(f" Batch size: {eff_settings['batch']} | Trim per batch: 
{eff_settings['trim_per_batch']}") + if values.get("-DFILTER-"): + print(f" Display filter: {values['-DFILTER-']}") state = workflow.step2_process( state=state, @@ -305,7 +322,12 @@ def progress_callback(phase: str, current: int, total: int): display_filter=values["-DFILTER-"] or None, trim_per_batch=eff_settings["trim_per_batch"], progress_callback=progress_callback, - verbose=values.get("-VERBOSE-", False) + verbose=values.get("-VERBOSE-", False), + out_path=(Path(values["-OUT-"]) if values.get("-OUT-") else None), + tmpdir_parent=(Path(values["-TMPDIR-"]) if values.get("-TMPDIR-") else None), + precise_filter=eff_settings["precise_filter"], + workers=parse_workers(eff_settings["workers"], 1000), + cache=cache, ) # Step 3: Clean @@ -327,13 +349,15 @@ def progress_callback(phase: str, current: int, total: int): if values.get("-GZIP-"): clean_options["gzip"] = True - if clean_options: - state = workflow.step3_clean( - state=state, - options=clean_options, - progress_callback=progress_callback, - verbose=values.get("-VERBOSE-", False) - ) + # If no options were specified but Step 3 is enabled, apply sensible defaults + if not clean_options: + clean_options = {"snaplen": 256, "gzip": True} + state = workflow.step3_clean( + state=state, + options=clean_options, + progress_callback=progress_callback, + verbose=values.get("-VERBOSE-", False) + ) # Determine final output final_file = state.cleaned_file or state.processed_file @@ -357,8 +381,8 @@ def main(): sg.theme("SystemDefault") # Default patterns - default_include = ["*.chunk_*.pcap"] - default_exclude = ["*.sorted.pcap", "*.s256.pcap"] + default_include = ["*.pcap", "*.pcapng"] + default_exclude = [] # Create layout with three-step workflow layout = [ @@ -366,12 +390,14 @@ def main(): [sg.HSeparator()], # Basic settings - [sg.Text("Root Directory"), sg.Input(key="-ROOT-", expand_x=True), sg.FolderBrowse()], + [sg.Text("Source Directory"), sg.Input(key="-SOURCE-", expand_x=True), sg.FolderBrowse()], [sg.Text("Start 
Time (YYYY-MM-DD HH:MM:SS)"), sg.Input(key="-START-", expand_x=True)], [sg.Text("Duration"), sg.Text("Hours"), sg.Slider(range=(0, 24), orientation="h", key="-HOURS-", default_value=0, size=(20,15), enable_events=True), sg.Text("Minutes"), sg.Slider(range=(0, 59), orientation="h", key="-MINS-", default_value=15, size=(20,15), enable_events=True), sg.Button("All Day", key="-ALLDAY-")], + [sg.Text("Output File"), sg.Input(key="-OUT-", expand_x=True), sg.FileSaveAs()], + [sg.Text("Temporary Directory"), sg.Input(key="-TMPDIR-", expand_x=True), sg.FolderBrowse()], [sg.HSeparator()], @@ -392,7 +418,7 @@ def main(): ], expand_x=True)], [sg.Frame("Step 3: Cleaning Options", [ - [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space"), + [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space (leave blank to keep full payload)"), sg.Checkbox("Convert to PCAP", key="-CLEAN-CONVERT-", tooltip="Force conversion to pcap format"), sg.Checkbox("Gzip Compress", key="-GZIP-", tooltip="Compress final output")], ], expand_x=True)], @@ -420,6 +446,24 @@ def main(): ] window = sg.Window("PCAPpuller v2", layout, size=(900, 800)) + # Try to set a custom window icon if assets exist + try: + here = Path(__file__).resolve() + assets_dir = None + for p in [here.parent, *here.parents]: + cand = p / "assets" + if cand.exists(): + assets_dir = cand + break + if assets_dir is None: + assets_dir = here.parent / "assets" + for icon_name in ["PCAPpuller.ico", "PCAPpuller.png", "PCAPpuller.icns"]: + ip = assets_dir / icon_name + if ip.exists(): + window.set_icon(str(ip)) + break + except Exception: + pass stop_flag = {"stop": False} worker = None adv_overrides: dict | None = None @@ -456,8 +500,8 @@ def _update_reco_label(): if event == "Run Workflow" and worker is None: # Validation - if not values.get("-ROOT-"): - sg.popup_error("Root directory is required") + if not 
values.get("-SOURCE-"): + sg.popup_error("Source directory is required") continue if not values.get("-START-"): sg.popup_error("Start time is required") @@ -540,11 +584,21 @@ def _update_reco_label(): elif event == "-PROGRESS-": phase, cur, tot = values[event] + friendly = { + "pattern-filter": "Filtering by pattern", + "precise": "Precise filtering", + "merge-batches": "Merging batches", + "trim-batches": "Trimming batches", + "trim": "Trimming final", + "display-filter": "Applying display filter", + "gzip": "Compressing", + } if str(phase).startswith("scan"): window["-STATUS-"].update(f"Scanning... {cur} files visited") window["-PB-"].update(cur % 100) else: - window["-STATUS-"].update(f"{phase} {cur}/{tot}") + label = friendly.get(str(phase), str(phase)) + window["-STATUS-"].update(f"{label}: {cur}/{tot}") pct = 0 if tot <= 0 else int((cur / tot) * 100) window["-PB-"].update(pct) print(f"{phase}: {cur}/{tot}") diff --git a/packaging/linux/install_desktop.sh b/packaging/linux/install_desktop.sh new file mode 100755 index 0000000..d169b16 --- /dev/null +++ b/packaging/linux/install_desktop.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Minimal installer for PCAPpuller desktop integration on Linux +# - Installs desktop entry and icon for system menus +# - Requires root privileges (via sudo) +set -euo pipefail + +repo_root=$(cd "$(dirname "$0")"/../.. && pwd) +app_desktop_src="$repo_root/pcappuller-gui.desktop" +icon_src="$repo_root/assets/PCAPpuller.png" + +app_desktop_dst="/usr/share/applications/PCAPpuller.desktop" +icon_dst_dir="/usr/share/icons/hicolor/512x512/apps" +icon_dst="$icon_dst_dir/PCAPpuller.png" + +if [[ $EUID -ne 0 ]]; then + echo "This script requires root. Re-running with sudo..." + exec sudo "$0" "$@" +fi + +if [[ ! -f "$app_desktop_src" ]]; then + echo "Desktop file not found: $app_desktop_src" >&2 + exit 1 +fi +if [[ ! 
-f "$icon_src" ]]; then + echo "Icon file not found: $icon_src" >&2 + exit 1 +fi + +install -Dm644 "$app_desktop_src" "$app_desktop_dst" +install -d "$icon_dst_dir" +install -m644 "$icon_src" "$icon_dst" + +# Refresh desktop and icon caches if tools are present +if command -v update-desktop-database >/dev/null 2>&1; then + update-desktop-database /usr/share/applications || true +fi +if command -v gtk-update-icon-cache >/dev/null 2>&1; then + gtk-update-icon-cache -q /usr/share/icons/hicolor || true +fi + +echo "Installed:" +echo " $app_desktop_dst" +echo " $icon_dst" diff --git a/packaging/linux/uninstall_desktop.sh b/packaging/linux/uninstall_desktop.sh new file mode 100755 index 0000000..fc86668 --- /dev/null +++ b/packaging/linux/uninstall_desktop.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Minimal uninstaller for PCAPpuller desktop integration on Linux +set -euo pipefail + +if [[ $EUID -ne 0 ]]; then + echo "This script requires root. Re-running with sudo..." + exec sudo "$0" "$@" +fi + +app_desktop_dst="/usr/share/applications/PCAPpuller.desktop" +icon_dst="/usr/share/icons/hicolor/512x512/apps/PCAPpuller.png" + +rm -f "$app_desktop_dst" "$icon_dst" + +# Refresh caches if tools are present +if command -v update-desktop-database >/dev/null 2>&1; then + update-desktop-database /usr/share/applications || true +fi +if command -v gtk-update-icon-cache >/dev/null 2>&1; then + gtk-update-icon-cache -q /usr/share/icons/hicolor || true +fi + +echo "Removed:" +echo " $app_desktop_dst" +echo " $icon_dst" diff --git a/packaging/macos/build_pyinstaller.sh b/packaging/macos/build_pyinstaller.sh new file mode 100755 index 0000000..872e83a --- /dev/null +++ b/packaging/macos/build_pyinstaller.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Build a portable macOS app using PyInstaller +# Requires: python3 -m pip install pyinstaller +set -euo pipefail + +repo_root=$(cd "$(dirname "$0")"/../.. 
&& pwd) +cd "$repo_root" + +python3 -m pip install --upgrade pyinstaller >/dev/null + +# Use the existing GUI script as the entrypoint +pyinstaller \ + --name "PCAPpuller" \ + --windowed \ + --icon assets/PCAPpuller.icns \ + --noconfirm \ + gui_pcappuller.py + +echo "Built app at: dist/PCAPpuller.app" diff --git a/packaging/windows/build_pyinstaller.ps1 b/packaging/windows/build_pyinstaller.ps1 new file mode 100644 index 0000000..2ccd87c --- /dev/null +++ b/packaging/windows/build_pyinstaller.ps1 @@ -0,0 +1,21 @@ +# Build a portable Windows app using PyInstaller +# Run in PowerShell: pwsh -File packaging\windows\build_pyinstaller.ps1 + +$ErrorActionPreference = "Stop" + +# Ensure pyinstaller is available +python -m pip install --upgrade pyinstaller | Out-Null + +# Change to repo root +$repoRoot = Split-Path -Parent (Split-Path -Parent $PSScriptRoot) +Set-Location $repoRoot + +# Build +pyinstaller ` + --name "PCAPpuller" ` + --windowed ` + --icon assets/PCAPpuller.ico ` + --noconfirm ` + gui_pcappuller.py + +Write-Host "Built app at: dist/PCAPpuller.exe" diff --git a/pcappuller-gui.desktop b/pcappuller-gui.desktop index c4bff0b..17895a0 100644 --- a/pcappuller-gui.desktop +++ b/pcappuller-gui.desktop @@ -1,11 +1,11 @@ [Desktop Entry] Version=1.0 Type=Application -Name=PCAPpuller GUI +Name=PCAPpuller GenericName=PCAP Analysis Tool Comment=Fast PCAP window selector, merger, trimmer, and cleaner -Exec=pcappuller-gui -Icon=pcappuller +Exec=PCAPpuller +Icon=PCAPpuller Terminal=false Categories=Network;System; Keywords=pcap;wireshark;network;packet;analysis; diff --git a/pcappuller/gui.py b/pcappuller/gui.py index f48bdf0..de68a1b 100644 --- a/pcappuller/gui.py +++ b/pcappuller/gui.py @@ -175,8 +175,8 @@ def _open_pattern_settings(parent: "sg.Window", current_include: list, current_e win.close() return None elif ev == "Reset to Defaults": - win["-INCLUDE-"].update("*.chunk_*.pcap") - win["-EXCLUDE-"].update("*.sorted.pcap\n*.s256.pcap") + 
win["-INCLUDE-"].update("*.pcap\n*.pcapng") + win["-EXCLUDE-"].update("") elif ev == "Save": include_text = vals.get("-INCLUDE-", "").strip() exclude_text = vals.get("-EXCLUDE-", "").strip() @@ -212,10 +212,10 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over desired_end = dt.datetime.combine(start.date(), dt.time(23, 59, 59, 999999)) window_obj = Window(start=start, end=desired_end) - roots = [Path(values["-ROOT-"])] if values["-ROOT-"] else [] + roots = [Path(values["-SOURCE-"])] if values.get("-SOURCE-") else [] if not roots: - raise PCAPPullerError("Root directory is required") + raise PCAPPullerError("Source directory is required") # Create workspace in temp directory workspace_name = f"pcappuller_{dt.datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -225,8 +225,8 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over workflow = ThreeStepWorkflow(workspace_dir) # Get pattern settings from values - include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.chunk_*.pcap"]) - exclude_patterns = values.get("-EXCLUDE-PATTERNS-", ["*.sorted.pcap", "*.s256.pcap"]) + include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.pcap", "*.pcapng"]) + exclude_patterns = values.get("-EXCLUDE-PATTERNS-", []) state = workflow.initialize_workflow( root_dirs=roots, @@ -262,6 +262,15 @@ def progress_callback(phase: str, current: int, total: int): run_step3 = values.get("-RUN-STEP3-", False) try: + # Verbose: announce core settings + print("Configuration:") + print(f" Source: {roots[0]}") + print(f" Window: {window_obj.start} .. 
{window_obj.end}") + print(f" Selection: manifest (Step 1 uses mtime+pattern only)") + print(f" Output: {values.get('-OUT-', '(workspace default)')}") + print(f" Tmpdir: {values.get('-TMPDIR-', '(workspace tmp)')}") + print(f" Effective settings: workers={eff_settings['workers']}, batch={eff_settings['batch']}, slop={eff_settings['slop']}, trim_per_batch={eff_settings['trim_per_batch']}, precise_in_step2={eff_settings['precise_filter']}") + # Step 1: Select and Move if run_step1: window.write_event_value("-STEP-UPDATE-", ("Step 1: Selecting files...", 1)) @@ -270,7 +279,7 @@ def progress_callback(phase: str, current: int, total: int): state = workflow.step1_select_and_move( state=state, slop_min=eff_settings["slop"], - precise_filter=eff_settings["precise_filter"], + precise_filter=False, # moved to Step 2 workers=workers, cache=cache, dry_run=values.get("-DRYRUN-", False), @@ -286,12 +295,20 @@ def progress_callback(phase: str, current: int, total: int): return if not state.selected_files: + print("Step 1 selected 0 files.") window.write_event_value("-DONE-", "No files selected in Step 1") return + else: + total_size_mb = sum(f.stat().st_size for f in state.selected_files) / (1024*1024) + print(f"Step 1 selected {len(state.selected_files)} files ({total_size_mb:.1f} MB)") # Step 2: Process if run_step2: window.write_event_value("-STEP-UPDATE-", ("Step 2: Processing files...", 2)) + print("Step 2: Applying precise filter and processing...") + print(f" Batch size: {eff_settings['batch']} | Trim per batch: {eff_settings['trim_per_batch']}") + if values.get("-DFILTER-"): + print(f" Display filter: {values['-DFILTER-']}") state = workflow.step2_process( state=state, @@ -300,7 +317,12 @@ def progress_callback(phase: str, current: int, total: int): display_filter=values["-DFILTER-"] or None, trim_per_batch=eff_settings["trim_per_batch"], progress_callback=progress_callback, - verbose=values.get("-VERBOSE-", False) + verbose=values.get("-VERBOSE-", False), + 
out_path=(Path(values["-OUT-"]) if values.get("-OUT-") else None), + tmpdir_parent=(Path(values["-TMPDIR-"]) if values.get("-TMPDIR-") else None), + precise_filter=eff_settings["precise_filter"], + workers=parse_workers(eff_settings["workers"], 1000), + cache=cache, ) # Step 3: Clean @@ -322,13 +344,15 @@ def progress_callback(phase: str, current: int, total: int): if values.get("-GZIP-"): clean_options["gzip"] = True - if clean_options: - state = workflow.step3_clean( - state=state, - options=clean_options, - progress_callback=progress_callback, - verbose=values.get("-VERBOSE-", False) - ) + # If no options were specified but Step 3 is enabled, apply sensible defaults + if not clean_options: + clean_options = {"snaplen": 256, "gzip": True} + state = workflow.step3_clean( + state=state, + options=clean_options, + progress_callback=progress_callback, + verbose=values.get("-VERBOSE-", False) + ) # Determine final output final_file = state.cleaned_file or state.processed_file @@ -353,8 +377,8 @@ def main(): sg.theme("SystemDefault") # Default patterns - default_include = ["*.chunk_*.pcap"] - default_exclude = ["*.sorted.pcap", "*.s256.pcap"] + default_include = ["*.pcap", "*.pcapng"] + default_exclude = [] # Create layout with three-step workflow layout = [ @@ -362,12 +386,14 @@ def main(): [sg.HSeparator()], # Basic settings - [sg.Text("Root Directory"), sg.Input(key="-ROOT-", expand_x=True), sg.FolderBrowse()], + [sg.Text("Source Directory"), sg.Input(key="-SOURCE-", expand_x=True), sg.FolderBrowse()], [sg.Text("Start Time (YYYY-MM-DD HH:MM:SS)"), sg.Input(key="-START-", expand_x=True)], [sg.Text("Duration"), sg.Text("Hours"), sg.Slider(range=(0, 24), orientation="h", key="-HOURS-", default_value=0, size=(20,15), enable_events=True), sg.Text("Minutes"), sg.Slider(range=(0, 59), orientation="h", key="-MINS-", default_value=15, size=(20,15), enable_events=True), sg.Button("All Day", key="-ALLDAY-")], + [sg.Text("Output File"), sg.Input(key="-OUT-", expand_x=True), 
sg.FileSaveAs()], + [sg.Text("Temporary Directory"), sg.Input(key="-TMPDIR-", expand_x=True), sg.FolderBrowse()], [sg.HSeparator()], @@ -388,7 +414,7 @@ def main(): ], expand_x=True)], [sg.Frame("Step 3: Cleaning Options", [ - [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space"), + [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space (leave blank to keep full payload)"), sg.Checkbox("Convert to PCAP", key="-CLEAN-CONVERT-", tooltip="Force conversion to pcap format"), sg.Checkbox("Gzip Compress", key="-GZIP-", tooltip="Compress final output")], ], expand_x=True)], @@ -416,6 +442,25 @@ def main(): ] window = sg.Window("PCAPpuller", layout, size=(900, 800)) + # Try to set a custom window icon if assets exist + try: + here = Path(__file__).resolve() + assets_dir = None + # Search upwards for a top-level 'assets' directory (repo layout) + for p in [here.parent, *here.parents]: + cand = p / "assets" + if cand.exists(): + assets_dir = cand + break + if assets_dir is None: + assets_dir = here.parent.parent / "assets" + for icon_name in ["PCAPpuller.ico", "PCAPpuller.png", "PCAPpuller.icns"]: + ip = assets_dir / icon_name + if ip.exists(): + window.set_icon(str(ip)) + break + except Exception: + pass stop_flag = {"stop": False} worker = None adv_overrides: dict | None = None @@ -452,8 +497,8 @@ def _update_reco_label(): if event == "Run Workflow" and worker is None: # Validation - if not values.get("-ROOT-"): - sg.popup_error("Root directory is required") + if not values.get("-SOURCE-"): + sg.popup_error("Source directory is required") continue if not values.get("-START-"): sg.popup_error("Start time is required") @@ -536,11 +581,21 @@ def _update_reco_label(): elif event == "-PROGRESS-": phase, cur, tot = values[event] + friendly = { + "pattern-filter": "Filtering by pattern", + "precise": "Precise filtering", + "merge-batches": "Merging 
batches", + "trim-batches": "Trimming batches", + "trim": "Trimming final", + "display-filter": "Applying display filter", + "gzip": "Compressing", + } if str(phase).startswith("scan"): window["-STATUS-"].update(f"Scanning... {cur} files visited") window["-PB-"].update(cur % 100) else: - window["-STATUS-"].update(f"{phase} {cur}/{tot}") + label = friendly.get(str(phase), str(phase)) + window["-STATUS-"].update(f"{label}: {cur}/{tot}") pct = 0 if tot <= 0 else int((cur / tot) * 100) window["-PB-"].update(pct) print(f"{phase}: {cur}/{tot}") diff --git a/pcappuller/workflow.py b/pcappuller/workflow.py index 98d0dcd..73f2225 100644 --- a/pcappuller/workflow.py +++ b/pcappuller/workflow.py @@ -3,6 +3,7 @@ import json import logging import shutil +import os from dataclasses import dataclass, asdict from pathlib import Path from typing import List, Optional, Dict, Any, Callable @@ -105,11 +106,12 @@ def step1_select_and_move( self, state: WorkflowState, slop_min: int = 120, - precise_filter: bool = True, + precise_filter: bool = False, workers: Optional[int] = None, cache: Optional[CapinfosCache] = None, dry_run: bool = False, - progress_callback: Optional[Callable[[str, int, int], None]] = None + progress_callback: Optional[Callable[[str, int, int], None]] = None, + selection_mode: str = "manifest" # one of: 'manifest', 'symlink' ) -> WorkflowState: """ Step 1: Select and move PCAP files based on time window and patterns. 
@@ -124,8 +126,9 @@ def step1_select_and_move( logging.info("Step 1 already complete, skipping...") return state - # Create selected directory - if not dry_run: + # Create selected directory only if we will materialize files + materialize = selection_mode == "symlink" + if not dry_run and materialize: self.selected_dir.mkdir(parents=True, exist_ok=True) # Find candidates using existing logic @@ -137,12 +140,11 @@ def step1_select_and_move( if progress_callback: progress_callback("pattern-filter", len(filtered_candidates), len(all_candidates)) - # Apply precise filtering if requested + # Step 1 is now mtime/pattern only by default; precise filtering moved to Step 2 if precise_filter and filtered_candidates: if workers is None: from .core import parse_workers workers = parse_workers("auto", len(filtered_candidates)) - final_candidates = precise_filter_parallel( filtered_candidates, state.window, workers, 0, progress_callback, cache ) @@ -156,30 +158,40 @@ def step1_select_and_move( logging.info(f" After precise filtering: {len(final_candidates)}") return state - # Copy files to workspace - copied_files = [] - for i, src_file in enumerate(final_candidates): - dst_file = self.selected_dir / src_file.name - # Handle name conflicts by appending a counter - counter = 1 - while dst_file.exists(): - stem = src_file.stem - suffix = src_file.suffix - dst_file = self.selected_dir / f"{stem}_{counter:03d}{suffix}" - counter += 1 - - shutil.copy2(src_file, dst_file) - copied_files.append(dst_file) - - if progress_callback: - progress_callback("copy-files", i + 1, len(final_candidates)) + selected_list: List[Path] = [] + if selection_mode == "manifest": + # Do not materialize files; just record original paths + selected_list = list(final_candidates) + else: + # Materialize files via symlink only + for i, src_file in enumerate(final_candidates): + dst_file = self.selected_dir / src_file.name + # Handle name conflicts by appending a counter + counter = 1 + while dst_file.exists(): 
+ stem = src_file.stem + suffix = src_file.suffix + dst_file = self.selected_dir / f"{stem}_{counter:03d}{suffix}" + counter += 1 + try: + os.symlink(src_file, dst_file) + selected_list.append(dst_file) + except Exception as e: + logging.warning("Failed to symlink %s -> %s (%s); recording manifest path instead", src_file, dst_file, e) + selected_list.append(src_file) + + if progress_callback: + progress_callback("copy-files", i + 1, len(final_candidates)) # Update state - state.selected_files = copied_files + state.selected_files = selected_list state.step1_complete = True state.save(self.state_file) - logging.info(f"Step 1 complete: Selected and copied {len(copied_files)} files to {self.selected_dir}") + if selection_mode == "manifest": + logging.info(f"Step 1 complete: Selected {len(selected_list)} files (manifest-only, no data copied)") + else: + logging.info(f"Step 1 complete: Materialized {len(selected_list)} files to {self.selected_dir} via {selection_mode}") return state def step2_process( @@ -190,7 +202,12 @@ def step2_process( display_filter: Optional[str] = None, trim_per_batch: Optional[bool] = None, progress_callback: Optional[Callable[[str, int, int], None]] = None, - verbose: bool = False + verbose: bool = False, + out_path: Optional[Path] = None, + tmpdir_parent: Optional[Path] = None, + precise_filter: bool = True, + workers: Optional[int] = None, + cache: Optional[CapinfosCache] = None, ) -> WorkflowState: """ Step 2: Process selected files using existing merge/trim logic. 
@@ -213,25 +230,40 @@ def step2_process( # Create processed directory self.processed_dir.mkdir(parents=True, exist_ok=True) - # Determine output filename + # Determine output filename or use provided path timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S") - output_file = self.processed_dir / f"merged_{timestamp}.{out_format}" + default_output = self.processed_dir / f"merged_{timestamp}.{out_format}" + output_file = out_path if out_path else default_output # Auto-determine trim_per_batch if not specified if trim_per_batch is None: duration_minutes = int((state.window.end - state.window.start).total_seconds() // 60) trim_per_batch = duration_minutes > 60 - # Ensure tmp directory exists - tmp_dir = self.workspace_dir / "tmp" - tmp_dir.mkdir(parents=True, exist_ok=True) + # Ensure tmp directory exists (use override if provided) + if tmpdir_parent is None: + tmp_dir = self.workspace_dir / "tmp" + tmp_dir.mkdir(parents=True, exist_ok=True) + tmp_parent = tmp_dir + else: + Path(tmpdir_parent).mkdir(parents=True, exist_ok=True) + tmp_parent = Path(tmpdir_parent) + # Optionally apply precise filtering now (moved from Step 1) + candidates_for_merge = list(state.selected_files) + if precise_filter and candidates_for_merge: + if workers is None: + from .core import parse_workers + workers = parse_workers("auto", len(candidates_for_merge)) + candidates_for_merge = precise_filter_parallel( + candidates_for_merge, state.window, workers, 0, progress_callback, cache + ) # Use existing build_output logic result_file = build_output( - candidates=state.selected_files, + candidates=candidates_for_merge, window=state.window, out_path=output_file, - tmpdir_parent=tmp_dir, + tmpdir_parent=tmp_parent, batch_size=batch_size, out_format=out_format, display_filter=display_filter, diff --git a/pyproject.toml b/pyproject.toml index 69a959e..8acaef7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pcappuller" -version 
= "0.2.3" +version = "0.3.1" description = "A fast PCAP window selector, merger, trimmer, and cleaner" readme = "README.md" authors = [ @@ -50,6 +50,7 @@ datetime = ["python-dateutil"] [project.scripts] pcap-puller = "pcappuller.cli:main" pcap-puller-gui = "pcappuller.gui:main" +PCAPpuller = "pcappuller.gui:main" pcap-clean = "pcappuller.clean_cli:main" [tool.setuptools]