diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..39e36cb
Binary files /dev/null and b/.DS_Store differ
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 01cfea9..e0b98bb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,22 +20,10 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install ruff mypy
- # Install PySimpleGUI from their private index for gui dependencies
- pip install --extra-index-url https://PySimpleGUI.net/install PySimpleGUI || true
- pip install -e .[datetime]
- # Install GUI dependencies only if PySimpleGUI succeeded
- pip install -e .[gui] || echo "Skipping GUI dependencies"
-
- - name: Python syntax check
- run: |
- python -m py_compile PCAPpuller.py
- python -m py_compile gui_pcappuller.py
- python -m compileall pcappuller/
+ pip install -e .[gui,datetime]
- name: Ruff (E,F only)
run: ruff check --select E,F --ignore E501 .
- name: Mypy
- run: |
- # Run mypy with ignore-missing-imports for potential GUI dependency issues
- mypy --ignore-missing-imports PCAPpuller.py pcappuller gui_pcappuller.py
+ run: mypy PCAPpuller.py pcappuller gui_pcappuller.py
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b44f681..d7a5b6b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -31,54 +31,20 @@ jobs:
# PySimpleGUI requires private index
pip install --extra-index-url https://PySimpleGUI.net/install PySimpleGUI
- - name: Prepare icons
- shell: bash
- run: |
- set -euxo pipefail
- mkdir -p artifacts/icons
- ICON_SRC="assets/icons/pcappuller.png"
- if [ ! -f "$ICON_SRC" ] && [ -f assets/icons/pcap.png ]; then ICON_SRC="assets/icons/pcap.png"; fi
- if [ -f "$ICON_SRC" ]; then
- # Linux does not embed icon, but Windows/macOS will use .ico/.icns
- if [ "$RUNNER_OS" = "Windows" ]; then
- echo Using ImageMagick to create .ico from $ICON_SRC
- magick convert "$ICON_SRC" -resize 256x256 artifacts/icons/pcappuller.ico
- elif [ "$RUNNER_OS" = "macOS" ]; then
- echo Building .icns from .iconset using $ICON_SRC
- ICONSET=artifacts/icons/pcappuller.iconset
- mkdir -p "$ICONSET"
- sips -z 16 16 "$ICON_SRC" --out "$ICONSET/icon_16x16.png"
- sips -z 32 32 "$ICON_SRC" --out "$ICONSET/icon_16x16@2x.png"
- sips -z 32 32 "$ICON_SRC" --out "$ICONSET/icon_32x32.png"
- sips -z 64 64 "$ICON_SRC" --out "$ICONSET/icon_32x32@2x.png"
- sips -z 128 128 "$ICON_SRC" --out "$ICONSET/icon_128x128.png"
- sips -z 256 256 "$ICON_SRC" --out "$ICONSET/icon_128x128@2x.png"
- sips -z 256 256 "$ICON_SRC" --out "$ICONSET/icon_256x256.png"
- sips -z 512 512 "$ICON_SRC" --out "$ICONSET/icon_256x256@2x.png"
- sips -z 512 512 "$ICON_SRC" --out "$ICONSET/icon_512x512.png"
- cp "$ICON_SRC" "$ICONSET/icon_512x512@2x.png" || true
- iconutil -c icns "$ICONSET" -o artifacts/icons/pcappuller.icns
- fi
- fi
-
- name: Build GUI binary
shell: bash
run: |
set -euxo pipefail
mkdir -p release
if [ "$RUNNER_OS" = "Windows" ]; then
- if [ -f artifacts/icons/pcappuller.ico ]; then ICON="--icon artifacts/icons/pcappuller.ico"; else ICON=""; fi
- pyinstaller --onefile --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py
+ pyinstaller --onefile --windowed --name PCAPpullerGUI gui_pcappuller.py
mv dist/PCAPpullerGUI.exe "release/PCAPpullerGUI-windows.exe"
elif [ "$RUNNER_OS" = "macOS" ]; then
# Build a proper .app so Finder runs it correctly
- if [ -f artifacts/icons/pcappuller.icns ]; then ICON="--icon artifacts/icons/pcappuller.icns"; else ICON=""; fi
- pyinstaller --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py
+ pyinstaller --windowed --name PCAPpullerGUI gui_pcappuller.py
(cd dist && zip -r ../release/PCAPpullerGUI-macos.zip PCAPpullerGUI.app)
else
- # Linux: try to use icon if available
- if [ -f assets/icons/pcappuller.png ]; then ICON="--icon assets/icons/pcappuller.png"; else ICON=""; fi
- pyinstaller --onefile --windowed $ICON --name PCAPpullerGUI gui_pcappuller.py
+ pyinstaller --onefile --windowed --name PCAPpullerGUI gui_pcappuller.py
mv dist/PCAPpullerGUI "release/PCAPpullerGUI-linux"
fi
@@ -92,53 +58,23 @@ jobs:
sudo gem install --no-document fpm
VERSION=$(grep -E '^version\s*=\s*"[0-9]+\.[0-9]+\.[0-9]+"' pyproject.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)"/\1/')
STAGE=$(mktemp -d)
-
- # Install binary
mkdir -p "$STAGE/usr/local/bin"
install -m 0755 release/PCAPpullerGUI-linux "$STAGE/usr/local/bin/pcappuller-gui"
-
- # Install desktop file
- mkdir -p "$STAGE/usr/share/applications"
- install -m 0644 pcappuller-gui.desktop "$STAGE/usr/share/applications/"
-
- # Install icon
- if [ -f assets/icons/pcappuller.png ]; then
- mkdir -p "$STAGE/usr/share/icons/hicolor/256x256/apps"
- install -m 0644 assets/icons/pcappuller.png "$STAGE/usr/share/icons/hicolor/256x256/apps/pcappuller.png"
- # Also install in standard pixmaps location
- mkdir -p "$STAGE/usr/share/pixmaps"
- install -m 0644 assets/icons/pcappuller.png "$STAGE/usr/share/pixmaps/pcappuller.png"
- fi
-
NAME=pcappuller-gui
- DESC="PCAPpuller GUI: fast PCAP window selector, merger, trimmer, and cleaner"
+ DESC="PCAPpuller GUI: fast PCAP window selector, merger, trimmer"
URL="https://github.com/ktalons/daPCAPpuller"
LICENSE=MIT
MAINTAINER="Kyle Versluis"
-
- # Create post-install script
- echo '#!/bin/bash' > postinst.sh
- echo 'if command -v update-desktop-database >/dev/null 2>&1; then' >> postinst.sh
- echo ' update-desktop-database /usr/share/applications' >> postinst.sh
- echo 'fi' >> postinst.sh
- echo 'if command -v gtk-update-icon-cache >/dev/null 2>&1; then' >> postinst.sh
- echo ' gtk-update-icon-cache -f -t /usr/share/icons/hicolor' >> postinst.sh
- echo 'fi' >> postinst.sh
- chmod +x postinst.sh
-
- # deb with post-install script
+ # deb
fpm -s dir -t deb -n "$NAME" -v "$VERSION" \
--license "$LICENSE" --url "$URL" --maintainer "$MAINTAINER" \
--description "$DESC" \
- --after-install postinst.sh \
-C "$STAGE" --prefix / \
-p "release/${NAME}_${VERSION}_amd64.deb"
-
- # rpm with post-install script
+ # rpm
fpm -s dir -t rpm -n "$NAME" -v "$VERSION" \
--license "$LICENSE" --url "$URL" --maintainer "$MAINTAINER" \
--description "$DESC" \
- --after-install postinst.sh \
-C "$STAGE" --prefix / \
-p "release/${NAME}-${VERSION}-1.x86_64.rpm"
diff --git a/.gitignore b/.gitignore
index 04cb2a3..2d1a8ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,25 +1,3 @@
-# Python build and env
-__pycache__/
-*.pyc
-.venv/
-
-# PyInstaller
-/build/
-/dist/
-/*.spec
-
-# Packaging outputs
-packaging/artifacts/
-.debstage/
-
-# OS/editor
-.DS_Store
-*.swp
-*.swo
-
-# Logs
-*.log
-
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
diff --git a/PCAPpuller.py b/PCAPpuller.py
index 8eedc1d..ab9ff69 100755
--- a/PCAPpuller.py
+++ b/PCAPpuller.py
@@ -47,17 +47,19 @@ def parse_args():
ap.add_argument("--resume", action="store_true", help="Resume from existing workflow state")
ap.add_argument("--status", action="store_true", help="Show workflow status and exit")
- # Step 1: Selection parameters
+ # Step 1: File Selection
step1_group = ap.add_argument_group("Step 1: File Selection")
- step1_group.add_argument("--root", nargs="+", help="Root directories to search (required for new workflow)")
- step1_group.add_argument("--include-pattern", nargs="*", default=["*.chunk_*.pcap"],
- help="Include files matching these patterns")
- step1_group.add_argument("--exclude-pattern", nargs="*", default=["*.sorted.pcap", "*.s256.pcap"],
- help="Exclude files matching these patterns")
- step1_group.add_argument("--slop-min", type=int, default=120, help="Extra minutes around window for mtime prefilter")
- step1_group.add_argument("--precise-filter", action="store_true", default=True, help="Use capinfos for precise filtering")
- step1_group.add_argument("--no-precise-filter", action="store_false", dest="precise_filter",
- help="Skip precise filtering, use mtime only")
+ # New preferred flag
+ step1_group.add_argument("--source", nargs="+", help="Source directories to search (required for new workflow)")
+ # Backward-compat alias (hidden)
+ step1_group.add_argument("--root", nargs="+", dest="source", help=argparse.SUPPRESS)
+ step1_group.add_argument("--include-pattern", nargs="*", default=["*.pcap", "*.pcapng"],
+ help="Include files matching these patterns (default: *.pcap, *.pcapng)")
+ step1_group.add_argument("--exclude-pattern", nargs="*", default=[],
+ help="Exclude files matching these patterns (optional)")
+ step1_group.add_argument("--slop-min", type=int, default=None, help="Extra minutes around window for mtime prefilter (auto by default)")
+ step1_group.add_argument("--selection-mode", choices=["manifest", "symlink"], default="manifest",
+ help="How to materialize Step 1 selections. 'manifest' (default) avoids any data copy; 'symlink' creates symlinks in the workspace.")
# Time window (required for new workflow)
time_group = ap.add_argument_group("Time Window")
@@ -68,12 +70,14 @@ def parse_args():
# Step 2: Processing parameters
step2_group = ap.add_argument_group("Step 2: Processing")
- step2_group.add_argument("--batch-size", type=int, default=500, help="Files per merge batch")
+ step2_group.add_argument("--batch-size", type=int, default=None, help="Files per merge batch (auto by default)")
step2_group.add_argument("--out-format", choices=["pcap", "pcapng"], default="pcapng", help="Output format")
step2_group.add_argument("--display-filter", help="Wireshark display filter")
step2_group.add_argument("--trim-per-batch", action="store_true", help="Trim each batch before final merge")
step2_group.add_argument("--no-trim-per-batch", action="store_false", dest="trim_per_batch",
help="Only trim final merged file")
+ step2_group.add_argument("--out", help="Explicit output file path for Step 2 (e.g., /path/to/output.pcapng). If omitted, a timestamped file is written under the workspace.")
+ step2_group.add_argument("--no-precise-filter", action="store_true", help="Disable precise filtering in Step 2 (advanced)")
# Step 3: Cleaning parameters
step3_group = ap.add_argument_group("Step 3: Cleaning")
@@ -101,8 +105,8 @@ def parse_args():
if not args.resume:
# New workflow requires certain parameters
- if not args.root:
- ap.error("--root is required for new workflow (use --resume to continue existing)")
+ if not args.source:
+ ap.error("--source is required for new workflow (use --resume to continue existing)")
if not args.start:
ap.error("--start is required for new workflow")
if not args.minutes and not args.end:
@@ -135,9 +139,9 @@ def progress_callback(phase: str, current: int, total: int):
def run_step1(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> WorkflowState:
"""Execute Step 1: File Selection."""
-    print("🔍 Step 1: Selecting and copying PCAP files...")
+    print("🔍 Step 1: Selecting PCAP files...")
- # Setup cache
+ # Setup cache (not strictly needed for Step 1 now, but keep for future-proofing)
cache = None
if not args.no_cache:
cache_path = default_cache_path() if args.cache == "auto" else Path(args.cache)
@@ -149,16 +153,37 @@ def run_step1(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl
progress_cb, cleanup_pb = setup_progress_callback("Step 1: File Selection")
try:
+ # Auto defaults: compute slop based on requested duration when not provided
+ try:
+ start, end = parse_start_and_window(args.start, args.minutes, args.end)
+ duration_minutes = int((end - start).total_seconds() // 60)
+ except Exception:
+ duration_minutes = 60
+ if args.slop_min is None:
+ if duration_minutes <= 15:
+ slop_min = 120
+ elif duration_minutes <= 60:
+ slop_min = 60
+ elif duration_minutes <= 240:
+ slop_min = 30
+ elif duration_minutes <= 720:
+ slop_min = 20
+ else:
+ slop_min = 15
+ else:
+ slop_min = args.slop_min
+
workers = parse_workers(args.workers, 1000) # Estimate for auto calculation
state = workflow.step1_select_and_move(
state=state,
- slop_min=args.slop_min,
- precise_filter=args.precise_filter,
+ slop_min=slop_min,
+ precise_filter=False, # moved to Step 2 by default
workers=workers,
cache=cache,
dry_run=args.dry_run,
- progress_callback=progress_cb
+ progress_callback=progress_cb,
+ selection_mode=args.selection_mode
)
if not args.dry_run:
@@ -186,14 +211,48 @@ def run_step2(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl
if args.trim_per_batch is not None:
trim_per_batch = args.trim_per_batch
+ # Auto defaults for Step 2 if not provided
+ # Determine duration from state
+ duration_minutes = int((state.window.end - state.window.start).total_seconds() // 60)
+ if args.batch_size is None:
+ if duration_minutes <= 15:
+ batch_size = 500
+ elif duration_minutes <= 60:
+ batch_size = 400
+ elif duration_minutes <= 240:
+ batch_size = 300
+ elif duration_minutes <= 720:
+ batch_size = 200
+ else:
+ batch_size = 150
+ else:
+ batch_size = int(args.batch_size)
+ if trim_per_batch is None:
+ trim_per_batch = duration_minutes > 60
+
+ # Setup cache for Step 2 precise filtering (default on)
+ cache = None
+ if not args.no_cache:
+ cache_path = default_cache_path() if args.cache == "auto" else Path(args.cache)
+ cache = CapinfosCache(cache_path)
+ if args.clear_cache:
+ cache.clear()
+
+ workers = parse_workers(args.workers, total_files=1000)
+
state = workflow.step2_process(
state=state,
- batch_size=args.batch_size,
+ batch_size=batch_size,
out_format=args.out_format,
display_filter=args.display_filter,
trim_per_batch=trim_per_batch,
progress_callback=progress_cb,
- verbose=args.verbose
+ verbose=args.verbose,
+ out_path=Path(args.out) if args.out else None,
+ tmpdir_parent=Path(args.tmpdir) if args.tmpdir else None,
+ precise_filter=not bool(getattr(args, "no_precise_filter", False)),
+ workers=workers,
+ cache=cache,
)
print("✅ Step 2 complete: Processed file saved")
@@ -219,12 +278,9 @@ def run_step3(workflow: ThreeStepWorkflow, state: WorkflowState, args) -> Workfl
if args.gzip:
clean_options['gzip'] = True
+ # If user did not specify options, apply safe defaults that do not truncate payloads
if not clean_options:
- print("⏭️ Step 3: No cleaning options specified, skipping...")
- state.step3_complete = True
- state.cleaned_file = state.processed_file # Use processed file as final
- state.save(workflow.state_file)
- return state
+ clean_options = {"convert_to_pcap": True, "gzip": True}
print("🧹 Step 3: Cleaning output (removing headers/metadata)...")
@@ -305,7 +361,7 @@ def main():
window = Window(start=start, end=end)
# Initialize new workflow
- root_dirs = [Path(r) for r in args.root]
+ root_dirs = [Path(r) for r in args.source]
state = workflow.initialize_workflow(
root_dirs=root_dirs,
window=window,
diff --git a/README.md b/README.md
index 0e2c681..e654270 100644
--- a/README.md
+++ b/README.md
@@ -140,11 +140,19 @@ ___
### Three-Step Workflow (Recommended)
```bash
# Complete workflow - solves size inflation issues!
-pcap-puller --workspace /tmp/job --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 --snaplen 256 --gzip
+pcap-puller --workspace /tmp/job \
+ --source /mnt/dir \
+ --start "YYYY-MM-DD HH:MM:SS" \
+ --minutes 15 \
+ --selection-mode symlink \
+ --out /path/to/output.pcapng \
+ --tmpdir /path/on/large/volume/tmp \
+ --snaplen 256 \
+ --gzip
# Individual steps for more control
-pcap-puller --workspace /tmp/job --step 1 --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 # Select
-pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns" # Process
+pcap-puller --workspace /tmp/job --step 1 --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 15 --selection-mode manifest # Select (no data copy)
+pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns" --out /path/to/output.pcapng --tmpdir /big/tmp # Process
pcap-puller --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip # Clean
# Check status anytime
@@ -172,10 +180,10 @@ pcap-puller --workspace /tmp/job --status
### Direct (without install)
```bash
# New three-step workflow (recommended)
-python3 PCAPpuller.py --workspace /tmp/job --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 --snaplen 256 --gzip
+python3 PCAPpuller.py --workspace /tmp/job --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30 --snaplen 256 --gzip
# Individual steps
-python3 PCAPpuller.py --workspace /tmp/job --step 1 --root /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30
+python3 PCAPpuller.py --workspace /tmp/job --step 1 --source /mnt/dir --start "YYYY-MM-DD HH:MM:SS" --minutes 30
python3 PCAPpuller.py --workspace /tmp/job --step 2 --resume --display-filter "dns"
python3 PCAPpuller.py --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip
@@ -186,7 +194,7 @@ ___
## Arguments ๐ฅ
### Required โ
> `--workspace ` โ workspace directory for three-step workflow (NEW).
-> `--root ` โ one or more directories to search.
+> `--source ` — one or more directories to search. (`--root` is still accepted as an alias.)
> `--start "YYYY-MM-DD HH:MM:SS"` โ window start (local time).
> `--minutes <1โ1440>` โ duration; must stay within a single calendar day. Or use `--end` with same-day end time.
### Optional โ
@@ -196,9 +204,10 @@ ___
> `--resume` โ resume from existing workflow state.
> `--status` โ show workflow status and exit.
-**Pattern Filtering (Step 1):**
-> `--include-pattern [PATTERNS...]` โ include files matching patterns (default: *.chunk_*.pcap).
-> `--exclude-pattern [PATTERNS...]` โ exclude files matching patterns (default: *.sorted.pcap, *.s256.pcap).
+**Pattern Filtering (Step 1):**
+> `--include-pattern [PATTERNS...]` — include files matching patterns (default: *.pcap, *.pcapng).
+> `--exclude-pattern [PATTERNS...]` — optional excludes (none by default).
+> `--selection-mode {manifest|symlink}` — how to materialize selections. Default: manifest. Use `symlink` to browse selections in a workspace folder.
**Processing Options:**
> `--end ` โ end time instead of `--minutes` (must be same day as `--start`).
@@ -208,6 +217,8 @@ ___
> `--workers ` — concurrency for precise filter (default: auto ≈ 2×CPU, gently capped).
> `--display-filter ""` โ post-trim filter via tshark (e.g., "dns", "tcp.port==443").
> `--out-format {pcap|pcapng}` โ final capture format (default: pcapng).
+> `--out ` — explicit output path for Step 2 (otherwise written under workspace).
+> `--tmpdir ` — directory for temporary files during Step 2 (overrides system/workspace tmp).
**Cleaning Options (Step 3):**
> `--snaplen ` โ truncate packets to N bytes.
@@ -242,6 +253,14 @@ ___
- Cleaning options in Step 3 can reduce final file size by 60-90%
- Check status anytime: `--workspace /path --status`
___
+## App Icons ๐ผ๏ธ
+- Place your icons under assets/
+ - macOS: PCAPpuller.icns
+ - Linux: PCAPpuller.png (e.g., install to /usr/share/icons/hicolor/512x512/apps/PCAPpuller.png)
+ - Windows: PCAPpuller.ico
+- During development, the GUI attempts to load assets/PCAPpuller.ico/.png/.icns and set the window icon automatically.
+- The Linux desktop entry now uses Name=PCAPpuller and Exec=PCAPpuller with Icon=PCAPpuller.
+
## Development ๐ ๏ธ
- Install tooling (in a virtualenv):
- python3 -m pip install -e .[datetime]
diff --git a/WORKFLOW_GUIDE.md b/WORKFLOW_GUIDE.md
index 95660a4..d35bb6d 100644
--- a/WORKFLOW_GUIDE.md
+++ b/WORKFLOW_GUIDE.md
@@ -13,29 +13,33 @@ PCAPpuller has been enhanced with a three-step workflow that solves the file siz
```bash
python3 PCAPpuller.py \
--workspace /tmp/my_workspace \
- --root /path/to/pcap/directory \
+ --source /path/to/pcap/directory \
--start "2025-08-26 16:00:00" \
--minutes 30 \
- --slop-min 100000 \
+ --selection-mode symlink \
+ --out /path/to/output.pcapng \
+ --tmpdir /path/on/large/volume/tmp \
--snaplen 128 \
--gzip
```
### Individual Steps
```bash
-# Step 1: Select files
+# Step 1: Select files (no data copy using a manifest)
python3 PCAPpuller.py \
--workspace /tmp/my_workspace \
- --root /path/to/pcap/directory \
+ --source /path/to/pcap/directory \
--start "2025-08-26 16:00:00" \
--minutes 30 \
- --slop-min 100000 \
+ --selection-mode manifest \
--step 1
-# Step 2: Process selected files
+# Step 2: Process selected files to an explicit path
python3 PCAPpuller.py \
--workspace /tmp/my_workspace \
--step 2 \
+ --out /path/to/output.pcapng \
+ --tmpdir /path/on/large/volume/tmp \
--resume
# Step 3: Clean output
@@ -56,15 +60,15 @@ python3 PCAPpuller.py \
### File Pattern Filtering (Step 1)
- **Include patterns**: Only process files matching these patterns
- - Default: `*.chunk_*.pcap` (includes chunk files)
-- **Exclude patterns**: Skip files matching these patterns
- - Default: `*.sorted.pcap`, `*.s256.pcap` (excludes large consolidated files)
+ - Default: `*.pcap`, `*.pcapng`
+- **Exclude patterns**: Optional. Add if needed.
+- **Selection mode**: `--selection-mode {manifest|symlink}` controls how Step 1 materializes files in the workspace. Default is `manifest`; use `symlink` to create a browsable workspace.
### Example: Custom Patterns
```bash
python3 PCAPpuller.py \
--workspace /tmp/workspace \
- --root /data/pcaps \
+  --source /data/pcaps \
--include-pattern "*.chunk_*.pcap" "capture_*.pcap" \
--exclude-pattern "*.backup.pcap" "*.temp.*" \
--start "2025-08-26 16:00:00" \
@@ -76,6 +80,8 @@ python3 PCAPpuller.py \
- **Output format**: pcap or pcapng (default: pcapng)
- **Display filter**: Wireshark filter to apply
- **Trim per batch**: Trim each batch vs. final file only
+- **Output path**: `--out /path/to/output.pcapng`
+- **Temporary directory**: `--tmpdir /path/on/large/volume/tmp`
### Cleaning Options (Step 3)
- **Snaplen**: Truncate packets to N bytes (saves space)
@@ -135,7 +141,7 @@ python3 PCAPpuller.py --workspace /tmp/workspace --step 2 --resume
# Process 6 hours of data with optimizations
python3 PCAPpuller.py \
--workspace /tmp/large_job \
- --root /data/capture_2025_08_26 \
+ --source /data/capture_2025_08_26 \
--start "2025-08-26 12:00:00" \
--minutes 360 \
--slop-min 100000 \
@@ -152,10 +158,10 @@ python3 PCAPpuller.py \
# See what files would be selected without processing
python3 PCAPpuller.py \
--workspace /tmp/preview \
- --root /data/pcaps \
+ --source /data/pcaps \
--start "2025-08-26 16:00:00" \
--minutes 60 \
- --step 1 \
+ --step 1 \
--dry-run
```
@@ -164,7 +170,7 @@ python3 PCAPpuller.py \
# Step 1: Select HTTP traffic files
python3 PCAPpuller.py \
--workspace /tmp/http_analysis \
- --root /data/network_logs \
+ --source /data/network_logs \
--include-pattern "*http*" "*web*" \
--start "2025-08-26 16:00:00" \
--minutes 120 \
@@ -228,7 +234,7 @@ python3 PCAPpuller_legacy.py \
# New workflow (solves size inflation)
python3 PCAPpuller.py \
--workspace /tmp/workspace \
- --root /data/pcaps \
+ --source /data/pcaps \
--start "2025-08-26 16:00:00" \
--minutes 60 \
--slop-min 100000 \
diff --git a/assets/PCAPpuller.icns b/assets/PCAPpuller.icns
new file mode 100644
index 0000000..d1bcd93
--- /dev/null
+++ b/assets/PCAPpuller.icns
@@ -0,0 +1,7 @@
+This is a placeholder for the PCAPpuller application icon (ICNS format).
+
+Replace this file with your real macOS .icns icon:
+- Name: PCAPpuller.icns
+- Place under assets/ for development window icon (best-effort on macOS)
+
+For distribution with a bundled app, configure your bundler (py2app, PyInstaller, Briefcase, etc.) to use this .icns file.
diff --git a/assets/PCAPpuller.ico b/assets/PCAPpuller.ico
new file mode 100644
index 0000000..f42bfd1
--- /dev/null
+++ b/assets/PCAPpuller.ico
@@ -0,0 +1,7 @@
+This is a placeholder for the PCAPpuller application icon (ICO format).
+
+Replace this file with your real Windows .ico icon:
+- Name: PCAPpuller.ico
+- Place under assets/ for development window icon on Windows
+
+For packaging MSI/EXE, configure your bundler to reference this .ico file.
diff --git a/assets/PCAPpuller.png b/assets/PCAPpuller.png
new file mode 100644
index 0000000..a430387
--- /dev/null
+++ b/assets/PCAPpuller.png
@@ -0,0 +1,10 @@
+This is a placeholder for the PCAPpuller application icon (PNG format).
+
+Replace this file with your real icon:
+- Recommended sizes: 512x512 and 256x256
+- Name: PCAPpuller.png
+
+Packaging notes:
+- Linux .desktop uses Icon=PCAPpuller; install this file to a theme path like:
+ /usr/share/icons/hicolor/512x512/apps/PCAPpuller.png
+- During development, the GUI will attempt to load assets/PCAPpuller.png automatically for the window icon.
diff --git a/docs/Analyst-Guide.md b/docs/Analyst-Guide.md
index 1e53313..e638241 100644
--- a/docs/Analyst-Guide.md
+++ b/docs/Analyst-Guide.md
@@ -1,4 +1,4 @@
-# PCAPpuller Analyst Guide v0.3.0
+# PCAPpuller Analyst Guide v0.3.1
A comprehensive guide for SOC analysts to extract, clean, and analyze network traffic efficiently using the new **three-step workflow** that solves file size inflation issues.
@@ -36,20 +36,20 @@ mergecap --version
**Solves file size inflation issues!**
**GUI**: Launch PCAPpuller GUI
-1. Set **Root** directories containing PCAPs
-2. Configure **Start time** and **Duration**
+1. Set **Source Directory** containing PCAPs
+2. Configure **Start time** and **Duration** (or use All Day)
3. Enable workflow steps: โ๏ธ Step 1, โ๏ธ Step 2, โ๏ธ Step 3 (optional)
-4. Click **Pattern Settings** to configure file filtering
+4. Click **Pattern Settings** to configure file filtering (defaults include only .pcap/.pcapng)
5. Optional: Apply **Display filter** (300+ filters available)
6. Click **Run Workflow**
**CLI**:
```bash
# Complete three-step workflow (recommended)
-pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:30:00" --minutes 15 --snaplen 256 --gzip
+pcap-puller --workspace /tmp/job --source /data --start "2025-10-10 14:30:00" --minutes 15 --snaplen 256 --gzip
# Individual steps for better control
-pcap-puller --workspace /tmp/job --step 1 --root /data --start "2025-10-10 14:30:00" --minutes 15 # Select & filter
+pcap-puller --workspace /tmp/job --step 1 --source /data --start "2025-10-10 14:30:00" --minutes 15 # Select & filter
pcap-puller --workspace /tmp/job --step 2 --resume --display-filter "dns or http" # Process
pcap-puller --workspace /tmp/job --step 3 --resume --snaplen 256 --gzip # Clean
@@ -60,10 +60,11 @@ pcap-puller --workspace /tmp/job --status
#### Legacy Mode (May Cause Size Inflation)
```bash
# Use legacy mode only if needed
-pcap-puller --root /data --start "2025-10-10 14:30:00" --minutes 15 --out incident.pcapng
+pcap-puller --source /data --start "2025-10-10 14:30:00" --minutes 15 --out incident.pcapng
```
-### B. PCAP Cleaning (Enhanced in v0.3.0)
+### B. PCAP Cleaning (Enhanced in v0.3.1)
+Note: If you leave Step 3 options blank in the 3-step workflow, defaults preserve payloads (convert to pcap when possible, gzip output).
**GUI**: Click **"Clean..."** button
1. Select input PCAP/PCAPNG file
2. Configure cleaning options:
@@ -90,8 +91,10 @@ pcap-clean --input capture.pcapng --start "2025-10-10 14:00:00" \
The new pattern filtering automatically prevents duplicate data processing.
**Default Settings** (work for most cases):
-- **Include**: `*.chunk_*.pcap` (individual time-based files)
-- **Exclude**: `*.sorted.pcap`, `*.s256.pcap` (large consolidated files)
+- **Include**: `*.pcap`, `*.pcapng`
+- **Exclude**: (none by default) โ add excludes only if needed
+
+Tip: If your environment uses chunked filenames (e.g., `*.chunk_*.pcap`), add them via Advanced Options or Pattern Settings.
**Custom Patterns** (GUI: Pattern Settings button):
```bash
@@ -161,7 +164,7 @@ The new pattern filtering automatically prevents duplicate data processing.
pcap-puller --root /data --start "2025-10-10 14:00:00" --minutes 30 --out result.pcap
# NEW (solves size inflation)
-pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:00:00" --minutes 30 --snaplen 256 --gzip
+pcap-puller --workspace /tmp/job --source /data --start "2025-10-10 14:00:00" --minutes 30 --snaplen 256 --gzip
```
## 5. Performance & Best Practices
@@ -186,7 +189,7 @@ pcap-puller --workspace /tmp/job --root /data --start "2025-10-10 14:00:00" --mi
### Audit & Validation
```bash
# NEW: Validate three-step workflow with dry-run
-pcap-puller --workspace /tmp/job --step 1 --root /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run
+pcap-puller --workspace /tmp/job --step 1 --source /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run
# Check workflow status
pcap-puller --workspace /tmp/job --status
@@ -243,12 +246,12 @@ pcap-puller --root /data --start "2025-10-10 14:00:00" --minutes 30 --dry-run --
### SOAR Integration
```bash
# NEW: Automated incident response with three-step workflow
-pcap-puller --workspace "/cases/$CASE_ID/workspace" --root "$PCAP_STORAGE" \
+pcap-puller --workspace "/cases/$CASE_ID/workspace" --source "$PCAP_STORAGE" \
--start "$INCIDENT_START" --minutes "$INCIDENT_DURATION" \
--display-filter "$IOC_FILTER" --snaplen 256 --gzip --verbose
# Legacy method (if needed)
-pcap-puller --root "$PCAP_STORAGE" --start "$INCIDENT_START" \
+pcap-puller --source "$PCAP_STORAGE" --start "$INCIDENT_START" \
--minutes "$INCIDENT_DURATION" --display-filter "$IOC_FILTER" \
--out "/cases/$CASE_ID/network_evidence.pcapng" --verbose
```
@@ -257,13 +260,13 @@ pcap-puller --root "$PCAP_STORAGE" --start "$INCIDENT_START" \
```bash
# NEW: Process multiple timeframes with three-step workflow
for time in "14:00:00" "14:30:00" "15:00:00"; do
- pcap-puller --workspace "/tmp/batch_${time//:}" --root /data \
+ pcap-puller --workspace "/tmp/batch_${time//:}" --source /data \
--start "2025-10-10 $time" --minutes 15 --snaplen 256 --gzip
done
# Legacy batch processing (if needed)
for time in "14:00:00" "14:30:00" "15:00:00"; do
- pcap-puller --root /data --start "2025-10-10 $time" --minutes 15 \
+ pcap-puller --source /data --start "2025-10-10 $time" --minutes 15 \
--out "analysis_${time//:}.pcapng"
done
```
diff --git a/gui_pcappuller.py b/gui_pcappuller.py
index 48a1136..add91fb 100755
--- a/gui_pcappuller.py
+++ b/gui_pcappuller.py
@@ -180,8 +180,8 @@ def _open_pattern_settings(parent: "sg.Window", current_include: list, current_e
win.close()
return None
elif ev == "Reset to Defaults":
- win["-INCLUDE-"].update("*.chunk_*.pcap")
- win["-EXCLUDE-"].update("*.sorted.pcap\n*.s256.pcap")
+ win["-INCLUDE-"].update("*.pcap\n*.pcapng")
+ win["-EXCLUDE-"].update("")
elif ev == "Save":
include_text = vals.get("-INCLUDE-", "").strip()
exclude_text = vals.get("-EXCLUDE-", "").strip()
@@ -217,10 +217,10 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over
desired_end = dt.datetime.combine(start.date(), dt.time(23, 59, 59, 999999))
window_obj = Window(start=start, end=desired_end)
- roots = [Path(values["-ROOT-"])] if values["-ROOT-"] else []
+ roots = [Path(values["-SOURCE-"])] if values.get("-SOURCE-") else []
if not roots:
- raise PCAPPullerError("Root directory is required")
+ raise PCAPPullerError("Source directory is required")
# Create workspace in temp directory
workspace_name = f"pcappuller_{dt.datetime.now().strftime('%Y%m%d_%H%M%S')}"
@@ -230,8 +230,8 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over
workflow = ThreeStepWorkflow(workspace_dir)
# Get pattern settings from values
- include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.chunk_*.pcap"])
- exclude_patterns = values.get("-EXCLUDE-PATTERNS-", ["*.sorted.pcap", "*.s256.pcap"])
+ include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.pcap", "*.pcapng"])
+ exclude_patterns = values.get("-EXCLUDE-PATTERNS-", [])
state = workflow.initialize_workflow(
root_dirs=roots,
@@ -267,6 +267,15 @@ def progress_callback(phase: str, current: int, total: int):
run_step3 = values.get("-RUN-STEP3-", False)
try:
+ # Verbose: announce core settings
+ print("Configuration:")
+ print(f" Source: {roots[0]}")
+ print(f" Window: {window_obj.start} .. {window_obj.end}")
+ print("  Selection: manifest (Step 1 uses mtime+pattern only)")
+ print(f" Output: {values.get('-OUT-', '(workspace default)')}")
+ print(f" Tmpdir: {values.get('-TMPDIR-', '(workspace tmp)')}")
+ print(f" Effective settings: workers={eff_settings['workers']}, batch={eff_settings['batch']}, slop={eff_settings['slop']}, trim_per_batch={eff_settings['trim_per_batch']}, precise_in_step2={eff_settings['precise_filter']}")
+
# Step 1: Select and Move
if run_step1:
window.write_event_value("-STEP-UPDATE-", ("Step 1: Selecting files...", 1))
@@ -275,7 +284,7 @@ def progress_callback(phase: str, current: int, total: int):
state = workflow.step1_select_and_move(
state=state,
slop_min=eff_settings["slop"],
- precise_filter=eff_settings["precise_filter"],
+ precise_filter=False, # moved to Step 2
workers=workers,
cache=cache,
dry_run=values.get("-DRYRUN-", False),
@@ -291,12 +300,20 @@ def progress_callback(phase: str, current: int, total: int):
return
if not state.selected_files:
+ print("Step 1 selected 0 files.")
window.write_event_value("-DONE-", "No files selected in Step 1")
return
+ else:
+ total_size_mb = sum(f.stat().st_size for f in state.selected_files) / (1024*1024)
+ print(f"Step 1 selected {len(state.selected_files)} files ({total_size_mb:.1f} MB)")
# Step 2: Process
if run_step2:
window.write_event_value("-STEP-UPDATE-", ("Step 2: Processing files...", 2))
+ print("Step 2: Applying precise filter and processing...")
+ print(f" Batch size: {eff_settings['batch']} | Trim per batch: {eff_settings['trim_per_batch']}")
+ if values.get("-DFILTER-"):
+ print(f" Display filter: {values['-DFILTER-']}")
state = workflow.step2_process(
state=state,
@@ -305,7 +322,12 @@ def progress_callback(phase: str, current: int, total: int):
display_filter=values["-DFILTER-"] or None,
trim_per_batch=eff_settings["trim_per_batch"],
progress_callback=progress_callback,
- verbose=values.get("-VERBOSE-", False)
+ verbose=values.get("-VERBOSE-", False),
+ out_path=(Path(values["-OUT-"]) if values.get("-OUT-") else None),
+ tmpdir_parent=(Path(values["-TMPDIR-"]) if values.get("-TMPDIR-") else None),
+ precise_filter=eff_settings["precise_filter"],
+ workers=parse_workers(eff_settings["workers"], 1000),
+ cache=cache,
)
# Step 3: Clean
@@ -327,13 +349,15 @@ def progress_callback(phase: str, current: int, total: int):
if values.get("-GZIP-"):
clean_options["gzip"] = True
- if clean_options:
- state = workflow.step3_clean(
- state=state,
- options=clean_options,
- progress_callback=progress_callback,
- verbose=values.get("-VERBOSE-", False)
- )
+ # If no options were specified but Step 3 is enabled, apply sensible defaults
+ if not clean_options:
+ clean_options = {"snaplen": 256, "gzip": True}
+ state = workflow.step3_clean(
+ state=state,
+ options=clean_options,
+ progress_callback=progress_callback,
+ verbose=values.get("-VERBOSE-", False)
+ )
# Determine final output
final_file = state.cleaned_file or state.processed_file
@@ -357,8 +381,8 @@ def main():
sg.theme("SystemDefault")
# Default patterns
- default_include = ["*.chunk_*.pcap"]
- default_exclude = ["*.sorted.pcap", "*.s256.pcap"]
+ default_include = ["*.pcap", "*.pcapng"]
+ default_exclude = []
# Create layout with three-step workflow
layout = [
@@ -366,12 +390,14 @@ def main():
[sg.HSeparator()],
# Basic settings
- [sg.Text("Root Directory"), sg.Input(key="-ROOT-", expand_x=True), sg.FolderBrowse()],
+ [sg.Text("Source Directory"), sg.Input(key="-SOURCE-", expand_x=True), sg.FolderBrowse()],
[sg.Text("Start Time (YYYY-MM-DD HH:MM:SS)"), sg.Input(key="-START-", expand_x=True)],
[sg.Text("Duration"),
sg.Text("Hours"), sg.Slider(range=(0, 24), orientation="h", key="-HOURS-", default_value=0, size=(20,15), enable_events=True),
sg.Text("Minutes"), sg.Slider(range=(0, 59), orientation="h", key="-MINS-", default_value=15, size=(20,15), enable_events=True),
sg.Button("All Day", key="-ALLDAY-")],
+ [sg.Text("Output File"), sg.Input(key="-OUT-", expand_x=True), sg.FileSaveAs()],
+ [sg.Text("Temporary Directory"), sg.Input(key="-TMPDIR-", expand_x=True), sg.FolderBrowse()],
[sg.HSeparator()],
@@ -392,7 +418,7 @@ def main():
], expand_x=True)],
[sg.Frame("Step 3: Cleaning Options", [
- [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space"),
+ [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space (leave blank to keep full payload)"),
sg.Checkbox("Convert to PCAP", key="-CLEAN-CONVERT-", tooltip="Force conversion to pcap format"),
sg.Checkbox("Gzip Compress", key="-GZIP-", tooltip="Compress final output")],
], expand_x=True)],
@@ -420,6 +446,24 @@ def main():
]
window = sg.Window("PCAPpuller v2", layout, size=(900, 800))
+ # Try to set a custom window icon if assets exist
+ try:
+ here = Path(__file__).resolve()
+ assets_dir = None
+ for p in [here.parent, *here.parents]:
+ cand = p / "assets"
+ if cand.exists():
+ assets_dir = cand
+ break
+ if assets_dir is None:
+ assets_dir = here.parent / "assets"
+ for icon_name in ["PCAPpuller.ico", "PCAPpuller.png", "PCAPpuller.icns"]:
+ ip = assets_dir / icon_name
+ if ip.exists():
+ window.set_icon(str(ip))
+ break
+ except Exception:
+ pass
stop_flag = {"stop": False}
worker = None
adv_overrides: dict | None = None
@@ -456,8 +500,8 @@ def _update_reco_label():
if event == "Run Workflow" and worker is None:
# Validation
- if not values.get("-ROOT-"):
- sg.popup_error("Root directory is required")
+ if not values.get("-SOURCE-"):
+ sg.popup_error("Source directory is required")
continue
if not values.get("-START-"):
sg.popup_error("Start time is required")
@@ -540,11 +584,21 @@ def _update_reco_label():
elif event == "-PROGRESS-":
phase, cur, tot = values[event]
+ friendly = {
+ "pattern-filter": "Filtering by pattern",
+ "precise": "Precise filtering",
+ "merge-batches": "Merging batches",
+ "trim-batches": "Trimming batches",
+ "trim": "Trimming final",
+ "display-filter": "Applying display filter",
+ "gzip": "Compressing",
+ }
if str(phase).startswith("scan"):
window["-STATUS-"].update(f"Scanning... {cur} files visited")
window["-PB-"].update(cur % 100)
else:
- window["-STATUS-"].update(f"{phase} {cur}/{tot}")
+ label = friendly.get(str(phase), str(phase))
+ window["-STATUS-"].update(f"{label}: {cur}/{tot}")
pct = 0 if tot <= 0 else int((cur / tot) * 100)
window["-PB-"].update(pct)
print(f"{phase}: {cur}/{tot}")
diff --git a/packaging/linux/install_desktop.sh b/packaging/linux/install_desktop.sh
new file mode 100755
index 0000000..d169b16
--- /dev/null
+++ b/packaging/linux/install_desktop.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Minimal installer for PCAPpuller desktop integration on Linux
+# - Installs desktop entry and icon for system menus
+# - Requires root privileges (via sudo)
+set -euo pipefail
+
+repo_root=$(cd "$(dirname "$0")"/../.. && pwd)
+app_desktop_src="$repo_root/pcappuller-gui.desktop"
+icon_src="$repo_root/assets/PCAPpuller.png"
+
+app_desktop_dst="/usr/share/applications/PCAPpuller.desktop"
+icon_dst_dir="/usr/share/icons/hicolor/512x512/apps"
+icon_dst="$icon_dst_dir/PCAPpuller.png"
+
+if [[ $EUID -ne 0 ]]; then
+ echo "This script requires root. Re-running with sudo..."
+ exec sudo "$0" "$@"
+fi
+
+if [[ ! -f "$app_desktop_src" ]]; then
+ echo "Desktop file not found: $app_desktop_src" >&2
+ exit 1
+fi
+if [[ ! -f "$icon_src" ]]; then
+ echo "Icon file not found: $icon_src" >&2
+ exit 1
+fi
+
+install -Dm644 "$app_desktop_src" "$app_desktop_dst"
+install -d "$icon_dst_dir"
+install -m644 "$icon_src" "$icon_dst"
+
+# Refresh desktop and icon caches if tools are present
+if command -v update-desktop-database >/dev/null 2>&1; then
+ update-desktop-database /usr/share/applications || true
+fi
+if command -v gtk-update-icon-cache >/dev/null 2>&1; then
+ gtk-update-icon-cache -q /usr/share/icons/hicolor || true
+fi
+
+echo "Installed:"
+echo " $app_desktop_dst"
+echo " $icon_dst"
diff --git a/packaging/linux/uninstall_desktop.sh b/packaging/linux/uninstall_desktop.sh
new file mode 100755
index 0000000..fc86668
--- /dev/null
+++ b/packaging/linux/uninstall_desktop.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Minimal uninstaller for PCAPpuller desktop integration on Linux
+set -euo pipefail
+
+if [[ $EUID -ne 0 ]]; then
+ echo "This script requires root. Re-running with sudo..."
+ exec sudo "$0" "$@"
+fi
+
+app_desktop_dst="/usr/share/applications/PCAPpuller.desktop"
+icon_dst="/usr/share/icons/hicolor/512x512/apps/PCAPpuller.png"
+
+rm -f "$app_desktop_dst" "$icon_dst"
+
+# Refresh caches if tools are present
+if command -v update-desktop-database >/dev/null 2>&1; then
+ update-desktop-database /usr/share/applications || true
+fi
+if command -v gtk-update-icon-cache >/dev/null 2>&1; then
+ gtk-update-icon-cache -q /usr/share/icons/hicolor || true
+fi
+
+echo "Removed:"
+echo " $app_desktop_dst"
+echo " $icon_dst"
diff --git a/packaging/macos/build_pyinstaller.sh b/packaging/macos/build_pyinstaller.sh
new file mode 100755
index 0000000..872e83a
--- /dev/null
+++ b/packaging/macos/build_pyinstaller.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Build a portable macOS app using PyInstaller
+# Requires: python3 -m pip install pyinstaller
+set -euo pipefail
+
+repo_root=$(cd "$(dirname "$0")"/../.. && pwd)
+cd "$repo_root"
+
+python3 -m pip install --upgrade pyinstaller >/dev/null
+
+# Use the existing GUI script as the entrypoint
+pyinstaller \
+ --name "PCAPpuller" \
+ --windowed \
+ --icon assets/PCAPpuller.icns \
+ --noconfirm \
+ gui_pcappuller.py
+
+echo "Built app at: dist/PCAPpuller.app"
diff --git a/packaging/windows/build_pyinstaller.ps1 b/packaging/windows/build_pyinstaller.ps1
new file mode 100644
index 0000000..2ccd87c
--- /dev/null
+++ b/packaging/windows/build_pyinstaller.ps1
@@ -0,0 +1,21 @@
+# Build a portable Windows app using PyInstaller
+# Run in PowerShell: pwsh -File packaging\windows\build_pyinstaller.ps1
+
+$ErrorActionPreference = "Stop"
+
+# Ensure pyinstaller is available
+python -m pip install --upgrade pyinstaller | Out-Null
+
+# Change to repo root
+$repoRoot = Split-Path -Parent (Split-Path -Parent $PSScriptRoot)
+Set-Location $repoRoot
+
+# Build
+pyinstaller `
+ --name "PCAPpuller" `
+ --windowed `
+ --icon assets/PCAPpuller.ico `
+ --noconfirm `
+ gui_pcappuller.py
+
+Write-Host "Built app at: dist/PCAPpuller.exe"
diff --git a/pcappuller-gui.desktop b/pcappuller-gui.desktop
index c4bff0b..17895a0 100644
--- a/pcappuller-gui.desktop
+++ b/pcappuller-gui.desktop
@@ -1,11 +1,11 @@
[Desktop Entry]
Version=1.0
Type=Application
-Name=PCAPpuller GUI
+Name=PCAPpuller
GenericName=PCAP Analysis Tool
Comment=Fast PCAP window selector, merger, trimmer, and cleaner
-Exec=pcappuller-gui
-Icon=pcappuller
+Exec=PCAPpuller
+Icon=PCAPpuller
Terminal=false
Categories=Network;System;
Keywords=pcap;wireshark;network;packet;analysis;
diff --git a/pcappuller/gui.py b/pcappuller/gui.py
index f48bdf0..de68a1b 100644
--- a/pcappuller/gui.py
+++ b/pcappuller/gui.py
@@ -175,8 +175,8 @@ def _open_pattern_settings(parent: "sg.Window", current_include: list, current_e
win.close()
return None
elif ev == "Reset to Defaults":
- win["-INCLUDE-"].update("*.chunk_*.pcap")
- win["-EXCLUDE-"].update("*.sorted.pcap\n*.s256.pcap")
+ win["-INCLUDE-"].update("*.pcap\n*.pcapng")
+ win["-EXCLUDE-"].update("")
elif ev == "Save":
include_text = vals.get("-INCLUDE-", "").strip()
exclude_text = vals.get("-EXCLUDE-", "").strip()
@@ -212,10 +212,10 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over
desired_end = dt.datetime.combine(start.date(), dt.time(23, 59, 59, 999999))
window_obj = Window(start=start, end=desired_end)
- roots = [Path(values["-ROOT-"])] if values["-ROOT-"] else []
+ roots = [Path(values["-SOURCE-"])] if values.get("-SOURCE-") else []
if not roots:
- raise PCAPPullerError("Root directory is required")
+ raise PCAPPullerError("Source directory is required")
# Create workspace in temp directory
workspace_name = f"pcappuller_{dt.datetime.now().strftime('%Y%m%d_%H%M%S')}"
@@ -225,8 +225,8 @@ def run_workflow_v2(values: dict, window: "sg.Window", stop_flag: dict, adv_over
workflow = ThreeStepWorkflow(workspace_dir)
# Get pattern settings from values
- include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.chunk_*.pcap"])
- exclude_patterns = values.get("-EXCLUDE-PATTERNS-", ["*.sorted.pcap", "*.s256.pcap"])
+ include_patterns = values.get("-INCLUDE-PATTERNS-", ["*.pcap", "*.pcapng"])
+ exclude_patterns = values.get("-EXCLUDE-PATTERNS-", [])
state = workflow.initialize_workflow(
root_dirs=roots,
@@ -262,6 +262,15 @@ def progress_callback(phase: str, current: int, total: int):
run_step3 = values.get("-RUN-STEP3-", False)
try:
+ # Verbose: announce core settings
+ print("Configuration:")
+ print(f" Source: {roots[0]}")
+ print(f" Window: {window_obj.start} .. {window_obj.end}")
+ print(f" Selection: manifest (Step 1 uses mtime+pattern only)")
+ print(f" Output: {values.get('-OUT-', '(workspace default)')}")
+ print(f" Tmpdir: {values.get('-TMPDIR-', '(workspace tmp)')}")
+ print(f" Effective settings: workers={eff_settings['workers']}, batch={eff_settings['batch']}, slop={eff_settings['slop']}, trim_per_batch={eff_settings['trim_per_batch']}, precise_in_step2={eff_settings['precise_filter']}")
+
# Step 1: Select and Move
if run_step1:
window.write_event_value("-STEP-UPDATE-", ("Step 1: Selecting files...", 1))
@@ -270,7 +279,7 @@ def progress_callback(phase: str, current: int, total: int):
state = workflow.step1_select_and_move(
state=state,
slop_min=eff_settings["slop"],
- precise_filter=eff_settings["precise_filter"],
+ precise_filter=False, # moved to Step 2
workers=workers,
cache=cache,
dry_run=values.get("-DRYRUN-", False),
@@ -286,12 +295,20 @@ def progress_callback(phase: str, current: int, total: int):
return
if not state.selected_files:
+ print("Step 1 selected 0 files.")
window.write_event_value("-DONE-", "No files selected in Step 1")
return
+ else:
+ total_size_mb = sum(f.stat().st_size for f in state.selected_files) / (1024*1024)
+ print(f"Step 1 selected {len(state.selected_files)} files ({total_size_mb:.1f} MB)")
# Step 2: Process
if run_step2:
window.write_event_value("-STEP-UPDATE-", ("Step 2: Processing files...", 2))
+ print("Step 2: Applying precise filter and processing...")
+ print(f" Batch size: {eff_settings['batch']} | Trim per batch: {eff_settings['trim_per_batch']}")
+ if values.get("-DFILTER-"):
+ print(f" Display filter: {values['-DFILTER-']}")
state = workflow.step2_process(
state=state,
@@ -300,7 +317,12 @@ def progress_callback(phase: str, current: int, total: int):
display_filter=values["-DFILTER-"] or None,
trim_per_batch=eff_settings["trim_per_batch"],
progress_callback=progress_callback,
- verbose=values.get("-VERBOSE-", False)
+ verbose=values.get("-VERBOSE-", False),
+ out_path=(Path(values["-OUT-"]) if values.get("-OUT-") else None),
+ tmpdir_parent=(Path(values["-TMPDIR-"]) if values.get("-TMPDIR-") else None),
+ precise_filter=eff_settings["precise_filter"],
+ workers=parse_workers(eff_settings["workers"], 1000),
+ cache=cache,
)
# Step 3: Clean
@@ -322,13 +344,15 @@ def progress_callback(phase: str, current: int, total: int):
if values.get("-GZIP-"):
clean_options["gzip"] = True
- if clean_options:
- state = workflow.step3_clean(
- state=state,
- options=clean_options,
- progress_callback=progress_callback,
- verbose=values.get("-VERBOSE-", False)
- )
+ # If no options were specified but Step 3 is enabled, apply sensible defaults
+ if not clean_options:
+ clean_options = {"snaplen": 256, "gzip": True}
+ state = workflow.step3_clean(
+ state=state,
+ options=clean_options,
+ progress_callback=progress_callback,
+ verbose=values.get("-VERBOSE-", False)
+ )
# Determine final output
final_file = state.cleaned_file or state.processed_file
@@ -353,8 +377,8 @@ def main():
sg.theme("SystemDefault")
# Default patterns
- default_include = ["*.chunk_*.pcap"]
- default_exclude = ["*.sorted.pcap", "*.s256.pcap"]
+ default_include = ["*.pcap", "*.pcapng"]
+ default_exclude = []
# Create layout with three-step workflow
layout = [
@@ -362,12 +386,14 @@ def main():
[sg.HSeparator()],
# Basic settings
- [sg.Text("Root Directory"), sg.Input(key="-ROOT-", expand_x=True), sg.FolderBrowse()],
+ [sg.Text("Source Directory"), sg.Input(key="-SOURCE-", expand_x=True), sg.FolderBrowse()],
[sg.Text("Start Time (YYYY-MM-DD HH:MM:SS)"), sg.Input(key="-START-", expand_x=True)],
[sg.Text("Duration"),
sg.Text("Hours"), sg.Slider(range=(0, 24), orientation="h", key="-HOURS-", default_value=0, size=(20,15), enable_events=True),
sg.Text("Minutes"), sg.Slider(range=(0, 59), orientation="h", key="-MINS-", default_value=15, size=(20,15), enable_events=True),
sg.Button("All Day", key="-ALLDAY-")],
+ [sg.Text("Output File"), sg.Input(key="-OUT-", expand_x=True), sg.FileSaveAs()],
+ [sg.Text("Temporary Directory"), sg.Input(key="-TMPDIR-", expand_x=True), sg.FolderBrowse()],
[sg.HSeparator()],
@@ -388,7 +414,7 @@ def main():
], expand_x=True)],
[sg.Frame("Step 3: Cleaning Options", [
- [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space"),
+ [sg.Text("Snaplen (bytes)"), sg.Input("", key="-CLEAN-SNAPLEN-", size=(8,1), tooltip="Truncate packets to save space (leave blank to keep full payload)"),
sg.Checkbox("Convert to PCAP", key="-CLEAN-CONVERT-", tooltip="Force conversion to pcap format"),
sg.Checkbox("Gzip Compress", key="-GZIP-", tooltip="Compress final output")],
], expand_x=True)],
@@ -416,6 +442,25 @@ def main():
]
window = sg.Window("PCAPpuller", layout, size=(900, 800))
+ # Try to set a custom window icon if assets exist
+ try:
+ here = Path(__file__).resolve()
+ assets_dir = None
+ # Search upwards for a top-level 'assets' directory (repo layout)
+ for p in [here.parent, *here.parents]:
+ cand = p / "assets"
+ if cand.exists():
+ assets_dir = cand
+ break
+ if assets_dir is None:
+ assets_dir = here.parent.parent / "assets"
+ for icon_name in ["PCAPpuller.ico", "PCAPpuller.png", "PCAPpuller.icns"]:
+ ip = assets_dir / icon_name
+ if ip.exists():
+ window.set_icon(str(ip))
+ break
+ except Exception:
+ pass
stop_flag = {"stop": False}
worker = None
adv_overrides: dict | None = None
@@ -452,8 +497,8 @@ def _update_reco_label():
if event == "Run Workflow" and worker is None:
# Validation
- if not values.get("-ROOT-"):
- sg.popup_error("Root directory is required")
+ if not values.get("-SOURCE-"):
+ sg.popup_error("Source directory is required")
continue
if not values.get("-START-"):
sg.popup_error("Start time is required")
@@ -536,11 +581,21 @@ def _update_reco_label():
elif event == "-PROGRESS-":
phase, cur, tot = values[event]
+ friendly = {
+ "pattern-filter": "Filtering by pattern",
+ "precise": "Precise filtering",
+ "merge-batches": "Merging batches",
+ "trim-batches": "Trimming batches",
+ "trim": "Trimming final",
+ "display-filter": "Applying display filter",
+ "gzip": "Compressing",
+ }
if str(phase).startswith("scan"):
window["-STATUS-"].update(f"Scanning... {cur} files visited")
window["-PB-"].update(cur % 100)
else:
- window["-STATUS-"].update(f"{phase} {cur}/{tot}")
+ label = friendly.get(str(phase), str(phase))
+ window["-STATUS-"].update(f"{label}: {cur}/{tot}")
pct = 0 if tot <= 0 else int((cur / tot) * 100)
window["-PB-"].update(pct)
print(f"{phase}: {cur}/{tot}")
diff --git a/pcappuller/workflow.py b/pcappuller/workflow.py
index 98d0dcd..73f2225 100644
--- a/pcappuller/workflow.py
+++ b/pcappuller/workflow.py
@@ -3,6 +3,7 @@
import json
import logging
import shutil
+import os
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import List, Optional, Dict, Any, Callable
@@ -105,11 +106,12 @@ def step1_select_and_move(
self,
state: WorkflowState,
slop_min: int = 120,
- precise_filter: bool = True,
+ precise_filter: bool = False,
workers: Optional[int] = None,
cache: Optional[CapinfosCache] = None,
dry_run: bool = False,
- progress_callback: Optional[Callable[[str, int, int], None]] = None
+ progress_callback: Optional[Callable[[str, int, int], None]] = None,
+ selection_mode: str = "manifest" # one of: 'manifest', 'symlink'
) -> WorkflowState:
"""
Step 1: Select and move PCAP files based on time window and patterns.
@@ -124,8 +126,9 @@ def step1_select_and_move(
logging.info("Step 1 already complete, skipping...")
return state
- # Create selected directory
- if not dry_run:
+ # Create selected directory only if we will materialize files
+ materialize = selection_mode == "symlink"
+ if not dry_run and materialize:
self.selected_dir.mkdir(parents=True, exist_ok=True)
# Find candidates using existing logic
@@ -137,12 +140,11 @@ def step1_select_and_move(
if progress_callback:
progress_callback("pattern-filter", len(filtered_candidates), len(all_candidates))
- # Apply precise filtering if requested
+ # Step 1 is now mtime/pattern only by default; precise filtering moved to Step 2
if precise_filter and filtered_candidates:
if workers is None:
from .core import parse_workers
workers = parse_workers("auto", len(filtered_candidates))
-
final_candidates = precise_filter_parallel(
filtered_candidates, state.window, workers, 0, progress_callback, cache
)
@@ -156,30 +158,40 @@ def step1_select_and_move(
logging.info(f" After precise filtering: {len(final_candidates)}")
return state
- # Copy files to workspace
- copied_files = []
- for i, src_file in enumerate(final_candidates):
- dst_file = self.selected_dir / src_file.name
- # Handle name conflicts by appending a counter
- counter = 1
- while dst_file.exists():
- stem = src_file.stem
- suffix = src_file.suffix
- dst_file = self.selected_dir / f"{stem}_{counter:03d}{suffix}"
- counter += 1
-
- shutil.copy2(src_file, dst_file)
- copied_files.append(dst_file)
-
- if progress_callback:
- progress_callback("copy-files", i + 1, len(final_candidates))
+ selected_list: List[Path] = []
+ if selection_mode == "manifest":
+ # Do not materialize files; just record original paths
+ selected_list = list(final_candidates)
+ else:
+ # Materialize files via symlink only
+ for i, src_file in enumerate(final_candidates):
+ dst_file = self.selected_dir / src_file.name
+ # Handle name conflicts by appending a counter
+ counter = 1
+ while dst_file.exists():
+ stem = src_file.stem
+ suffix = src_file.suffix
+ dst_file = self.selected_dir / f"{stem}_{counter:03d}{suffix}"
+ counter += 1
+ try:
+ os.symlink(src_file, dst_file)
+ selected_list.append(dst_file)
+ except Exception as e:
+ logging.warning("Failed to symlink %s -> %s (%s); recording manifest path instead", src_file, dst_file, e)
+ selected_list.append(src_file)
+
+ if progress_callback:
+ progress_callback("copy-files", i + 1, len(final_candidates))
# Update state
- state.selected_files = copied_files
+ state.selected_files = selected_list
state.step1_complete = True
state.save(self.state_file)
- logging.info(f"Step 1 complete: Selected and copied {len(copied_files)} files to {self.selected_dir}")
+ if selection_mode == "manifest":
+ logging.info(f"Step 1 complete: Selected {len(selected_list)} files (manifest-only, no data copied)")
+ else:
+ logging.info(f"Step 1 complete: Materialized {len(selected_list)} files to {self.selected_dir} via {selection_mode}")
return state
def step2_process(
@@ -190,7 +202,12 @@ def step2_process(
display_filter: Optional[str] = None,
trim_per_batch: Optional[bool] = None,
progress_callback: Optional[Callable[[str, int, int], None]] = None,
- verbose: bool = False
+ verbose: bool = False,
+ out_path: Optional[Path] = None,
+ tmpdir_parent: Optional[Path] = None,
+ precise_filter: bool = True,
+ workers: Optional[int] = None,
+ cache: Optional[CapinfosCache] = None,
) -> WorkflowState:
"""
Step 2: Process selected files using existing merge/trim logic.
@@ -213,25 +230,40 @@ def step2_process(
# Create processed directory
self.processed_dir.mkdir(parents=True, exist_ok=True)
- # Determine output filename
+ # Determine output filename or use provided path
timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
- output_file = self.processed_dir / f"merged_{timestamp}.{out_format}"
+ default_output = self.processed_dir / f"merged_{timestamp}.{out_format}"
+ output_file = out_path if out_path else default_output
# Auto-determine trim_per_batch if not specified
if trim_per_batch is None:
duration_minutes = int((state.window.end - state.window.start).total_seconds() // 60)
trim_per_batch = duration_minutes > 60
- # Ensure tmp directory exists
- tmp_dir = self.workspace_dir / "tmp"
- tmp_dir.mkdir(parents=True, exist_ok=True)
+ # Ensure tmp directory exists (use override if provided)
+ if tmpdir_parent is None:
+ tmp_dir = self.workspace_dir / "tmp"
+ tmp_dir.mkdir(parents=True, exist_ok=True)
+ tmp_parent = tmp_dir
+ else:
+ Path(tmpdir_parent).mkdir(parents=True, exist_ok=True)
+ tmp_parent = Path(tmpdir_parent)
+ # Optionally apply precise filtering now (moved from Step 1)
+ candidates_for_merge = list(state.selected_files)
+ if precise_filter and candidates_for_merge:
+ if workers is None:
+ from .core import parse_workers
+ workers = parse_workers("auto", len(candidates_for_merge))
+ candidates_for_merge = precise_filter_parallel(
+ candidates_for_merge, state.window, workers, 0, progress_callback, cache
+ )
# Use existing build_output logic
result_file = build_output(
- candidates=state.selected_files,
+ candidates=candidates_for_merge,
window=state.window,
out_path=output_file,
- tmpdir_parent=tmp_dir,
+ tmpdir_parent=tmp_parent,
batch_size=batch_size,
out_format=out_format,
display_filter=display_filter,
diff --git a/pyproject.toml b/pyproject.toml
index 69a959e..8acaef7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "pcappuller"
-version = "0.2.3"
+version = "0.3.1"
description = "A fast PCAP window selector, merger, trimmer, and cleaner"
readme = "README.md"
authors = [
@@ -50,6 +50,7 @@ datetime = ["python-dateutil"]
[project.scripts]
pcap-puller = "pcappuller.cli:main"
pcap-puller-gui = "pcappuller.gui:main"
+PCAPpuller = "pcappuller.gui:main"
pcap-clean = "pcappuller.clean_cli:main"
[tool.setuptools]