From 52ba8bc8907f1bb4e5d920b90d4e060514e9c6f9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 17:04:57 +0000 Subject: [PATCH 1/6] Add gowitness screenshots and asset discovery analysis documentation Co-authored-by: ek --- asset_discovery_analysis.md | 136 ++++++++++++++++++++++++++++++++++++ main.py | 77 ++++++++++++++++++-- 2 files changed, 209 insertions(+), 4 deletions(-) create mode 100644 asset_discovery_analysis.md diff --git a/asset_discovery_analysis.md b/asset_discovery_analysis.md new file mode 100644 index 0000000..48d65bf --- /dev/null +++ b/asset_discovery_analysis.md @@ -0,0 +1,136 @@ +# Asset Discovery Analysis - HaxUnit + +## Current Techniques Implemented in main.py + +### 1. Subdomain Discovery +- **subfinder**: Passive subdomain enumeration from multiple sources +- **chaos**: ProjectDiscovery's Chaos database +- **dnsx_subdomains**: DNS bruteforce with wordlists +- **alterx**: Subdomain permutation generation +- **subwiz**: AI-powered subdomain prediction +- **katana**: Web crawler to discover additional endpoints + +### 2. Network Discovery +- **dnsx_ips**: A record resolution +- **naabu**: Port scanning on discovered subdomains +- **httpx**: HTTP service detection and technology identification + +### 3. Virtual Host Discovery +- **ffuf_vhosts**: Virtual host enumeration using ffuf + +### 4. Content Discovery +- **ffuf**: Directory and file fuzzing +- **katana**: Web crawling for endpoint discovery + +### 5. Vulnerability Scanning +- **nuclei**: Comprehensive vulnerability scanning +- **wpscan**: WordPress-specific scanning +- **acunetix**: Commercial vulnerability scanner integration + +### 6. Certificate Transparency +- Partially covered through subfinder and chaos sources + +## Missing Asset Discovery Techniques + +### 1. **Screenshot and Visual Discovery** +**Missing Tool: gowitness/aquatone** +- **Purpose**: Visual reconnaissance and screenshot capture +- **Value**: Quickly identify interesting applications, login pages, and unique interfaces +- **Implementation Priority**: HIGH + +### 2. **JavaScript Analysis and Endpoint Discovery** +**Missing Tools: subdomainizer, linkfinder, secretfinder** +- **Purpose**: Extract subdomains, endpoints, and secrets from JavaScript files +- **Value**: Discovers hidden endpoints and API keys in client-side code +- **Implementation Priority**: HIGH + +### 3. **Archive Data Mining** +**Missing Tools: waybackurls, gau (GetAllUrls)** +- **Purpose**: Historical URL discovery from web archives +- **Value**: Finds old endpoints, parameters, and forgotten assets +- **Implementation Priority**: MEDIUM + +### 4. **Cloud Asset Discovery** +**Missing Tools: cloud_enum, S3Scanner, bucket_finder** +- **Purpose**: Discover cloud storage buckets and services +- **Value**: Finds misconfigured cloud resources and data leaks +- **Implementation Priority**: HIGH + +### 5. **Advanced OSINT** +**Missing Tools: amass intel, theHarvester, shodan** +- **Purpose**: Intelligence gathering from multiple OSINT sources +- **Value**: Discovers related organizations, ASNs, and infrastructure +- **Implementation Priority**: MEDIUM + +### 6. **API Discovery and Testing** +**Missing Tools: kiterunner, arjun, paramspider** +- **Purpose**: API endpoint and parameter discovery +- **Value**: Finds hidden APIs and parameters for testing +- **Implementation Priority**: HIGH + +### 7. **Social Media and Git Repository Mining** +**Missing Tools: GitDorker, truffleHog, github-subdomains** +- **Purpose**: Extract assets and secrets from public repositories +- **Value**: Finds leaked credentials and infrastructure information +- **Implementation Priority**: MEDIUM + +### 8. **ASN and Network Range Discovery** +**Missing Tools: ASNLookup, amass intel** +- **Purpose**: Map organizational network ranges +- **Value**: Discovers additional IP ranges owned by the organization +- **Implementation Priority**: MEDIUM + +### 9. **Certificate Transparency Enhanced** +**Missing Tools: crt.sh direct API, certspotter** +- **Purpose**: Enhanced certificate transparency log analysis +- **Value**: More comprehensive subdomain discovery from SSL certificates +- **Implementation Priority**: LOW (partially covered) + +### 10. **Technology Stack Analysis** +**Missing Tools: wappalyzer, whatweb, builtwith** +- **Purpose**: Detailed technology identification +- **Value**: Better targeting of technology-specific vulnerabilities +- **Implementation Priority**: MEDIUM + +### 11. **Domain Reputation and Threat Intelligence** +**Missing Tools: virustotal, alienvault OTX** +- **Purpose**: Threat intelligence and reputation analysis +- **Value**: Identifies potentially malicious or compromised assets +- **Implementation Priority**: LOW + +### 12. **Mobile Application Analysis** +**Missing Tools: MobSF integration, APK analysis** +- **Purpose**: Mobile app asset discovery +- **Value**: Discovers mobile-specific endpoints and secrets +- **Implementation Priority**: LOW + +## Recommended Implementation + +### Phase 1 - High Priority (Immediate Implementation) +1. **gowitness/aquatone** - Visual reconnaissance +2. **subdomainizer** - JavaScript analysis for subdomains +3. **cloud_enum** - Cloud asset discovery +4. **kiterunner** - API discovery +5. **arjun/paramspider** - Parameter discovery + +### Phase 2 - Medium Priority +1. **waybackurls/gau** - Archive mining +2. **theHarvester** - Enhanced OSINT +3. **amass intel** - ASN mapping +4. **wappalyzer** - Technology identification + +### Phase 3 - Lower Priority +1. **GitHub mining tools** +2. **Threat intelligence integration** +3. **Mobile application analysis** + +## Implementation Strategy + +The missing tools should be implemented as modular methods following the existing pattern: +- Each tool as a separate method +- Integration with the existing workflow +- Proper error handling and output formatting +- Configuration options in the argument parser +- Results integration with existing output formats + +The most impactful additions would be visual reconnaissance (gowitness), JavaScript analysis (subdomainizer), and cloud asset discovery (cloud_enum) as these provide unique value not covered by existing tools. \ No newline at end of file diff --git a/main.py b/main.py index 77dde4c..6f618d2 100644 --- a/main.py +++ b/main.py @@ -78,7 +78,8 @@ def __init__( htb: bool, fuzz: bool, use_gpt: bool, - skip_installers: bool + skip_installers: bool, + screenshots: bool ): """ Initialize HaxUnit with configuration parameters. @@ -102,6 +103,7 @@ def __init__( fuzz: Enable fuzzing use_gpt: Enable GPT suggestions skip_installers: Skip tool installation checks + screenshots: Enable visual reconnaissance with screenshots """ # Initialize instance variables self.site = site @@ -139,6 +141,7 @@ def __init__( self.use_gpt = use_gpt self.use_notify = use_notify self.cloud_upload = cloud_upload + self.screenshots = screenshots # HTB specific configuration self.htb = htb @@ -346,7 +349,7 @@ def check_tools(self) -> bool: """ required_tools = [ "dnsx", "subfinder", "katana", "unfurl", "alterx", - "dnsx", "naabu", "httpx", "nuclei", "notify", "ffuf" + "dnsx", "naabu", "httpx", "nuclei", "notify", "ffuf", "gowitness" ] for tool in required_tools: @@ -1130,7 +1133,8 @@ def install_all_tools(self): other_tools = { "gau": "go install github.com/lc/gau/v2/cmd/gau@latest", "unfurl": "go install github.com/tomnomnom/unfurl@latest", - "ffuf": "go install -v github.com/ffuf/ffuf/v2@latest" + "ffuf": "go install -v github.com/ffuf/ffuf/v2@latest", + "gowitness": "go install github.com/sensepost/gowitness@latest" } for tool, install_cmd in other_tools.items(): @@ -1141,6 +1145,61 @@ def install_all_tools(self): else: self.print("Installer", f"{tool} is already installed.", Colors.SUCCESS) + def gowitness(self) -> None: + """Run gowitness to capture screenshots of active subdomains for visual reconnaissance.""" + if not self.all_subdomains_up: + self.print("Gowitness", "No active subdomains found - skipping visual reconnaissance") + return + + self.print("Gowitness", "Starting visual reconnaissance and screenshot capture...") + + # Create screenshots directory + screenshots_dir = f"{self.dir_path}/screenshots" + self.cmd(f"mkdir -p {screenshots_dir}") + + # Check if gowitness is installed + if not self.cmd("command -v gowitness", silent=True): + self.print("Gowitness", "gowitness not found. Install with: go install github.com/sensepost/gowitness@latest", Colors.WARNING) + return + + # Run gowitness on active subdomains + threads = "20" if not self.quick else "10" + timeout = "15" if not self.quick else "10" + + gowitness_cmd = ( + f"gowitness file " + f"-f {self.dir_path}/all_subdomains_up.txt " + f"-P {screenshots_dir}/ " + f"-t {timeout} " + f"--threads {threads} " + f"--log-level {'info' if self.verbose else 'fatal'} " + f"--disable-logging-colors " + f"--screenshot-format png " + f"--chrome-path /usr/bin/google-chrome " + f"--user-agent 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'" + ) + + self.cmd(gowitness_cmd) + + # Generate gowitness report + report_cmd = ( + f"gowitness report export " + f"-f {screenshots_dir}/gowitness.sqlite3 " + f"--format csv " + f"> {self.dir_path}/gowitness_report.csv" + ) + self.cmd(report_cmd) + + # Count screenshots taken + screenshot_count = self.cmd(f"ls {screenshots_dir}/*.png 2>/dev/null | wc -l", silent=True) + if screenshot_count and screenshot_count.strip() != "0": + self.print("Gowitness", f"Captured {screenshot_count.strip()} screenshots in {screenshots_dir}/") + self.print("Gowitness", f"Report saved to {self.dir_path}/gowitness_report.csv") + else: + self.print("Gowitness", "No screenshots captured - check if subdomains are accessible", Colors.WARNING) + + self.event("gowitness_completed") + def create_argument_parser() -> argparse.ArgumentParser: """ @@ -1269,6 +1328,12 @@ def create_argument_parser() -> argparse.ArgumentParser: help='Enable GPT suggestions' ) + parser.add_argument( + '--screenshots', + action='store_true', + help='Enable visual reconnaissance with gowitness screenshots' + ) + return parser @@ -1356,7 +1421,8 @@ def main(): htb=args.htb, fuzz=args.fuzz, use_gpt=args.use_gpt, - skip_installers=args.skip_installers + skip_installers=args.skip_installers, + screenshots=args.screenshots ) try: @@ -1379,6 +1445,9 @@ def main(): hax.naabu() hax.httpx() + # Visual reconnaissance + if hax.screenshots: + hax.gowitness() # Vulnerability scanning hax.ffuf() From 496cc3f50de84736e5931c0858cb43c7a1b25f56 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 17:22:18 +0000 Subject: [PATCH 2/6] Fix gowitness screenshot condition and simplify method call Co-authored-by: ek --- main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 6f618d2..829757d 100644 --- a/main.py +++ b/main.py @@ -1147,6 +1147,9 @@ def install_all_tools(self): def gowitness(self) -> None: """Run gowitness to capture screenshots of active subdomains for visual reconnaissance.""" + if not self.screenshots: + return + if not self.all_subdomains_up: self.print("Gowitness", "No active subdomains found - skipping visual reconnaissance") return @@ -1446,8 +1449,7 @@ def main(): hax.httpx() # Visual reconnaissance - if hax.screenshots: - hax.gowitness() + hax.gowitness() # Vulnerability scanning hax.ffuf() From d33ba2da77ad636b2d9c9acb870e0d81346898b9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 23:04:51 +0000 Subject: [PATCH 3/6] Add gowitness for visual reconnaissance and update Dockerfile Co-authored-by: ek --- Dockerfile | 1 + asset_discovery_analysis.md | 136 ------------------------------------ 2 files changed, 1 insertion(+), 136 deletions(-) delete mode 100644 asset_discovery_analysis.md diff --git a/Dockerfile b/Dockerfile index 75dbeff..f23959d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,7 @@ RUN go install -v github.com/projectdiscovery/pdtm/cmd/pdtm@latest RUN /home/builder/go/bin/pdtm -ia -bp /home/builder/go/bin RUN go install -v github.com/tomnomnom/unfurl@latest RUN go install -v github.com/ffuf/ffuf/v2@latest +RUN go install -v github.com/sensepost/gowitness@latest # Update Nuclei templates and authenticate if the API key is provided. # This runs as the non-root builder user. diff --git a/asset_discovery_analysis.md b/asset_discovery_analysis.md deleted file mode 100644 index 48d65bf..0000000 --- a/asset_discovery_analysis.md +++ /dev/null @@ -1,136 +0,0 @@ -# Asset Discovery Analysis - HaxUnit - -## Current Techniques Implemented in main.py - -### 1. Subdomain Discovery -- **subfinder**: Passive subdomain enumeration from multiple sources -- **chaos**: ProjectDiscovery's Chaos database -- **dnsx_subdomains**: DNS bruteforce with wordlists -- **alterx**: Subdomain permutation generation -- **subwiz**: AI-powered subdomain prediction -- **katana**: Web crawler to discover additional endpoints - -### 2. Network Discovery -- **dnsx_ips**: A record resolution -- **naabu**: Port scanning on discovered subdomains -- **httpx**: HTTP service detection and technology identification - -### 3. Virtual Host Discovery -- **ffuf_vhosts**: Virtual host enumeration using ffuf - -### 4. Content Discovery -- **ffuf**: Directory and file fuzzing -- **katana**: Web crawling for endpoint discovery - -### 5. Vulnerability Scanning -- **nuclei**: Comprehensive vulnerability scanning -- **wpscan**: WordPress-specific scanning -- **acunetix**: Commercial vulnerability scanner integration - -### 6. Certificate Transparency -- Partially covered through subfinder and chaos sources - -## Missing Asset Discovery Techniques - -### 1. **Screenshot and Visual Discovery** -**Missing Tool: gowitness/aquatone** -- **Purpose**: Visual reconnaissance and screenshot capture -- **Value**: Quickly identify interesting applications, login pages, and unique interfaces -- **Implementation Priority**: HIGH - -### 2. **JavaScript Analysis and Endpoint Discovery** -**Missing Tools: subdomainizer, linkfinder, secretfinder** -- **Purpose**: Extract subdomains, endpoints, and secrets from JavaScript files -- **Value**: Discovers hidden endpoints and API keys in client-side code -- **Implementation Priority**: HIGH - -### 3. **Archive Data Mining** -**Missing Tools: waybackurls, gau (GetAllUrls)** -- **Purpose**: Historical URL discovery from web archives -- **Value**: Finds old endpoints, parameters, and forgotten assets -- **Implementation Priority**: MEDIUM - -### 4. **Cloud Asset Discovery** -**Missing Tools: cloud_enum, S3Scanner, bucket_finder** -- **Purpose**: Discover cloud storage buckets and services -- **Value**: Finds misconfigured cloud resources and data leaks -- **Implementation Priority**: HIGH - -### 5. **Advanced OSINT** -**Missing Tools: amass intel, theHarvester, shodan** -- **Purpose**: Intelligence gathering from multiple OSINT sources -- **Value**: Discovers related organizations, ASNs, and infrastructure -- **Implementation Priority**: MEDIUM - -### 6. **API Discovery and Testing** -**Missing Tools: kiterunner, arjun, paramspider** -- **Purpose**: API endpoint and parameter discovery -- **Value**: Finds hidden APIs and parameters for testing -- **Implementation Priority**: HIGH - -### 7. **Social Media and Git Repository Mining** -**Missing Tools: GitDorker, truffleHog, github-subdomains** -- **Purpose**: Extract assets and secrets from public repositories -- **Value**: Finds leaked credentials and infrastructure information -- **Implementation Priority**: MEDIUM - -### 8. **ASN and Network Range Discovery** -**Missing Tools: ASNLookup, amass intel** -- **Purpose**: Map organizational network ranges -- **Value**: Discovers additional IP ranges owned by the organization -- **Implementation Priority**: MEDIUM - -### 9. **Certificate Transparency Enhanced** -**Missing Tools: crt.sh direct API, certspotter** -- **Purpose**: Enhanced certificate transparency log analysis -- **Value**: More comprehensive subdomain discovery from SSL certificates -- **Implementation Priority**: LOW (partially covered) - -### 10. **Technology Stack Analysis** -**Missing Tools: wappalyzer, whatweb, builtwith** -- **Purpose**: Detailed technology identification -- **Value**: Better targeting of technology-specific vulnerabilities -- **Implementation Priority**: MEDIUM - -### 11. **Domain Reputation and Threat Intelligence** -**Missing Tools: virustotal, alienvault OTX** -- **Purpose**: Threat intelligence and reputation analysis -- **Value**: Identifies potentially malicious or compromised assets -- **Implementation Priority**: LOW - -### 12. **Mobile Application Analysis** -**Missing Tools: MobSF integration, APK analysis** -- **Purpose**: Mobile app asset discovery -- **Value**: Discovers mobile-specific endpoints and secrets -- **Implementation Priority**: LOW - -## Recommended Implementation - -### Phase 1 - High Priority (Immediate Implementation) -1. **gowitness/aquatone** - Visual reconnaissance -2. **subdomainizer** - JavaScript analysis for subdomains -3. **cloud_enum** - Cloud asset discovery -4. **kiterunner** - API discovery -5. **arjun/paramspider** - Parameter discovery - -### Phase 2 - Medium Priority -1. **waybackurls/gau** - Archive mining -2. **theHarvester** - Enhanced OSINT -3. **amass intel** - ASN mapping -4. **wappalyzer** - Technology identification - -### Phase 3 - Lower Priority -1. **GitHub mining tools** -2. **Threat intelligence integration** -3. **Mobile application analysis** - -## Implementation Strategy - -The missing tools should be implemented as modular methods following the existing pattern: -- Each tool as a separate method -- Integration with the existing workflow -- Proper error handling and output formatting -- Configuration options in the argument parser -- Results integration with existing output formats - -The most impactful additions would be visual reconnaissance (gowitness), JavaScript analysis (subdomainizer), and cloud asset discovery (cloud_enum) as these provide unique value not covered by existing tools. \ No newline at end of file From 623843daa94fc53a89440da622dce396ac9548ba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 23:08:00 +0000 Subject: [PATCH 4/6] Fix gowitness screenshot workflow with custom db path and error handling Co-authored-by: ek --- main.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index 829757d..602c901 100644 --- a/main.py +++ b/main.py @@ -1168,30 +1168,34 @@ def gowitness(self) -> None: # Run gowitness on active subdomains threads = "20" if not self.quick else "10" timeout = "15" if not self.quick else "10" + db_path = f"{self.dir_path}/gowitness.sqlite3" gowitness_cmd = ( f"gowitness file " f"-f {self.dir_path}/all_subdomains_up.txt " f"-P {screenshots_dir}/ " + f"-D {db_path} " f"-t {timeout} " f"--threads {threads} " f"--log-level {'info' if self.verbose else 'fatal'} " f"--disable-logging-colors " f"--screenshot-format png " - f"--chrome-path /usr/bin/google-chrome " f"--user-agent 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'" ) self.cmd(gowitness_cmd) - # Generate gowitness report - report_cmd = ( - f"gowitness report export " - f"-f {screenshots_dir}/gowitness.sqlite3 " - f"--format csv " - f"> {self.dir_path}/gowitness_report.csv" - ) - self.cmd(report_cmd) + # Generate gowitness report only if database exists + if self.cmd(f"test -f {db_path}", silent=True) == "": + report_cmd = ( + f"gowitness report export " + f"-f {db_path} " + f"--format csv " + f"> {self.dir_path}/gowitness_report.csv" + ) + self.cmd(report_cmd) + else: + self.print("Gowitness", "Database not found, skipping report generation", Colors.WARNING) # Count screenshots taken screenshot_count = self.cmd(f"ls {screenshots_dir}/*.png 2>/dev/null | wc -l", silent=True) From 146a26caa2f91f39be4fdf1474c2bb54661d8ec2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 23:11:38 +0000 Subject: [PATCH 5/6] Remove gowitness report generation and simplify screenshot logging Co-authored-by: ek --- main.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/main.py b/main.py index 602c901..e58fc53 100644 --- a/main.py +++ b/main.py @@ -1185,23 +1185,10 @@ def gowitness(self) -> None: self.cmd(gowitness_cmd) - # Generate gowitness report only if database exists - if self.cmd(f"test -f {db_path}", silent=True) == "": - report_cmd = ( - f"gowitness report export " - f"-f {db_path} " - f"--format csv " - f"> {self.dir_path}/gowitness_report.csv" - ) - self.cmd(report_cmd) - else: - self.print("Gowitness", "Database not found, skipping report generation", Colors.WARNING) - # Count screenshots taken screenshot_count = self.cmd(f"ls {screenshots_dir}/*.png 2>/dev/null | wc -l", silent=True) if screenshot_count and screenshot_count.strip() != "0": self.print("Gowitness", f"Captured {screenshot_count.strip()} screenshots in {screenshots_dir}/") - self.print("Gowitness", f"Report saved to {self.dir_path}/gowitness_report.csv") else: self.print("Gowitness", "No screenshots captured - check if subdomains are accessible", Colors.WARNING) From 8cea34a54d535c8e346cec9e54b58bbb38f3d7d3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 17 Jul 2025 23:27:36 +0000 Subject: [PATCH 6/6] Add gowitness report generation with database check and logging Co-authored-by: ek --- main.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/main.py b/main.py index e58fc53..602c901 100644 --- a/main.py +++ b/main.py @@ -1185,10 +1185,23 @@ def gowitness(self) -> None: self.cmd(gowitness_cmd) + # Generate gowitness report only if database exists + if self.cmd(f"test -f {db_path}", silent=True) == "": + report_cmd = ( + f"gowitness report export " + f"-f {db_path} " + f"--format csv " + f"> {self.dir_path}/gowitness_report.csv" + ) + self.cmd(report_cmd) + else: + self.print("Gowitness", "Database not found, skipping report generation", Colors.WARNING) + # Count screenshots taken screenshot_count = self.cmd(f"ls {screenshots_dir}/*.png 2>/dev/null | wc -l", silent=True) if screenshot_count and screenshot_count.strip() != "0": self.print("Gowitness", f"Captured {screenshot_count.strip()} screenshots in {screenshots_dir}/") + self.print("Gowitness", f"Report saved to {self.dir_path}/gowitness_report.csv") else: self.print("Gowitness", "No screenshots captured - check if subdomains are accessible", Colors.WARNING)