Skip to content
This repository was archived by the owner on Dec 17, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ __pycache__
scripts/pulse-results/*.json
.DS_Store
venv
build/
dist/
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions gatherers/censys.py → domain_scan/gatherers/censys.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from google.oauth2 import service_account
import google.api_core.exceptions

from gatherers.gathererabc import Gatherer
from utils import utils
from domain_scan.gatherers.gathererabc import Gatherer
from domain_scan.utils import utils

# Options:
#
Expand Down
2 changes: 1 addition & 1 deletion gatherers/rdns.py → domain_scan/gatherers/rdns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Generator, List, Pattern

from gatherers.gathererabc import Gatherer
from domain_scan.gatherers.gathererabc import Gatherer

# Reverse DNS
#
Expand Down
4 changes: 2 additions & 2 deletions gatherers/url.py → domain_scan/gatherers/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import requests

from gatherers.gathererabc import Gatherer
from utils import utils
from domain_scan.gatherers.gathererabc import Gatherer
from domain_scan.utils import utils


class Gatherer(Gatherer):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion runner/runner.py → domain_scan/runner/runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from utils import utils
from domain_scan.utils import utils


def write_rows(rows, domain, base_domain, scanner, csv_writer, meta=None):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion scanners/a11y.py → domain_scan/scanners/a11y.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import requests
import yaml

from utils import utils
from domain_scan.utils import utils


workers = 3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os

from utils import utils
from domain_scan.utils import utils

# Check whether a domain is present in a CSV, set in --analytics.

Expand Down
2 changes: 1 addition & 1 deletion scanners/csp.py → domain_scan/scanners/csp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import requests
from scanners import utils
from domain_scan.scanners import utils

###
# CSP Scanner - check the presence of CSP headers
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import json

from utils import utils
from domain_scan.utils import utils

###
# Local Python bridge to the JS bridge to the JS scanner.
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion scanners/pshtt.py → domain_scan/scanners/pshtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re

from pshtt import pshtt
from utils import utils
from domain_scan.utils import utils

###
# Measure a site's HTTP behavior using DHS NCATS' pshtt tool.
Expand Down
2 changes: 1 addition & 1 deletion scanners/sslyze.py → domain_scan/scanners/sslyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from cryptography.hazmat.primitives.serialization import Encoding
from cryptography.hazmat.primitives.asymmetric import ec, dsa, rsa

from utils import utils
from domain_scan.utils import utils

# Number of seconds to wait during sslyze connection check.
# Not much patience here, and very willing to move on.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from utils import utils
from domain_scan.utils import utils

# Evaluate third party service usage using Chrome headless.

Expand Down
Empty file added domain_scan/utils/__init__.py
Empty file.
File renamed without changes.
6 changes: 3 additions & 3 deletions gather
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import requests
import logging
import importlib

from utils import utils
from domain_scan.utils import utils

# some metadata about the scan itself
start_time = utils.local_now()
Expand Down Expand Up @@ -55,14 +55,14 @@ def run(options=None, cache_dir="./cache", results_dir="./results"):

try:
gatherer_module = importlib.import_module(
"gatherers.%s" % source)
"domain_scan.gatherers.%s" % source)
gatherer = gatherer_module.Gatherer(suffixes, options, extra)
except ImportError:
# If it's not a registered module, allow it to be "hot registered"
# as long as the user gave us a flag with that name that can be
# used as the --url option to the URL module.
if options.get(source):
gatherer_module = importlib.import_module("gatherers.url")
gatherer_module = importlib.import_module("domain_scan.gatherers.url")
extra['name'] = source
gatherer = gatherer_module.Gatherer(suffixes, options, extra)
else:
Expand Down
5 changes: 2 additions & 3 deletions lambda/lambda_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import logging

from utils import utils
from domain_scan.utils import utils

# Central handler for all Lambda events.
def handler(event, context):
Expand All @@ -19,7 +19,7 @@ def handler(event, context):

# Might be acceptable to let this crash the module, in Lambda.
try:
scanner = importlib.import_module("scanners.%s" % name)
scanner = importlib.import_module("domain_scan.scanners.%s" % name)
except ImportError:
exc_type, exc_value, exc_traceback = sys.exc_info()
logging.error("[%s] Scanner not found, or had an error during loading.\n\tERROR: %s\n\t%s" % (name, exc_type, exc_value))
Expand Down Expand Up @@ -49,4 +49,3 @@ def handler(event, context):
# date transform functions in one place, before Amazon's built-in
# JSON serialization prepares the data for transport.
return utils.from_json(utils.json_for(response))

8 changes: 4 additions & 4 deletions scan
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ import boto3
import botocore
from concurrent.futures import ThreadPoolExecutor

from scanners.headless.local_bridge import headless_scan
from utils import utils
from runner import runner
from domain_scan.scanners.headless.local_bridge import headless_scan
from domain_scan.utils import utils
from domain_scan.runner import runner


# Default and maximum for local workers (threads) per-scanner.
Expand Down Expand Up @@ -130,7 +130,7 @@ def run(options=None):

for name in options.get("scan").split(","):
try:
scanner = importlib.import_module("scanners.%s" % name)
scanner = importlib.import_module("domain_scan.scanners.%s" % name)
except ImportError:
exc_type, exc_value, exc_traceback = sys.exc_info()
logging.error("[%s] Scanner not found, or had an error during loading.\n\tERROR: %s\n\t%s" % (name, exc_type, exc_value))
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[bdist_wheel]
universal = true
84 changes: 84 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
setup module for domain-scan

Declares the package metadata, runtime dependencies and CLI scripts so that
domain-scan can be built and installed with setuptools/pip.

Based on:

- https://packaging.python.org/distributing/
- https://github.com/pypa/sampleproject/blob/master/setup.py
- https://github.com/dhs-ncats/pshtt/blob/master/setup.py
"""

from setuptools import setup, find_packages

setup(
    name='domain-scan',

    # Versions should comply with PEP 440. The development-release segment
    # is written in its normalized form (".devN"); "0.1.0-dev1" is only
    # accepted through normalization and resolves to this same version.
    version='0.1.0.dev1',
    description=(
        'lightweight scan pipeline for orchestrating third party tools, '
        'at scale and (optionally) using serverless infrastructure'
    ),

    # Homepage of the organization that maintains the project
    url='https://18f.gsa.gov',
    # Where the project's source can be obtained
    download_url='https://github.com/18F/domain-scan',

    # Author details
    author='GSA 18F',
    author_email='pulse@cio.gov',

    # Free-text license name. The machine-readable form is the
    # "License ::" trove classifier listed below, which does not belong
    # in this field.
    license='CC0 1.0 Universal',

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 4 - Beta',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',

        # Should match "license" above
        'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication',

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        # NOTE(review): some modules in the package import `typing`, which
        # suggests the code may be Python 3 only -- confirm whether the
        # Python 2 classifiers (and the universal wheel flag in setup.cfg)
        # are accurate.
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],

    # What does your project relate to?
    keywords='https best practices web-crawling domain scanning',

    # Pick up every package (directory with an __init__.py) under the
    # project root.
    packages=find_packages(),

    # Runtime dependencies installed alongside the package
    install_requires=[
        'strict-rfc3339',
        'publicsuffix',
        'boto3',
        'ipython',
        'sslyze>=1.3.4,<1.4.0',
        'cryptography',
        'pyyaml',
        'requests',
        'google-cloud-bigquery',
        'google-auth-oauthlib',
    ],

    # Optional dependency groups, e.g. `pip install domain-scan[test]`
    extras_require={
        'test': [
            'pytest',
        ],
    },

    # Conveniently allows one to run the CLI scripts
    scripts=[
        'gather',
        'scan',
    ],
)
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import pytest
from .context import utils # noqa
from utils import utils as subutils
from domain_scan.utils import utils as subutils


def get_default_false_values(parser):
Expand Down