# Patch summary: adds a GitHub Actions workflow that loads project data into
# Elasticsearch, the import script it runs, and the script's pinned
# dependencies.
#
# .github/workflows/import.yml
#   Runs on pushes and pull requests targeting main, and on manual dispatch.
#   Installs tools/requirements.txt, then runs tools/import.py with the
#   ES_HOST / ES_USER / ES_TOKEN secrets passed as command-line options.
#
# tools/import.py
#   generate_projects() reads ./data.yml (relative to the working directory)
#   and yields one document per entry in each user's `repos` and
#   `repos_all_branches` lists. The `user` field is the gitee_id when the
#   repo string contains 'gitee.com', otherwise the github_id.
#   _main() deletes the `whitebox_projects` index (a 404 is ignored) and then
#   bulk-indexes the generated documents into it.
#
# tools/requirements.txt
#   Pins click, elasticsearch and PyYAML.
#
# NOTE(review): the index is deleted and rebuilt on every run, and the
#   workflow also triggers on pull_request - confirm that PR runs are meant
#   to touch the target cluster.
# NOTE(review): verify_certs=False turns off TLS certificate verification
#   for the Elasticsearch connection.
# NOTE(review): x.get("users") returns None when data.yml has no `users`
#   key, which makes the following `for` loop raise TypeError.
# NOTE(review): created_at formats the host's local time but appends a
#   literal +0800 offset; this is only correct if the host clock is UTC+8 -
#   confirm the runner's timezone.
# NOTE(review): "Load complete." is printed even when a document was
#   reported as failed; the script does not signal the failure otherwise.
# NOTE(review): 'Unknow' is probably meant to be 'Unknown'; it is a stored
#   value, so changing it changes the indexed data.
# NOTE(review): python-version '3.6' may no longer be installable on
#   ubuntu-latest runners - TODO confirm.
# NOTE(review): leading whitespace below was restored from a
#   whitespace-collapsed copy of this patch; verify indentation against the
#   original commit.
diff --git a/.github/workflows/import.yml b/.github/workflows/import.yml
new file mode 100644
index 0000000..f7f24d9
--- /dev/null
+++ b/.github/workflows/import.yml
@@ -0,0 +1,28 @@
+name: Import data to ES
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.6'
+
+      - name: Install
+        run: |
+          pip3 install -r tools/requirements.txt
+
+      - name: Run
+        run: |
+          python tools/import.py --host ${{ secrets.ES_HOST }} --user ${{ secrets.ES_USER }} --passwd ${{ secrets.ES_TOKEN }}
diff --git a/tools/import.py b/tools/import.py
new file mode 100644
index 0000000..a29448d
--- /dev/null
+++ b/tools/import.py
@@ -0,0 +1,46 @@
+import time
+
+import click
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import streaming_bulk
+import yaml
+
+
+def generate_projects():
+    with open("./data.yml") as f:
+        x = yaml.load(f, Loader=yaml.FullLoader)
+    users = x.get("users")
+    for user in users:
+        repos = user.get('repos', []) + user.get('repos_all_branches', [])
+        for repo in repos:
+            gitee_user = user.get('gitee_id', 'Unknow')
+            github_user = user.get('github_id', 'Unknow')
+            doc = {
+                "name": user.get('name', 'Unknow'),
+                "user": gitee_user if 'gitee.com' in repo else github_user,
+                "repo": repo,
+                "created_at": time.strftime("%Y-%m-%dT%H:00:00+0800")
+            }
+            yield doc
+
+
+@click.command()
+@click.option('--host')
+@click.option('--user')
+@click.option('--passwd')
+def _main(host, user, passwd):
+    es = Elasticsearch([host], http_auth=(user, passwd), use_ssl=True, verify_certs=False)
+    # Cleanup es index
+    es.indices.delete(index='whitebox_projects', ignore=[404])
+
+    actions = streaming_bulk(
+        client=es, index="whitebox_projects", actions=generate_projects()
+    )
+    for ok, action in actions:
+        if not ok:
+            print("Failed to insert doc...")
+    print("Load complete.")
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/tools/requirements.txt b/tools/requirements.txt
new file mode 100644
index 0000000..7e31e0a
--- /dev/null
+++ b/tools/requirements.txt
@@ -0,0 +1,3 @@
+click==8.0.1
+elasticsearch==7.13.2
+PyYAML==5.4.1