From 25afae659e1a982f5f994dcfcd24b628c5c04541 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:30:35 +0800 Subject: [PATCH 1/6] Add import.py --- tools/import.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tools/import.py diff --git a/tools/import.py b/tools/import.py new file mode 100644 index 0000000..cef62e5 --- /dev/null +++ b/tools/import.py @@ -0,0 +1,46 @@ +import time + +import click +from elasticsearch import Elasticsearch +from elasticsearch.helpers import streaming_bulk +import yaml + + +def generate_projects(): + with open("../data.yml") as f: + x = yaml.load(f, Loader=yaml.FullLoader) + users = x.get("users") + for user in users: + repos = user.get('repos', []) + user.get('repos_all_branches', []) + for repo in repos: + gitee_user = user.get('gitee_id', 'Unknow') + github_user = user.get('github_id', 'Unknow') + doc = { + "name": user.get('name', 'Unknow'), + "user": gitee_user if 'gitee.com' in repo else github_user, + "repo": repo, + "created_at": time.strftime("%Y-%m-%dT%H:00:00+0800") + } + yield doc + + +@click.command() +@click.option('--host') +@click.option('--user') +@click.option('--passwd') +def _main(host, user, passwd): + es = Elasticsearch([host], http_auth=(user, passwd), use_ssl=True, verify_certs=False) + # Cleanup es index + es.indices.delete(index='whitebox_projects') + + actions = streaming_bulk( + client=es, index="whitebox_projects", actions=generate_projects() + ) + for ok, action in actions: + if not ok: + print("Failed to insert doc...") + print("Load complete.") + + +if __name__ == "__main__": + _main() From 9ba31f182af38c58e1053399b2e5bbb4684812e0 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:32:28 +0800 Subject: [PATCH 2/6] Create requirements.txt --- tools/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/requirements.txt diff --git a/tools/requirements.txt b/tools/requirements.txt new file mode 100644 index 0000000..7e31e0a --- /dev/null +++ b/tools/requirements.txt @@ -0,0 +1,3 @@ +click==8.0.1 +elasticsearch==7.13.2 +PyYAML==5.4.1 From 79d83829d3faae3217248bc96465f80b97ca6171 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:39:59 +0800 Subject: [PATCH 3/6] Add action --- .github/workflows/import.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/import.yml diff --git a/.github/workflows/import.yml b/.github/workflows/import.yml new file mode 100644 index 0000000..8ff5f1c --- /dev/null +++ b/.github/workflows/import.yml @@ -0,0 +1,28 @@ +name: Import data to ES + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.6' + + - name: Install + run: | + pip3 install -r requirements.txt + + - name: Run + run: | + python tools/import.py --host ${{ secrets.ES_HOST }} --user ${{ secrets.ES_USER }} --passwd ${{ secrets.ES_TOKEN }} From d4537423f5daaf5587aaa46e792b7a393fd762dc Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:41:21 +0800 Subject: [PATCH 4/6] Update import.yml --- .github/workflows/import.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/import.yml b/.github/workflows/import.yml index 8ff5f1c..f7f24d9 100644 --- a/.github/workflows/import.yml +++ b/.github/workflows/import.yml @@ -21,7 +21,7 @@ jobs: - name: Install run: | - pip3 install -r requirements.txt + pip3 install -r tools/requirements.txt - name: Run run: | From ce5364583903a77824c13507711d7e8d5d50987b Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:44:31 +0800 Subject: [PATCH 5/6] Update import.py --- tools/import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/import.py b/tools/import.py index cef62e5..e037af4 100644 --- a/tools/import.py +++ b/tools/import.py @@ -7,7 +7,7 @@ def generate_projects(): - with open("../data.yml") as f: + with open("./data.yml") as f: x = yaml.load(f, Loader=yaml.FullLoader) users = x.get("users") for user in users: From 76d50f8fd1d30b9fbdb6012b34fc5f526f4c3be5 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 24 Jun 2021 11:46:50 +0800 Subject: [PATCH 6/6] Update import.py --- tools/import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/import.py b/tools/import.py index e037af4..a29448d 100644 --- a/tools/import.py +++ b/tools/import.py @@ -31,7 +31,7 @@ def generate_projects(): def _main(host, user, passwd): es = Elasticsearch([host], http_auth=(user, passwd), use_ssl=True, verify_certs=False) # Cleanup es index - es.indices.delete(index='whitebox_projects') + es.indices.delete(index='whitebox_projects', ignore=[404]) actions = streaming_bulk( client=es, index="whitebox_projects", actions=generate_projects()