Changes from all commits — 59 commits
96eb6eb
Use literal version instead of scm version. (#2)
pitt-liang Dec 4, 2023
99df4ed
Host documentation in ReadTheDocs. (#3)
pitt-liang Dec 4, 2023
124429a
Update README.md (#4)
pitt-liang Dec 6, 2023
b1d0a42
Add github workflow (#5)
pitt-liang Dec 27, 2023
10c9d54
Fix Github workflow (#6)
pitt-liang Dec 27, 2023
0dd26a4
fix: Hacky way to wait prediction service to be ready (#8)
pitt-liang Jan 2, 2024
f972256
fix: update ModelScopeEstimator image (#7)
YuZ1225 Jan 2, 2024
05e391f
release v0.4.5 (#9)
pitt-liang Jan 3, 2024
9ff250e
Supports dedicated resource && fix list training job logs. (#11)
pitt-liang Feb 26, 2024
3ea7b77
feat: add Processor for submitting ProcessingJob
firgavin Jan 24, 2024
487b212
misc: disable warnings in nox integration test and fix nox test.ini.t…
firgavin Jan 24, 2024
22c68a4
feat: build evaluation processor for registered model
firgavin Jan 26, 2024
67874dc
chore: bump aiworkspace client to v3.0.2
firgavin Jan 30, 2024
f82074d
fix: fix outdated unit tests
firgavin Feb 20, 2024
7161b10
feat: add processor user guide
firgavin Mar 27, 2024
c071679
feat: add list datasets utils and processor waiting all job done
firgavin Apr 9, 2024
a63b181
chore: bump paistudio client to v1.1.7
firgavin Mar 22, 2024
a01c992
feat: estimator and processor support to set environments and require…
firgavin Mar 22, 2024
8534dff
feat: increase service gateway readiness threshold
pitt-liang Apr 18, 2024
47b45d9
release: 0.4.6 (#13)
pitt-liang Apr 22, 2024
0f7d2e5
feat: implement experiment feature with api, estimator, and processor
yangmianmian Apr 24, 2024
45f6f39
feat: enhance CI/CD, add features and fixes to improve model deployme…
pitt-liang Apr 26, 2024
9e1a08f
add labels for service deploy via quickstart model (#16)
pitt-liang Apr 29, 2024
786057c
release: 0.4.7 (#18)
pitt-liang Apr 29, 2024
094092d
feat: add logging utils for the library. (#20)
pitt-liang May 20, 2024
7f3c8d7
fix session setup and command line utilities for configuration. (#21)
pitt-liang May 20, 2024
5705a75
feat: RegisteredModel.get_estimator supports selecting training metho…
YuZ1225 May 20, 2024
cd5dcbc
fix pai.toolkit.config in dsw notebook binding with DefaultRole (#22)
pitt-liang Jun 3, 2024
d4238ed
release 0.4.7.post0 (#23)
pitt-liang Jun 3, 2024
50ad53b
feat: add network parameter for session (#24)
pitt-liang Jun 11, 2024
130c99b
Refactor training job submit. (#28)
pitt-liang Jun 28, 2024
fb932ab
feat: add ModelTrainingRecipe/ModelRecipe (#29)
pitt-liang Jun 28, 2024
af857a7
feat: add `url_suffix` parameter to `predictor.openai` (#30)
pitt-liang Jul 1, 2024
be7962b
release: 0.4.8 (#31)
pitt-liang Jul 1, 2024
1dd66c0
feat: spot instance/job settings supports. (#33)
pitt-liang Jul 11, 2024
ca7322c
feat: Storage/SharedMemory configuration supports in EAS service (#34)
pitt-liang Jul 12, 2024
3c3b162
release: 0.4.9 (#35)
pitt-liang Jul 12, 2024
cbe8ac7
feat: Add support for spot instance in Lingjun environment
luoyy82 Jul 15, 2024
9386329
build: support both `pai` and `alipai` package release (#37)
pitt-liang Jul 18, 2024
46b3200
release: 0.4.9.post0 (#38)
pitt-liang Jul 18, 2024
bf4dba2
feat: setup default session in DSW environment (#40)
pitt-liang Aug 22, 2024
0c12db6
doc: remove tutorial notebook, add model recipe document (#39)
pitt-liang Aug 22, 2024
d847a6f
feat: support model compression spec (#42)
YuZ1225 Oct 9, 2024
2e3cf64
release: 0.4.10 (#43)
pitt-liang Oct 9, 2024
45ac6e9
fix: hotfix code upload in training job (#44)
pitt-liang Oct 21, 2024
4bb3cef
release: 0.4.10.post0 (#45)
pitt-liang Oct 21, 2024
57f7579
update aiworkspace openapi to 5.0.1
everettli Nov 7, 2024
dd73703
feat: support log_lineage
everettli Nov 13, 2024
932b6ca
lint: code format
everettli Nov 13, 2024
6ea65e6
fix: unittest fail
everettli Nov 14, 2024
f636fa0
fix: unittest fail
everettli Nov 14, 2024
1458d1b
lint: code format
everettli Nov 14, 2024
86e4773
feat: support log_lineage in dlc
everettli Nov 15, 2024
7abec53
fix: lineage integration test fail
everettli Nov 15, 2024
a8c342e
fix: lineage integration test fail
everettli Nov 21, 2024
5ac2ae2
feat: support pvc and nas-file lineage
everettli Dec 26, 2024
33182af
feat: support oss-file lineage
everettli Jan 8, 2025
61034f1
feat: Adapt to new data source configuration of lineage.
everettli Jan 4, 2026
6743e11
fix: Fix lint error.
everettli Jan 4, 2026
35 changes: 35 additions & 0 deletions .github/workflows/lint.yaml
@@ -0,0 +1,35 @@
name: Lint test

on: [push]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  common-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"
      - name: Install pre-commit hook
        run: |
          pip install pre-commit
      - name: Linting
        run: pre-commit run --all-files
  doc-lint:
Comment on lines +11 to +23

Check warning — Code scanning / CodeQL: Workflow does not contain permissions (Medium)

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}

Copilot Autofix (AI, 2 months ago)

In general, the fix is to explicitly declare a restrictive permissions: block for the workflow or for each job, instead of relying on the potentially broad repository default. For a pure lint workflow that only checks out code and runs local tools, the minimal required permission is contents: read.

The best way to fix this without changing existing functionality is to add a top-level permissions: block after the on: section in .github/workflows/lint.yaml, setting contents: read. It applies to both the common-lint and doc-lint jobs, neither of which needs to write to the repository or modify other GitHub resources. No changes to the individual jobs or steps are required, and no imports or external dependencies are involved, because this is a YAML workflow definition.

Concretely, in .github/workflows/lint.yaml, insert

permissions:
  contents: read

between the existing on: [push] and concurrency: keys.

Suggested changeset: .github/workflows/lint.yaml. Run the following command in your local git repository to apply the patch:

cat << 'EOF' | git apply
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -2,6 +2,9 @@
 
 on: [push]
 
+permissions:
+  contents: read
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
EOF
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"
      - name: Install Nox
        run: |
          pip install nox
      - name: Linting
        run: nox -s doc
Comment on lines +24 to +35

Check warning — Code scanning / CodeQL: Workflow does not contain permissions (Medium)

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}

Copilot Autofix (AI, 2 months ago)

In general, the fix is to explicitly define a permissions block, either at the workflow root (so it applies to all jobs) or per job, granting only the minimal scopes required. For this workflow, the jobs only need to read repository contents so that actions/checkout can function; they do not need to write to contents, issues, or pull requests.

The minimal fix that preserves behavior is to add a root-level permissions block just under the workflow name (or above on:), setting contents: read. It applies to both the common-lint and doc-lint jobs, which continue to function exactly as before, but with the GITHUB_TOKEN restricted to read-only repository contents. No other scopes (such as pull-requests or issues) are needed, because the jobs do not interact with those APIs.

Concretely, edit .github/workflows/lint.yaml near the top: insert

permissions:
  contents: read

after name: Lint test (line 1) and before on: [push] (line 3). No imports or additional methods are required, because this is YAML configuration only.

Suggested changeset: .github/workflows/lint.yaml. Run the following command in your local git repository to apply the patch:

cat << 'EOF' | git apply
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -1,5 +1,8 @@
 name: Lint test
 
+permissions:
+  contents: read
+
 on: [push]
 
 concurrency:
EOF
41 changes: 41 additions & 0 deletions .github/workflows/publish.yaml
@@ -0,0 +1,41 @@
name: Publish Package
on:
  push:
    tags:
      - 'v*'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    name: Publish Package
    runs-on: ubuntu-latest
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      PAI_PYPI_TOKEN: ${{ secrets.PAI_PYPI_TOKEN }}
      ALIPAI_PYPI_TOKEN: ${{ secrets.ALIPAI_PYPI_TOKEN }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: pip install wheel setuptools twine
      # build and upload package pai
      - name: Build package for pai
        run: python setup.py sdist bdist_wheel
      - name: Publish package to PyPI (pai)
        run: twine upload dist/* --skip-existing -u __token__ -p $PAI_PYPI_TOKEN
      - name: cleanup
        run: |
          rm -rf dist
          rm -rf build
          rm -rf pai.egg-info
      # build and upload package alipai
      - name: Build package for alipai
        run: PACKAGE_NAME=alipai python setup.py sdist bdist_wheel
      - name: Publish package to PyPI (alipai)
        run: twine upload dist/* --skip-existing -u __token__ -p $ALIPAI_PYPI_TOKEN
59 changes: 59 additions & 0 deletions .github/workflows/release_trigger.yaml
@@ -0,0 +1,59 @@
name: Release Trigger
on:
  pull_request:
    types: [closed]
    branches:
      - master
    paths:
      - 'pai/version.py'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    name: Release Trigger
    runs-on: ubuntu-latest
    if: github.event.pull_request.merged == true && startsWith(github.head_ref, 'releases/v')
    env:
      PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
      PAI_PYPI_TOKEN: ${{ secrets.PAI_PYPI_TOKEN }}
      ALIPAI_PYPI_TOKEN: ${{ secrets.ALIPAI_PYPI_TOKEN }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Check version match
        id: check_version
        run: |
          BRANCH_VERSION=${{ github.head_ref }}
          BRANCH_VERSION=${BRANCH_VERSION#releases/v}
          FILE_VERSION=$(python -c "from pai.version import VERSION; print(VERSION)")
          if [[ "$BRANCH_VERSION" != "$FILE_VERSION" ]]; then
            echo "Version in branch name ($BRANCH_VERSION) does not match version in file ($FILE_VERSION)"
            exit 1
          fi
      - name: Get version and create version tag
        run: |
          VERSION=$(python -c "from pai.version import VERSION; print(VERSION)")
          git tag v$VERSION
          git push origin v$VERSION
      # a git tag pushed by the GitHub Actions bot will not trigger another workflow run
      - name: Install dependencies
        run: pip install wheel setuptools twine
      - name: Build package for pai
        run: python setup.py sdist bdist_wheel
      - name: Publish package to PyPI (pai)
        run: twine upload dist/* --skip-existing -u __token__ -p $PAI_PYPI_TOKEN
      - name: cleanup
        run: |
          rm -rf dist
          rm -rf build
          rm -rf pai.egg-info
      - name: Build package for alipai
        run: PACKAGE_NAME=alipai python setup.py sdist bdist_wheel
      - name: Publish package to PyPI (alipai)
        run: twine upload dist/* --skip-existing -u __token__ -p $ALIPAI_PYPI_TOKEN
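The "Check version match" step above refuses to tag a release unless the branch name releases/vX.Y.Z agrees with the version declared in pai/version.py. The same gate can be sketched in Python (a hedged illustration; the workflow itself performs this check in shell):

```python
def versions_match(branch: str, file_version: str) -> bool:
    """Return True when a release branch name such as "releases/v0.4.9"
    matches the version declared in pai/version.py (file_version)."""
    prefix = "releases/v"
    if not branch.startswith(prefix):
        # The job only runs for merged releases/v* branches.
        return False
    # Strip the prefix, mirroring ${BRANCH_VERSION#releases/v} in the workflow.
    return branch[len(prefix):] == file_version


# The tag v0.4.9 is only created when both sides agree.
print(versions_match("releases/v0.4.9", "0.4.9"))  # True
```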
22 changes: 22 additions & 0 deletions .github/workflows/unit.yaml
@@ -0,0 +1,22 @@
name: Unit test

on: [push]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"
      - name: Install Nox
        run: |
          pip install nox
      - name: Linting
        run: nox -s unit
Comment on lines +11 to +22

Check warning — Code scanning / CodeQL: Workflow does not contain permissions (Medium)

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}

Copilot Autofix (AI, 2 months ago)

In general, to fix this problem you should explicitly declare the minimal required GITHUB_TOKEN permissions for the workflow or for each job. For a unit-test workflow that only checks out code and runs tests, contents: read is usually sufficient. Defining it at the top level makes it apply to all jobs unless overridden.

The minimal fix here is to add a permissions: block at the root of the workflow, just under the on: trigger. It constrains GITHUB_TOKEN for the unit-test job (and any future jobs) without changing any steps; only YAML configuration is added.

Concretely, in .github/workflows/unit.yaml, insert

permissions:
  contents: read

between the on: [push] line and the existing concurrency: block. No other files or regions need to change.

Suggested changeset: .github/workflows/unit.yaml. Run the following command in your local git repository to apply the patch:

cat << 'EOF' | git apply
diff --git a/.github/workflows/unit.yaml b/.github/workflows/unit.yaml
--- a/.github/workflows/unit.yaml
+++ b/.github/workflows/unit.yaml
@@ -2,6 +2,9 @@
 
 on: [push]
 
+permissions:
+  contents: read
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
EOF
7 changes: 6 additions & 1 deletion .pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
         - -w
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
         name: isort (python)
@@ -53,3 +53,8 @@ repos:
     rev: 0.6.1
     hooks:
      - id: nbstripout
+
+  - repo: https://github.com/gitleaks/gitleaks
+    rev: v8.16.1
+    hooks:
+      - id: gitleaks
99 changes: 68 additions & 31 deletions README.md
This PR replaces the English README.md with the updated Chinese version (translated below); the English text moves to README_CN.md. The updated README:

# PAI Python SDK

[English](./README_CN.md) | 简体中文

The PAI Python SDK is provided by Alibaba Cloud's [Platform for Artificial Intelligence (PAI)](https://www.aliyun.com/product/bigdata/learn). It offers an easy-to-use high-level API that lets machine learning engineers train and deploy models on PAI with Python, connecting the stages of the machine learning workflow.

## 🔧 Installation

Install the PAI Python SDK with the following command (Python >= 3.8 is supported):

```shell
python -m pip install pai
```

## 📖 Documentation

For detailed documentation, including user guides and the API reference, visit the [PAI Python SDK documentation](https://pai.readthedocs.io/) or see the files under the [docs](./docs) directory.

## 🛠 Usage Examples

- Submit a custom training job

The following code shows how to submit a custom training job through the SDK:

```python
from pai.estimator import Estimator
from pai.image import retrieve

est = Estimator(
    # Retrieve the latest PyTorch image provided by PAI
    image_uri=retrieve(
        framework_name="PyTorch", framework_version="latest"
    ).image_uri,
    command="echo hello",
    # Optionally, specify source_dir to upload your training code:
    # source_dir="./train_src",
    instance_type="ecs.c6.large",
)

# Submit the training job
est.fit()

print(est.model_data())
```

- Deploy a large language model

PAI provides many pretrained models that can be deployed easily with the PAI Python SDK:

```python
from pai.model import RegisteredModel

# Retrieve the Qwen1.5-7B model provided by PAI
qwen_model = RegisteredModel("qwen1.5-7b-chat", model_provider="pai")

# Deploy the model
p = qwen_model.deploy(service_name="qwen_service")

# Call the service
p.predict(
    data={
        "prompt": "What is the purpose of life?",
        "system_prompt": "You are a helpful assistant.",
        "temperature": 0.8,
    }
)

# Large language models provided by PAI support the OpenAI API and can be
# called through the openai SDK
openai_client = p.openai()
res = openai_client.chat.completions.create(
    model="default",
    max_tokens=1024,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the purpose of life?"},
    ],
)
print(res.choices[0].message.content)
```

- Fine-tune a pretrained model

Submit a model fine-tuning job using the fine-tuning scripts provided by PAI:

```python
from pai.model import ModelTrainingRecipe

training_recipe = ModelTrainingRecipe(
    model_name="qwen2-0.5b-instruct",
    model_provider="pai",
    instance_type="ecs.gn6e-c12g1.3xlarge",
)

training_recipe.train(
    inputs={
        # A local path, or a path on Alibaba Cloud OSS (oss://<bucketname>/path/to/data)
        "train": "<YourTrainingDataPath>"
    }
)
```

More usage examples are available in the examples repository provided by PAI: [pai-examples](https://github.com/aliyun/pai-examples/tree/master/pai-python-sdk)

## 🤝 Contributing

Contributions to the PAI Python SDK are welcome. Please read the [CONTRIBUTING](./CONTRIBUTING.md) file to learn how to contribute to this project.

## 📝 License

The PAI Python SDK is developed by Alibaba Cloud and licensed under the Apache License, Version 2.0.

## 📬 Contact

For support or inquiries, open an issue on the GitHub repository, or contact us via the DingTalk group:

<img src="./assets/dingtalk-group.png" alt="DingTalkGroup" width="500"/>