Skip to content

Commit 42c642b

Browse files
Merge to main (#141)
1 parent 7b944d1 commit 42c642b

File tree

11 files changed

+30
-26
lines changed

11 files changed

+30
-26
lines changed

Dockerfile

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,18 @@ RUN curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s
66
rm Miniconda3-latest-Linux-x86_64.sh
77
ENV PATH="/opt/conda/bin:${PATH}"
88
RUN conda create -n twinkle python=3.12 -y --override-channels -c conda-forge
9-
SHELL ["conda", "run", "-n", "twinkle", "/bin/bash", "-c"]
9+
ENV PATH="/opt/conda/envs/twinkle/bin:${PATH}"
1010

1111
# Clone and install twinkle, checkout to latest v-tag
1212
RUN git clone https://github.com/modelscope/twinkle.git
1313
WORKDIR /twinkle
14-
RUN echo "Available v-tags:" && git tag -l 'v*' --sort=-v:refname && \
15-
LATEST_TAG=$(git tag -l 'v*' --sort=-v:refname | head -n 1) && \
16-
echo "Checking out: $LATEST_TAG" && \
17-
git checkout "$LATEST_TAG"
14+
RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --sort=-v:refname && \
15+
LATEST_RELEASE=$(git branch -r -l 'origin/release/*' --sort=-v:refname | head -n 1 | tr -d ' ') && \
16+
echo "Checking out: $LATEST_RELEASE" && \
17+
git checkout --track "$LATEST_RELEASE"
1818

1919
RUN sh INSTALL_MEGATRON.sh
2020

2121
RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U
2222

2323
RUN pip install -e . --no-build-isolation
24-
25-
CMD ["bash", "cookbook/client/server/megatron/run.sh"]

INSTALL_MEGATRON.sh

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
# which always occur error
55

66
set -e # Exit immediately on error
7-
7+
export SETUPTOOLS_USE_DISTUTILS=local
88
echo "=========================================="
99
echo "Starting deep learning dependencies installation..."
1010
echo "=========================================="
@@ -55,8 +55,8 @@ echo "Using CUDA architecture: $TORCH_CUDA_ARCH_LIST"
5555

5656
# Install latest base packages
5757
echo ""
58-
echo "Installing peft, accelerate, transformers, modelscope, oss2..."
59-
pip install --upgrade peft accelerate transformers "modelscope[framework]" oss2
58+
echo "Installing peft, accelerate, transformers, modelscope..."
59+
pip install --upgrade peft accelerate transformers "modelscope[framework]"
6060

6161
# Install latest vllm
6262
echo ""
@@ -71,7 +71,9 @@ echo "Site-packages path: $SITE_PACKAGES"
7171

7272
CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \
7373
CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
74-
pip install --no-build-isolation "transformer_engine[pytorch]" megatron_core --no-cache-dir
74+
pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir
75+
76+
pip install megatron_core mcore_bridge --no-cache-dir
7577

7678
# Install flash-attention (force local build)
7779
echo ""
@@ -87,11 +89,6 @@ pip install flash-linear-attention -U
8789
echo ""
8890
echo "Installing numpy==2.2 and deep_gemm..."
8991
pip install numpy==2.2
90-
pip uninstall deep_gemm -y
91-
cd /tmp
92-
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git
93-
cd DeepGEMM
94-
pip install . --no-build-isolation
9592

9693
# Verify installation
9794
echo ""
@@ -100,7 +97,7 @@ echo ""
10097
python -c "
10198
import pkg_resources
10299
103-
packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'oss2', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']
100+
packages = ['peft', 'accelerate', 'transformers', 'modelscope', 'vllm', 'transformer_engine', 'megatron_core', 'flash_attn', 'numpy']
104101
105102
print('Installed package versions:')
106103
print('-' * 40)

cookbook/client/server/megatron/run.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@ export RAY_ROTATION_BACKUP_COUNT=1
33
CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false
44
CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4
55
CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0
6-
python server.py
6+
python "$(dirname "$0")/server.py"

cookbook/client/server/megatron/server.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
import os
1010

1111
# Enable Ray debug mode for verbose logging during development
12-
os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '1'
12+
os.environ['TWINKLE_TRUST_REMOTE_CODE'] = '0'
1313

1414
from twinkle.server import launch_server
1515

cookbook/client/server/megatron/server_config.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ applications:
4242
import_path: sampler
4343
args:
4444
model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier
45-
nproc_per_node: 8 # Number of GPU processes per node
45+
nproc_per_node: 4 # Number of GPU processes per node
4646
sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
4747
engine_args: # vLLM engine-specific settings
4848
max_model_len: 32000 # Maximum sequence length the engine supports
@@ -84,7 +84,7 @@ applications:
8484
model_id: "ms://Qwen/Qwen3.5-27B" # ModelScope model identifier
8585
max_length: 32000 # model max length
8686
max_loras: 5 # model max loras
87-
nproc_per_node: 8 # Number of GPU processes per node
87+
nproc_per_node: 4 # Number of GPU processes per node
8888
device_group:
8989
name: model
9090
ranks: 4 # GPU rank indices

cookbook/client/tinker/modelscope/sample.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
]
4646
)
4747

48-
input_feature = template.encode(trajectory, add_generation_prompt=True)
48+
input_feature = template.batch_encode([trajectory], add_generation_prompt=True)[0]
4949

5050
input_ids = input_feature['input_ids'].tolist()
5151

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ dependencies = [
1010
"datasets>=3.0,<4.0",
1111
"omegaconf>=2.3.0,<3.0.0",
1212
"fastapi",
13-
"modelscope[framework]>=1.34.0",
13+
"modelscope[framework]>=1.35.0",
1414
"safetensors",
1515
"peft>=0.11.0,<=0.19.0",
1616
"transformers",

src/twinkle/dataset/base.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,9 @@ class Dataset(TorchDataset):
5151
"""
5252

5353
def __init__(self, dataset_meta: DatasetMeta, **kwargs):
54+
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
55+
if not trust_remote_code:
56+
kwargs['trust_remote_code'] = False
5457
dataset = self._load_dataset(dataset_meta, **kwargs)
5558
self.datasets = {dataset_meta.get_id(): dataset}
5659
self.dataset = dataset
@@ -247,6 +250,9 @@ def add_dataset(self, dataset_meta: DatasetMeta, **kwargs):
247250
Args:
248251
dataset_meta: The dataset_meta information of the loaded dataset.
249252
"""
253+
trust_remote_code = bool(os.environ.get('TWINKLE_TRUST_REMOTE_CODE', '1'))
254+
if not trust_remote_code:
255+
kwargs['trust_remote_code'] = False
250256
dataset = self._load_dataset(dataset_meta, **kwargs)
251257
self.datasets[dataset_meta.get_id()] = dataset
252258

src/twinkle/hub/hub.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ def load_dataset(cls,
401401
cls.try_login(token)
402402
if revision is None or revision == 'main':
403403
revision = 'master'
404-
load_kwargs = {'trust_remote_code': True}
404+
load_kwargs = {'trust_remote_code': kwargs.get('trust_remote_code', True)}
405405
return MsDataset.load(
406406
dataset_id,
407407
subset_name=subset_name,
@@ -595,13 +595,15 @@ def load_dataset(cls,
595595
from datasets import load_dataset
596596
if revision is None or revision == 'master':
597597
revision = 'main'
598+
trust_remote_code = kwargs.get('trust_remote_code', True)
598599
return load_dataset(
599600
dataset_id,
600601
name=subset_name,
601602
split=split,
602603
streaming=streaming,
603604
revision=revision,
604605
download_mode=download_mode,
606+
trust_remote_code=trust_remote_code,
605607
num_proc=num_proc)
606608

607609
@classmethod

src/twinkle/processor/base.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,8 @@ def to_tensor(_input):
9797
# so tensor ops like labels != ignore_index or .to(device) would fail without this.
9898
if isinstance(value, np.ndarray):
9999
value = torch.from_numpy(value)
100-
elif isinstance(value, list) and isinstance(value[0], (int, float, np.number)):
100+
elif (isinstance(value, list) and isinstance(value[0],
101+
(int, float, np.number))) or key == 'position_ids':
101102
value = torch.tensor(value)
102103
elif key in self.VLM_CONCAT_FIELDS:
103104
if not isinstance(value[0], torch.Tensor):

0 commit comments

Comments (0)