diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..24b3a8b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,47 @@
+# Image for Dipoorlet: CUDA 11.8 / cuDNN 8 on Ubuntu 22.04 with a Python 3.10 venv.
+FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+
+#COPY sources.list /etc/apt/sources.list
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Base tools and repo setup
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    gnupg \
+    build-essential \
+    curl \
+    ca-certificates \
+    cmake \
+    vim \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update && apt-get install -y --no-install-recommends \
+    python3.10 \
+    python3.10-dev \
+    python3.10-venv \
+    python3.10-distutils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a dedicated Python 3.10 venv and make it default on PATH
+RUN python3.10 -m venv /opt/py310 \
+    && /opt/py310/bin/python -m pip install --upgrade pip setuptools wheel
+
+ENV VIRTUAL_ENV=/opt/py310
+ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+ENV PIP_NO_CACHE_DIR=1
+
+# Sanity check
+RUN python -V && pip -V
+
+# Add the Tsinghua (TUNA) PyPI mirror as an extra index, then install the pinned GPU packages.
+RUN pip config set global.extra-index-url "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+RUN pip install torch==2.0.0 --index-url https://download.pytorch.org/whl/cu118
+RUN pip install onnxruntime-gpu==1.16.0 onnx==1.14.1
+
+# Install the app: copy the build context to /workspace/Dipoorlet and install it into the venv.
+WORKDIR /workspace
+# COPY instead of ADD: this is a plain local copy, so ADD's URL/archive handling is not wanted.
+COPY . Dipoorlet
+# Install with pip; invoking "setup.py install" directly is deprecated by setuptools.
+RUN cd Dipoorlet \
+    && pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
+    && pip install --no-build-isolation .
diff --git a/dipoorlet/tensor_cali/basic_algorithm.py b/dipoorlet/tensor_cali/basic_algorithm.py
index 487de65..7ab52b2 100644
--- a/dipoorlet/tensor_cali/basic_algorithm.py
+++ b/dipoorlet/tensor_cali/basic_algorithm.py
@@ -44,7 +44,9 @@ def find_clip_val_hist(onnx_graph, args, store_stats=None, **kwargs):
         for i in range(len(hist)):
             accum += hist[i]
             if accum >= args.threshold:
-                clip_value = (i + 0.5) * (data_max / args.bins)
+                bins = int(args.bins)
+                dmax = float(data_max)
+                clip_value = np.float32((i + 0.5) * (dmax / bins))
                 clip_val[name] = [max(-clip_value, np.min(stats_min_max[name]['min'])),
                                   min(clip_value, np.max(stats_min_max[name]['max']))]
                 break
diff --git a/dipoorlet/utils.py b/dipoorlet/utils.py
index 69bd8a4..ab77cf6 100644
--- a/dipoorlet/utils.py
+++ b/dipoorlet/utils.py
@@ -311,12 +311,16 @@ def update_model_path(name, args):
 
 def save_clip_val(act_clip_val, weight_clip_val, args,
                   act_fname='act_clip_val.json', weight_fname='weight_clip_val.json'):
+    def _jsonify(val):
+        # Convert numpy values/arrays to JSON-serializable Python types.
+        return val.tolist() if hasattr(val, "tolist") else val
+
     for k, v in act_clip_val.items():
-        act_clip_val[k][0] = act_clip_val[k][0].tolist()
-        act_clip_val[k][1] = act_clip_val[k][1].tolist()
+        act_clip_val[k][0] = _jsonify(act_clip_val[k][0])
+        act_clip_val[k][1] = _jsonify(act_clip_val[k][1])
     for k, v in weight_clip_val.items():
-        weight_clip_val[k][0] = weight_clip_val[k][0].tolist()
-        weight_clip_val[k][1] = weight_clip_val[k][1].tolist()
+        weight_clip_val[k][0] = _jsonify(weight_clip_val[k][0])
+        weight_clip_val[k][1] = _jsonify(weight_clip_val[k][1])
     with open(os.path.join(args.output_dir, act_fname), 'w') as f:
         json.dump(act_clip_val, f, indent=4)
     with open(os.path.join(args.output_dir, weight_fname), 'w') as f:
@@ -432,4 +436,4 @@ def deploy_QOperator(model, tensor_range, args):
         None, args.skip_layers, op_types_to_quantize)
     quantizer.quantize_model()
     model_output = os.path.join(args.output_dir, 'qop_model.onnx')
-    quantizer.model.save_model_to_file(model_output)
\ No newline at end of file
+    quantizer.model.save_model_to_file(model_output)
diff --git a/dipoorlet/weight_transform/weight_trans_base.py b/dipoorlet/weight_transform/weight_trans_base.py
index fd3912b..a526988 100644
--- a/dipoorlet/weight_transform/weight_trans_base.py
+++ b/dipoorlet/weight_transform/weight_trans_base.py
@@ -24,7 +24,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('update_bias_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         # Update bias range.
         weight_clip_val = find_clip_val_minmax_weight(graph_after_wt, args)
 
@@ -34,7 +34,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('weight_equal_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         act_clip_val, weight_clip_val = tensor_calibration(graph_after_wt, args)
 
     if args.update_bn:
@@ -43,7 +43,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('update_bn_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         if dist.get_rank() == 0:
             logger.info("Re calibration...")
         act_clip_val, weight_clip_val = tensor_calibration(graph_after_wt, args)
diff --git a/requirements.txt b/requirements.txt
index f05f51e..0960c57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,7 @@ torch
 onnx>=1.10.0
 onnxsim
 onnxruntime-gpu
-numpy
+numpy==1.26.4
 tqdm
+termcolor
+pyyaml