diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..24b3a8b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,50 @@
+FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+
+#COPY sources.list /etc/apt/sources.list
+# Build-time only: keeps apt non-interactive without persisting into the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Base tools and repo setup
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    gnupg \
+    build-essential \
+    curl \
+    ca-certificates \
+    cmake \
+    vim \
+ && add-apt-repository -y ppa:deadsnakes/ppa \
+ && apt-get update && apt-get install -y --no-install-recommends \
+    python3.10 \
+    python3.10-dev \
+    python3.10-venv \
+    python3.10-distutils \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create a dedicated Python 3.10 venv and make it default on PATH
+RUN python3.10 -m venv /opt/py310 \
+ && /opt/py310/bin/python -m pip install --upgrade pip setuptools wheel
+
+ENV VIRTUAL_ENV=/opt/py310
+ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+ENV PIP_NO_CACHE_DIR=1
+
+# Sanity check
+RUN python -V && pip -V
+
+# Register the TUNA mirror as an extra package index for later pip installs.
+RUN pip config set global.extra-index-url "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
+# Torch comes from the cu118 wheel index to match the CUDA 11.8 base image.
+RUN pip install torch==2.0.0 --index-url https://download.pytorch.org/whl/cu118
+RUN pip install onnxruntime-gpu==1.16.0 onnx==1.14.1
+
+# Install the app from the build context.
+WORKDIR /workspace
+# COPY (not ADD): plain local files, no archive extraction or URL fetch wanted.
+COPY . Dipoorlet
+# Build inside the source tree via WORKDIR instead of `cd`.
+WORKDIR /workspace/Dipoorlet
+RUN pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
+ && python3 setup.py install
+# Restore the image's default working directory.
+WORKDIR /workspace
diff --git a/dipoorlet/tensor_cali/basic_algorithm.py b/dipoorlet/tensor_cali/basic_algorithm.py
index 487de65..7ab52b2 100644
--- a/dipoorlet/tensor_cali/basic_algorithm.py
+++ b/dipoorlet/tensor_cali/basic_algorithm.py
@@ -44,7 +44,9 @@ def find_clip_val_hist(onnx_graph, args, store_stats=None, **kwargs):
         for i in range(len(hist)):
             accum += hist[i]
             if accum >= args.threshold:
-                clip_value = (i + 0.5) * (data_max / args.bins)
+                bins = int(args.bins)
+                dmax = float(data_max)
+                clip_value = np.float32((i + 0.5) * (dmax / bins))
                 clip_val[name] = [max(-clip_value, np.min(stats_min_max[name]['min'])),
                                   min(clip_value, np.max(stats_min_max[name]['max']))]
                 break
diff --git a/dipoorlet/utils.py b/dipoorlet/utils.py
index 69bd8a4..ab77cf6 100644
--- a/dipoorlet/utils.py
+++ b/dipoorlet/utils.py
@@ -311,12 +311,16 @@ def update_model_path(name, args):
 
 
 def save_clip_val(act_clip_val, weight_clip_val, args, act_fname='act_clip_val.json', weight_fname='weight_clip_val.json'):
+    def _jsonify(val):
+        # Convert numpy values/arrays to JSON-serializable Python types.
+        return val.tolist() if hasattr(val, "tolist") else val
+
     for k, v in act_clip_val.items():
-        act_clip_val[k][0] = act_clip_val[k][0].tolist()
-        act_clip_val[k][1] = act_clip_val[k][1].tolist()
+        act_clip_val[k][0] = _jsonify(act_clip_val[k][0])
+        act_clip_val[k][1] = _jsonify(act_clip_val[k][1])
     for k, v in weight_clip_val.items():
-        weight_clip_val[k][0] = weight_clip_val[k][0].tolist()
-        weight_clip_val[k][1] = weight_clip_val[k][1].tolist()
+        weight_clip_val[k][0] = _jsonify(weight_clip_val[k][0])
+        weight_clip_val[k][1] = _jsonify(weight_clip_val[k][1])
     with open(os.path.join(args.output_dir, act_fname), 'w') as f:
         json.dump(act_clip_val, f, indent=4)
     with open(os.path.join(args.output_dir, weight_fname), 'w') as f:
@@ -432,4 +436,4 @@ def deploy_QOperator(model, tensor_range, args):
                               None, args.skip_layers, op_types_to_quantize)
     quantizer.quantize_model()
     model_output = os.path.join(args.output_dir, 'qop_model.onnx')
-    quantizer.model.save_model_to_file(model_output)
\ No newline at end of file
+    quantizer.model.save_model_to_file(model_output)
diff --git a/dipoorlet/weight_transform/weight_trans_base.py b/dipoorlet/weight_transform/weight_trans_base.py
index fd3912b..a526988 100644
--- a/dipoorlet/weight_transform/weight_trans_base.py
+++ b/dipoorlet/weight_transform/weight_trans_base.py
@@ -24,7 +24,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('update_bias_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         # Update bias range.
         weight_clip_val = find_clip_val_minmax_weight(graph_after_wt, args)
 
@@ -34,7 +34,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('weight_equal_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         act_clip_val, weight_clip_val = tensor_calibration(graph_after_wt, args)
 
     if args.update_bn:
@@ -43,7 +43,7 @@ def weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args):
         dist.barrier()
         update_model_path('update_bn_model', args)
         model = onnx.load(args.model)
-        graph_after_wt = ONNXGraph(model, args.output_dir)
+        graph_after_wt = ONNXGraph(model, args.output_dir, args.deploy, args.model_type)
         if dist.get_rank() == 0:
             logger.info("Re calibration...")
             act_clip_val, weight_clip_val = tensor_calibration(graph_after_wt, args)
diff --git a/requirements.txt b/requirements.txt
index f05f51e..0960c57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,7 @@ torch
 onnx>=1.10.0
 onnxsim
 onnxruntime-gpu
-numpy
+numpy==1.26.4
 tqdm
+termcolor
+pyyaml