From 81a4d7cda85976c77fabf181cbf6c0f099f0687c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 8 Jun 2022 18:53:46 +0800 Subject: [PATCH 001/150] remote --> local --- det-yolov5-tmi/cuda102.dockerfile | 40 +++++ det-yolov5-tmi/cuda111-devel.dockerfile | 43 +++++ det-yolov5-tmi/cuda111.dockerfile | 42 +++++ det-yolov5-tmi/infer-template.yaml | 12 ++ det-yolov5-tmi/mining-template.yaml | 12 ++ det-yolov5-tmi/mining/data_augment.py | 198 ++++++++++++++++++++ det-yolov5-tmi/mining/mining_cald.py | 144 +++++++++++++++ det-yolov5-tmi/mypy.ini | 8 + det-yolov5-tmi/requirements.txt | 9 +- det-yolov5-tmi/start.py | 128 +++++++++++++ det-yolov5-tmi/train.py | 29 ++- det-yolov5-tmi/training-template.yaml | 14 ++ det-yolov5-tmi/utils/datasets.py | 47 ++--- det-yolov5-tmi/utils/general.py | 4 + det-yolov5-tmi/utils/ymir_yolov5.py | 230 ++++++++++++++++++++++++ 15 files changed, 930 insertions(+), 30 deletions(-) create mode 100644 det-yolov5-tmi/cuda102.dockerfile create mode 100644 det-yolov5-tmi/cuda111-devel.dockerfile create mode 100644 det-yolov5-tmi/cuda111.dockerfile create mode 100644 det-yolov5-tmi/infer-template.yaml create mode 100644 det-yolov5-tmi/mining-template.yaml create mode 100644 det-yolov5-tmi/mining/data_augment.py create mode 100644 det-yolov5-tmi/mining/mining_cald.py create mode 100644 det-yolov5-tmi/mypy.ini create mode 100644 det-yolov5-tmi/start.py create mode 100644 det-yolov5-tmi/training-template.yaml create mode 100644 det-yolov5-tmi/utils/ymir_yolov5.py diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile new file mode 100644 index 0000000..3c359ee --- /dev/null +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -0,0 +1,40 @@ +ARG PYTORCH="1.8.1" +ARG CUDA="10.2" +ARG CUDNN="7" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 + +# 
Install linux package +RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ + libgl1-mesa-glx ffmpeg build-essential curl wget zip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install python package +RUN pip install -U pip && \ + pip install cython xtcocotools onnx onnx-simplifier loguru \ + tensorboard==2.5.0 numba progress yacs pthflops imagesize pydantic pytest \ + scipy pyyaml opencv-python thop pandas seaborn + +# Install ymir-exc sdk +RUN pip install ymir-exc + +# Copy file from host to docker +ADD ./det-yolov5-tmi /app +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ + +# Download pretrained weight and font file +RUN cd /app && bash data/scripts/download_weights.sh +RUN mkdir -p /root/.config/Ultralytics && \ + wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf + +# Make PYTHONPATH find local package +ENV PYTHONPATH=. + +WORKDIR /app +CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/cuda111-devel.dockerfile b/det-yolov5-tmi/cuda111-devel.dockerfile new file mode 100644 index 0000000..6378b8b --- /dev/null +++ b/det-yolov5-tmi/cuda111-devel.dockerfile @@ -0,0 +1,43 @@ +ARG PYTORCH="1.8.0" +ARG CUDA="11.1" +ARG CUDNN="8" + +# cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 + +# Install linux package +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ + apt-get update && apt-get install -y gnupg2 git ninja-build libglib2.0-0 libsm6 \ + libxrender-dev libxext6 libgl1-mesa-glx ffmpeg sudo openssh-server \ + libyaml-dev vim tmux tree curl wget zip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install python package +RUN pip install -U pip && \ + pip install cython xtcocotools jupyter onnx onnx-simplifier loguru \ + tensorboard==2.5.0 numba progress yacs pthflops pytest \ + scipy pydantic pyyaml imagesize opencv-python thop pandas seaborn + +# Install ymir-exc sdk +RUN pip install ymir-exc + +# Copy file from host to docker +ADD ./det-yolov5-tmi /app +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ + +# Download pretrained weight and font file +RUN cd /app && bash data/scripts/download_weights.sh +RUN mkdir -p /root/.config/Ultralytics && \ + wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf + +# setup PYTHONPATH to find local package +ENV PYTHONPATH=. + +WORKDIR /app +CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile new file mode 100644 index 0000000..4b637ec --- /dev/null +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -0,0 +1,42 @@ +ARG PYTORCH="1.8.0" +ARG CUDA="11.1" +ARG CUDNN="8" + +# cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
+FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 + +# Install linux package +RUN apt-get update && apt-get install -y gnupg2 git ninja-build libglib2.0-0 libsm6 \ + libxrender-dev libxext6 libgl1-mesa-glx ffmpeg sudo openssh-server \ + libyaml-dev vim tmux tree curl wget zip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install python package +RUN pip install -U pip && \ + pip install cython xtcocotools onnx onnx-simplifier loguru \ + tensorboard==2.5.0 numba progress yacs pthflops imagesize pydantic pytest \ + scipy pyyaml opencv-python thop pandas seaborn + +# Install ymir-exc sdk +RUN pip install ymir-exc + +# Copy file from host to docker +ADD ./det-yolov5-tmi /app +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ + +# Download pretrained weight and font file +RUN cd /app && bash data/scripts/download_weights.sh +RUN mkdir -p /root/.config/Ultralytics && \ + wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf + +# Make PYTHONPATH find local package +ENV PYTHONPATH=. 
+ +WORKDIR /app +CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/infer-template.yaml b/det-yolov5-tmi/infer-template.yaml new file mode 100644 index 0000000..7574512 --- /dev/null +++ b/det-yolov5-tmi/infer-template.yaml @@ -0,0 +1,12 @@ +# infer template for your executor app +# after build image, it should at /img-man/infer-template.yaml +# key: gpu_id, task_id, model_params_path, class_names should be preserved + +gpu_id: '0' +task_id: 'default-infer-task' +model_params_path: [] +class_names: [] + +img_size: 640 +conf_thres: 0.25 +iou_thres: 0.45 diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml new file mode 100644 index 0000000..5f2a3b2 --- /dev/null +++ b/det-yolov5-tmi/mining-template.yaml @@ -0,0 +1,12 @@ +# mining template for your executor app +# after build image, it should at /img-man/mining-template.yaml +# key: gpu_id, task_id, model_params_path, class_names should be preserved + +gpu_id: '0' +task_id: 'default-training-task' +model_params_path: [] +class_names: [] + +img_size: 640 +conf_thres: 0.25 +iou_thres: 0.45 diff --git a/det-yolov5-tmi/mining/data_augment.py b/det-yolov5-tmi/mining/data_augment.py new file mode 100644 index 0000000..47b1d50 --- /dev/null +++ b/det-yolov5-tmi/mining/data_augment.py @@ -0,0 +1,198 @@ +""" +data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout +official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py +""" +import random +from typing import Any, List, Tuple + +import cv2 +import numpy as np +from nptyping import NDArray + +from utils.ymir_yolov5 import BBOX, CV_IMAGE + + +def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: + ''' + Find intersection of every box combination between two sets of box + boxes1: bounding boxes 1, a tensor of dimensions (n1, 4) + boxes2: bounding boxes 2, a tensor of dimensions (n2, 4) + + Out: Intersection each of boxes1 with respect to each of boxes2, + a tensor 
of dimensions (n1, n2) + ''' + n1 = boxes1.shape[0] + n2 = boxes2.shape[0] + max_xy = np.minimum(np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + + min_xy = np.maximum(np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) + inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) + return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) + + +def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ + -> Tuple[CV_IMAGE, BBOX]: + """ + image: opencv image, [height,width,channels] + bbox: numpy.ndarray, [N,4] --> [x1,y1,x2,y2] + """ + image = image.copy() + + width = image.shape[1] + # Flip image horizontally + image = image[:, ::-1, :] + if len(bbox) > 0: + bbox = bbox.copy() + # Flip bbox horizontally + bbox[:, [0, 2]] = width - bbox[:, [2, 0]] + return image, bbox + + +def cutout(image: CV_IMAGE, bbox: BBOX, cut_num: int = 2, fill_val: int = 0, + bbox_remove_thres: float = 0.4, bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: + ''' + Cutout augmentation + image: A PIL image + boxes: bounding boxes, a tensor of dimensions (#objects, 4) + labels: labels of object, a tensor of dimensions (#objects) + fill_val: Value filled in cut out + bbox_remove_thres: Theshold to remove bbox cut by cutout + + Out: new image, new_boxes, new_labels + ''' + image = image.copy() + bbox = bbox.copy() + + if len(bbox) == 0: + return image, bbox + + original_h, original_w, original_channel = image.shape + count = 0 + for _ in range(50): + # Random cutout size: [0.15, 0.5] of original dimension + cutout_size_h = random.uniform(0.05 * original_h, 0.2 * original_h) + cutout_size_w = random.uniform(0.05 * original_w, 0.2 * original_w) + + # Random position for cutout + left = random.uniform(0, original_w - cutout_size_w) + right = left + cutout_size_w + top = random.uniform(0, original_h - cutout_size_h) + bottom = top + cutout_size_h + cutout = 
np.array([[float(left), float(top), float(right), float(bottom)]]) + + # Calculate intersect between cutout and bounding boxes + overlap_size = intersect(cutout, bbox) + area_boxes = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) + ratio = overlap_size / (area_boxes + 1e-14) + # If all boxes have Iou greater than bbox_remove_thres, try again + if ratio.max() > bbox_remove_thres or ratio.max() < bbox_min_thres: + continue + + image[int(top):int(bottom), int(left):int(right), :] = fill_val + count += 1 + if count >= cut_num: + break + return image, bbox + + +def rotate(image: CV_IMAGE, bbox: BBOX, rot: float = 5) -> Tuple[CV_IMAGE, BBOX]: + image = image.copy() + bbox = bbox.copy() + h, w, c = image.shape + center = np.array([w / 2.0, h / 2.0]) + s = max(h, w) * 1.0 + trans = get_affine_transform(center, s, rot, [w, h]) + if len(bbox) > 0: + for i in range(bbox.shape[0]): + x1, y1 = affine_transform(bbox[i, :2], trans) + x2, y2 = affine_transform(bbox[i, 2:], trans) + x3, y3 = affine_transform(bbox[i, [2, 1]], trans) + x4, y4 = affine_transform(bbox[i, [0, 3]], trans) + bbox[i, :2] = [min(x1, x2, x3, x4), min(y1, y2, y3, y4)] + bbox[i, 2:] = [max(x1, x2, x3, x4), max(y1, y2, y3, y4)] + image = cv2.warpAffine(image, trans, (w, h), flags=cv2.INTER_LINEAR) + return image, bbox + + +def get_3rd_point(a: NDArray, b: NDArray) -> NDArray: + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_dir(src_point: NDArray, rot_rad: float) -> List: + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result + + +def transform_preds(coords: NDArray, center: NDArray, scale: Any, rot: float, output_size: List) -> NDArray: + trans = get_affine_transform(center, scale, rot, output_size, inv=True) + target_coords = affine_transform(coords, trans) + return target_coords + + +def 
get_affine_transform(center: NDArray, + scale: Any, + rot: float, + output_size: List, + shift: NDArray = np.array([0, 0], dtype=np.float32), + inv: bool = False) -> NDArray: + if not isinstance(scale, np.ndarray) and not isinstance(scale, list): + scale = np.array([scale, scale], dtype=np.float32) + + scale_tmp = scale + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = get_dir([0, src_w * -0.5], rot_rad) + dst_dir = np.array([0, dst_w * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def affine_transform(pt: NDArray, t: NDArray) -> NDArray: + new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def resize(img: CV_IMAGE, boxes: BBOX, ratio: float = 0.8) -> Tuple[CV_IMAGE, BBOX]: + """ + ratio: <= 1.0 + """ + assert ratio <= 1.0, f'resize ratio {ratio} must <= 1.0' + + h, w, _ = img.shape + ow = int(w * ratio) + oh = int(h * ratio) + resize_img = cv2.resize(img, (ow, oh)) + new_img = np.zeros_like(img) + new_img[:oh, :ow] = resize_img + + if len(boxes) == 0: + return new_img, boxes + else: + return new_img, boxes * ratio diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py new file mode 100644 index 0000000..77bfcf6 --- /dev/null +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -0,0 +1,144 @@ +""" +Consistency-based Active Learning for Object Detection CVPR 2022 workshop 
+official code: https://github.com/we1pingyu/CALD/blob/master/cald_train.py +""" +import sys +from typing import Dict, List, Tuple + +import cv2 +import numpy as np +from nptyping import NDArray +from scipy.stats import entropy +from tqdm import tqdm +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw + +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5, YmirStage, get_ymir_process + + +def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: + if len(result) > 0: + bboxes = result[:, :4].astype(np.int32) + conf = result[:, 4] + class_id = result[:, 5] + else: + bboxes = np.zeros(shape=(0, 4), dtype=np.int32) + conf = np.zeros(shape=(0, 1), dtype=np.float32) + class_id = np.zeros(shape=(0, 1), dtype=np.int32) + + return bboxes, conf, class_id + + +class MiningCald(YmirYolov5): + def mining(self) -> List: + N = dr.items_count(env.DatasetType.CANDIDATE) + monitor_gap = max(1, N // 100) + idx = -1 + beta = 1.3 + mining_result = [] + for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): + img = cv2.imread(asset_path) + # xyxy,conf,cls + result = self.predict(img) + bboxes, conf, _ = split_result(result) + if len(result) == 0: + # no result for the image without augmentation + mining_result.append((asset_path, -beta)) + continue + + consistency = 0.0 + aug_bboxes_dict, aug_results_dict = self.aug_predict(img, bboxes) + for key in aug_results_dict: + # no result for the image with augmentation f'{key}' + if len(aug_results_dict[key]) == 0: + consistency += beta + continue + + bboxes_key, conf_key, _ = split_result(aug_results_dict[key]) + cls_scores_aug = 1 - conf_key + cls_scores = 1 - conf + + consistency_per_aug = 2.0 + ious = get_ious(bboxes_key, aug_bboxes_dict[key]) + aug_idxs = np.argmax(ious, axis=0) + for origin_idx, aug_idx in enumerate(aug_idxs): + max_iou = 
ious[aug_idx, origin_idx] + if max_iou == 0: + consistency_per_aug = min(consistency_per_aug, beta) + p = cls_scores_aug[aug_idx] + q = cls_scores[origin_idx] + m = (p + q) / 2. + js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + if js < 0: + js = 0 + consistency_box = max_iou + consistency_cls = 0.5 * (conf[origin_idx] + conf_key[aug_idx]) * (1 - js) + consistency_per_inst = abs(consistency_box + consistency_cls - beta) + consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) + + consistency += consistency_per_aug + + consistency /= len(aug_results_dict) + + mining_result.append((asset_path, consistency)) + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + monitor.write_monitor_logger(percent=percent) + + return mining_result + + def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], Dict[str, NDArray]]: + """ + for different augmentation methods: flip, cutout, rotate and resize + augment the image and bbox and use model to predict them. + + return the predict result and augment bbox. 
+ """ + aug_dict = dict(flip=horizontal_flip, + cutout=cutout, + rotate=rotate, + resize=resize) + + aug_bboxes = dict() + aug_results = dict() + for key in aug_dict: + aug_img, aug_bbox = aug_dict[key](image, bboxes) + + aug_result = self.predict(aug_img) + aug_bboxes[key] = aug_bbox + aug_results[key] = aug_result + + return aug_bboxes, aug_results + + +def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: + """ + args: + boxes1: np.array, (N, 4), xyxy + boxes2: np.array, (M, 4), xyxy + return: + iou: np.array, (N, M) + """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + iner_area = intersect(boxes1, boxes2) + area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) + area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0) + iou = iner_area / (area1 + area2 - iner_area + 1e-14) + return iou + + +def main(): + miner = MiningCald() + mining_result = miner.mining() + rw.write_mining_result(mining_result=mining_result) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/det-yolov5-tmi/mypy.ini b/det-yolov5-tmi/mypy.ini new file mode 100644 index 0000000..85e751a --- /dev/null +++ b/det-yolov5-tmi/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +ignore_missing_imports = True +disallow_untyped_defs = False +files = [mining/*.py, utils/ymir_yolov5.py, start.py, train.py] +exclude = [utils/general.py] + +[mypy-torch.*] +ignore_errors = True diff --git a/det-yolov5-tmi/requirements.txt b/det-yolov5-tmi/requirements.txt index 96fc9d1..3e65c34 100755 --- a/det-yolov5-tmi/requirements.txt +++ b/det-yolov5-tmi/requirements.txt @@ -22,8 +22,8 @@ seaborn>=0.11.0 # Export -------------------------------------- # coremltools>=4.1 # CoreML export -# onnx>=1.9.0 # ONNX export -# onnx-simplifier>=0.3.6 # ONNX simplifier +onnx>=1.9.0 # ONNX export +onnx-simplifier>=0.3.6 # ONNX simplifier # scikit-learn==0.19.2 # CoreML quantization # tensorflow>=2.4.1 # TFLite export # 
tensorflowjs>=3.9.0 # TF.js export @@ -35,3 +35,8 @@ seaborn>=0.11.0 # pycocotools>=2.0 # COCO mAP # roboflow thop # FLOPs computation + +# Ymir --------------------------------------- +imagesize # fast obtain image size without load image +nptyping # numpy type hint +easydict \ No newline at end of file diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py new file mode 100644 index 0000000..d22b3b8 --- /dev/null +++ b/det-yolov5-tmi/start.py @@ -0,0 +1,128 @@ +import logging +import os +import os.path as osp +import shutil +import subprocess +import sys + +import cv2 +from easydict import EasyDict as edict +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw + +from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, download_weight_file, get_merged_config, + get_weight_file, get_ymir_process) + + +def start() -> int: + cfg = get_merged_config() + + logging.info(f'merged config: {cfg}') + + if cfg.ymir.run_training: + _run_training(cfg) + elif cfg.ymir.run_mining: + _run_mining(cfg) + elif cfg.ymir.run_infer: + _run_infer(cfg) + else: + logging.warning('no task running') + + return 0 + + +def _run_training(cfg: edict) -> None: + """ + function for training task + 1. convert dataset + 2. training model + 3. save model weight/hyperparameter/... to design directory + """ + # 1. convert dataset + logging.info('convert ymir dataset to yolov5 dataset') + out_dir = cfg.ymir.output.root_dir + convert_ymir_to_yolov5(cfg, out_dir) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + + # 2. 
training model + epochs = cfg.param.epochs + batch_size = cfg.param.batch_size + model = cfg.param.model + img_size = cfg.param.img_size + weights = get_weight_file(cfg) + if not weights: + # download pretrained weight + weights = download_weight_file(model) + + models_dir = cfg.ymir.output.models_dir + command = f'python3 train.py --epochs {epochs} ' + \ + f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ + f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ + f'--img-size {img_size} --hyp data/hyps/hyp.scratch-low.yaml ' + \ + '--exist-ok' + logging.info(f'start training: {command}') + + subprocess.run(command.split(), check=True) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) + + # 3. convert to onnx and save model weight to design directory + opset = cfg.param.opset + command = f'python3 export.py --weights {models_dir}/best.pt --opset {opset} --include onnx' + logging.info(f'export onnx weight: {command}') + subprocess.run(command.split(), check=True) + + # save hyperparameter + shutil.copy(f'models/{model}.yaml', f'{models_dir}/{model}.yaml') + + # if task done, write 100% percent log + monitor.write_monitor_logger(percent=1.0) + + +def _run_mining(cfg: edict) -> None: + logging.info('convert ymir dataset to yolov5 dataset') + out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') + convert_ymir_to_yolov5(cfg, out_dir) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + + command = 'python3 mining/mining_cald.py' + logging.info(f'mining: {command}') + subprocess.run(command.split(), check=True) + monitor.write_monitor_logger(percent=1.0) + + +def _run_infer(cfg: edict) -> None: + # generate data.yaml for infer + logging.info('convert ymir dataset to yolov5 dataset') + out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') + convert_ymir_to_yolov5(cfg, out_dir) +
monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + + N = dr.items_count(env.DatasetType.CANDIDATE) + infer_result = dict() + model = YmirYolov5(cfg) + idx = -1 + + monitor_gap = max(1, N // 100) + for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): + img = cv2.imread(asset_path) + result = model.infer(img) + infer_result[asset_path] = result + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + monitor.write_monitor_logger(percent=percent) + + rw.write_infer_result(infer_result=infer_result) + monitor.write_monitor_logger(percent=1.0) + + +if __name__ == '__main__': + logging.basicConfig(stream=sys.stdout, + format='%(levelname)-8s: [%(asctime)s] %(message)s', + datefmt='%Y%m%d-%H:%M:%S', + level=logging.INFO) + + os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') + sys.exit(start()) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index d8df31b..6dd190e 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -47,7 +47,7 @@ from utils.datasets import create_dataloader from utils.downloads import attempt_download from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, - check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, + check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import Loggers @@ -56,6 +56,8 @@ from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config +from ymir_exc import 
monitor LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -70,9 +72,12 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze + ymir_cfg = opt.ymir_cfg + opt.ymir_cfg = '' # yaml cannot dump edict, remove it here + log_dir = Path(ymir_cfg.ymir.output.tensorboard_dir) # Directories - w = save_dir / 'weights' # weights dir + w = save_dir # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir last, best = w / 'last.pt', w / 'best.pt' @@ -92,7 +97,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Loggers data_dict = None if RANK in [-1, 0]: - loggers = Loggers(save_dir, weights, opt, hyp, LOGGER) # loggers instance + loggers = Loggers(log_dir, weights, opt, hyp, LOGGER) # loggers instance if loggers.wandb: data_dict = loggers.wandb.data_dict if resume: @@ -253,7 +258,10 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # DDP mode if cuda and RANK != -1: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + if check_version(torch.__version__, '1.11.0'): + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) @@ -281,9 +289,16 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') + + monitor_gap = max(1, (epochs - start_epoch + 1) // 100) for epoch in 
range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() + # ymir monitor + if epoch % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=epoch/(epochs-start_epoch+1)) + monitor.write_monitor_logger(percent=percent) + # Update image weights (optional, single-GPU only) if opt.image_weights: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights @@ -398,8 +413,10 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Save last, best and delete torch.save(ckpt, last) + write_ymir_training_result(ymir_cfg, results, maps, rewrite=False) if best_fitness == fi: torch.save(ckpt, best) + write_ymir_training_result(ymir_cfg, results, maps, rewrite=True) if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') del ckpt @@ -518,8 +535,10 @@ def main(opt, callbacks=Callbacks()): if opt.evolve: if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') - opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) + ymir_cfg = get_merged_config() + opt.ymir_cfg = ymir_cfg + # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml new file mode 100644 index 0000000..8cacec8 --- /dev/null +++ b/det-yolov5-tmi/training-template.yaml @@ -0,0 +1,14 @@ +# training template for your executor app +# after build image, it should at /img-man/training-template.yaml +# key: gpu_id, task_id, pretrained_model_paths, class_names should be preserved + +gpu_id: '0' +task_id: 'default-training-task' +pretrained_model_paths: [] +class_names: [] + +model: 'yolov5s' +batch_size: 16 +epochs: 300 +img_size: 640 +opset: 11 diff --git 
a/det-yolov5-tmi/utils/datasets.py b/det-yolov5-tmi/utils/datasets.py index e132e04..cb36851 100755 --- a/det-yolov5-tmi/utils/datasets.py +++ b/det-yolov5-tmi/utils/datasets.py @@ -18,6 +18,7 @@ from zipfile import ZipFile import cv2 +import imagesize import numpy as np import torch import torch.nn.functional as F @@ -28,9 +29,9 @@ from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, - segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) + segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, ymir_xyxy2xywh) from utils.torch_utils import torch_distributed_zero_first - +from loguru import logger # Parameters HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'] # include image suffixes @@ -369,10 +370,8 @@ def __len__(self): return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years -def img2label_paths(img_paths): - # Define label paths as a function of image paths - sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] +def img2label_paths(img_paths, img2label_map={}): + return [img2label_map[img] for img in img_paths] class LoadImagesAndLabels(Dataset): @@ -394,19 +393,19 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r try: f = [] # image files + img2label_map = dict() # map image files to label files for p in path if isinstance(path, list) else [path]: p = Path(p) # os-agnostic - if p.is_dir(): # dir - f += glob.glob(str(p / '**' / '*.*'), recursive=True) - # f = list(p.rglob('*.*')) # pathlib - elif p.is_file(): # file + if p.is_file(): # ymir index file with open(p) as t: t = 
t.read().strip().splitlines() - parent = str(p.parent) + os.sep - f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path - # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) + for x in t: + # x = f'{image_path}\t{label_path}\n' + image_path, label_path = x.split() + f.append(image_path) + img2label_map[image_path] = label_path else: - raise Exception(f'{prefix}{p} does not exist') + raise Exception(f'{prefix}{p} is not valid ymir index file') self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert self.img_files, f'{prefix}No images found' @@ -414,7 +413,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') # Check cache - self.label_files = img2label_paths(self.img_files) # labels + self.label_files = img2label_paths(self.img_files, img2label_map) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') try: cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict @@ -438,7 +437,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.labels = list(labels) self.shapes = np.array(shapes, dtype=np.float64) self.img_files = list(cache.keys()) # update - self.label_files = img2label_paths(cache.keys()) # update + self.label_files = img2label_paths(cache.keys(), img2label_map) # update n = len(shapes) # number of images bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index nb = bi[-1] + 1 # number of batches @@ -841,7 +840,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets import lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): with open(lb_file) as f: - lb = np.array([x.split() 
for x in f.read().strip().splitlines()], dtype=np.float32) # labels + lb = np.array([x.split(',') for x in f.read().strip().splitlines()], dtype=np.float32) # labels for j, x in enumerate(lb): c = int(x[0]) # class @@ -905,14 +904,16 @@ def verify_image_label(args): if os.path.isfile(lb_file): nf = 1 # label found with open(lb_file) as f: - lb = [x.split() for x in f.read().strip().splitlines() if len(x)] - if any([len(x) > 8 for x in lb]): # is segment - classes = np.array([x[0] for x in lb], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) - lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - lb = np.array(lb, dtype=np.float32) + lb = [x.split(',') for x in f.read().strip().splitlines() if len(x)] + nl = len(lb) if nl: + classes = np.array([x[0] for x in lb], dtype=np.float32) + width, height = imagesize.get(im_file) + ymir_xyxy = np.array([x[1:] for x in lb], dtype=np.float32) + lb = np.concatenate( + (classes.reshape(-1, 1), ymir_xyxy2xywh(ymir_xyxy, width, height)), 1) # (cls, xywh) + assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' diff --git a/det-yolov5-tmi/utils/general.py b/det-yolov5-tmi/utils/general.py index 3044b9c..a2a1971 100755 --- a/det-yolov5-tmi/utils/general.py +++ b/det-yolov5-tmi/utils/general.py @@ -578,6 +578,10 @@ def xyxy2xywh(x): y[:, 3] = x[:, 3] - x[:, 1] # height return y +def ymir_xyxy2xywh(x, width, height): + x[:,0:3:2]/=width # normal x1,x2 + x[:,1:4:2]/=height # normal y1,y2 + return xyxy2xywh(x) def xywh2xyxy(x): # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py new file mode 100644 index 
0000000..64ce9be --- /dev/null +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -0,0 +1,230 @@ +""" +utils function for ymir and yolov5 +""" +import os.path as osp +from enum import IntEnum +from typing import Any, List, Tuple + +import numpy as np +import torch +import yaml +from easydict import EasyDict as edict +from nptyping import NDArray, Shape, UInt8 +from ymir_exc import env +from ymir_exc import result_writer as rw + +from models.common import DetectMultiBackend +from models.experimental import attempt_download +from utils.augmentations import letterbox +from utils.general import check_img_size, non_max_suppression, scale_coords +from utils.torch_utils import select_device + + +class YmirStage(IntEnum): + PREPROCESS = 1 # convert dataset + TASK = 2 # training/mining/infer + POSTPROCESS = 3 # export model + + +BBOX = NDArray[Shape['*,4'], Any] +CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] + + +def get_ymir_process(stage: YmirStage, p: float) -> float: + # const value for ymir process + PREPROCESS_PERCENT = 0.1 + TASK_PERCENT = 0.8 + POSTPROCESS_PERCENT = 0.1 + + if p < 0 or p > 1.0: + raise Exception(f'p not in [0,1], p={p}') + + if stage == YmirStage.PREPROCESS: + return PREPROCESS_PERCENT * p + elif stage == YmirStage.TASK: + return PREPROCESS_PERCENT + TASK_PERCENT * p + elif stage == YmirStage.POSTPROCESS: + return PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p + else: + raise NotImplementedError(f'unknown stage {stage}') + + +def get_merged_config() -> edict: + """ + merge ymir_config and executor_config + """ + merged_cfg = edict() + # the hyperparameter information + merged_cfg.param = env.get_executor_config() + + # the ymir path information + merged_cfg.ymir = env.get_current_env() + return merged_cfg + +def get_weight_file(cfg: edict, try_download: bool = True) -> str: + """ + return the weight file path by priority + + 1. find weight file in cfg.param.model_params_path or cfg.param.model_params_path + 2. 
if try_download and no weight file offered + for training task, yolov5 will download it from github. + """ + if cfg.ymir.run_training: + model_params_path = cfg.param.pretrained_model_paths + else: + model_params_path = cfg.param.model_params_path + + model_dir = osp.join(cfg.ymir.input.root_dir, + cfg.ymir.input.models_dir) + model_params_path = [p for p in model_params_path if osp.exists(osp.join(model_dir, p))] + + # choose weight file by priority, best.pt > xxx.pt + if 'best.pt' in model_params_path: + return osp.join(model_dir, 'best.pt') + else: + for f in model_params_path: + if f.endswith('.pt'): + return osp.join(model_dir, f) + + return "" + + +def download_weight_file(model_name): + weights = attempt_download(f'{model_name}.pt') + return weights + + +class YmirYolov5(): + """ + used for mining and inference to init detector and predict. + """ + + def __init__(self, cfg: edict): + self.cfg = cfg + device = select_device(cfg.param.get('gpu_id', 'cpu')) + + self.model = self.init_detector(device) + self.device = device + self.class_names = cfg.param.class_names + self.stride = self.model.stride + self.conf_thres = float(cfg.param.conf_thres) + self.iou_thres = float(cfg.param.iou_thres) + + img_size = int(cfg.param.img_size) + imgsz = (img_size, img_size) + imgsz = check_img_size(imgsz, s=self.stride) + + self.model.warmup(imgsz=(1, 3, *imgsz), half=False) # warmup + self.img_size = imgsz + + def init_detector(self, device: torch.device) -> DetectMultiBackend: + weights = get_weight_file(self.cfg) + + model = DetectMultiBackend(weights=weights, + device=device, + dnn=False, # not use opencv dnn for onnx inference + data='data.yaml') # dataset.yaml path + + return model + + def predict(self, img: CV_IMAGE) -> NDArray: + """ + predict single image and return bbox information + img: opencv BGR, uint8 format + """ + # preprocess: padded resize + img1 = letterbox(img, self.img_size, stride=self.stride, auto=True)[0] + + # preprocess: convert data format + img1 = 
img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + img1.unsqueeze_(dim=0) # expand for batch dim + pred = self.model(img1) + + # postprocess + conf_thres = self.conf_thres + iou_thres = self.iou_thres + classes = None # not filter class_idx in results + agnostic_nms = False + max_det = 1000 + + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + + result = [] + for det in pred: + if len(det): + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(img1.shape[2:], det[:, :4], img.shape).round() + result.append(det) + + # xyxy, conf, cls + if len(result) > 0: + tensor_result = torch.cat(result, dim=0) + numpy_result = tensor_result.data.cpu().numpy() + else: + numpy_result = np.zeros(shape=(0, 6), dtype=np.float32) + + return numpy_result + + def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: + anns = [] + result = self.predict(img) + + for i in range(result.shape[0]): + xmin, ymin, xmax, ymax, conf, cls = result[i, :6].tolist() + ann = rw.Annotation(class_name=self.class_names[int(cls)], score=conf, box=rw.Box( + x=int(xmin), y=int(ymin), w=int(xmax - xmin), h=int(ymax - ymin))) + + anns.append(ann) + + return anns + + +def convert_ymir_to_yolov5(cfg: edict, output_root_dir: str) -> None: + """ + convert ymir format dataset to yolov5 format + generate data.yaml for training/mining/infer + output_root_dir: the output root dir + """ + data = dict(path=cfg.ymir.input.root_dir, + train=cfg.ymir.input.training_index_file, + val=cfg.ymir.input.val_index_file, + test=cfg.ymir.input.candidate_index_file, + nc=len(cfg.param.class_names), + names=cfg.param.class_names) + + with open(osp.join(output_root_dir, 'data.yaml'), 'w') as fw: + fw.write(yaml.safe_dump(data)) + + +def write_ymir_training_result(cfg: edict, results: Tuple, maps: NDArray, rewrite=False) -> int: + """ + 
cfg: ymir config + results: (mp, mr, map50, map, loss) + maps: map@0.5:0.95 for all classes + rewrite: set true to ensure write the best result + """ + if not rewrite: + training_result_file = cfg.ymir.output.training_result_file + if osp.exists(training_result_file): + return 0 + + model = cfg.param.model + class_names = cfg.param.class_names + mp = results[0] # mean of precision + mr = results[1] # mean of recall + map50 = results[2] # mean of ap@0.5 + map = results[3] # mean of ap@0.5:0.95 + + # use `rw.write_training_result` to save training result + rw.write_training_result(model_names=[f'{model}.yaml', 'best.pt', 'last.pt', 'best.onnx'], + mAP=float(map), + mAP50=float(map50), + precision=float(mp), + recall=float(mr), + classAPs={class_name: v + for class_name, v in zip(class_names, maps.tolist())}) + return 0 From 4991ee7b61a9540ff097c2c91a2933a0abed8703 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 8 Jun 2022 19:08:52 +0800 Subject: [PATCH 002/150] update docker file --- det-yolov5-tmi/cuda102.dockerfile | 12 ++---------- det-yolov5-tmi/cuda111-devel.dockerfile | 17 ++++------------- det-yolov5-tmi/cuda111.dockerfile | 15 +++------------ 3 files changed, 9 insertions(+), 35 deletions(-) diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index 3c359ee..eeaf599 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -11,22 +11,14 @@ ENV LANG=C.UTF-8 # Install linux package RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ - libgl1-mesa-glx ffmpeg build-essential curl wget zip \ + libgl1-mesa-glx curl wget zip \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install python package -RUN pip install -U pip && \ - pip install cython xtcocotools onnx onnx-simplifier loguru \ - tensorboard==2.5.0 numba progress yacs pthflops imagesize pydantic pytest \ - scipy pyyaml opencv-python thop pandas seaborn - -# Install ymir-exc sdk -RUN pip install ymir-exc - # Copy file from 
host to docker ADD ./det-yolov5-tmi /app RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ +RUN pip install ymir-exc && pip install -r /app/requirements.txt # Download pretrained weight and font file RUN cd /app && bash data/scripts/download_weights.sh diff --git a/det-yolov5-tmi/cuda111-devel.dockerfile b/det-yolov5-tmi/cuda111-devel.dockerfile index 6378b8b..77389b9 100644 --- a/det-yolov5-tmi/cuda111-devel.dockerfile +++ b/det-yolov5-tmi/cuda111-devel.dockerfile @@ -12,31 +12,22 @@ ENV LANG=C.UTF-8 # Install linux package RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ - apt-get update && apt-get install -y gnupg2 git ninja-build libglib2.0-0 libsm6 \ - libxrender-dev libxext6 libgl1-mesa-glx ffmpeg sudo openssh-server \ - libyaml-dev vim tmux tree curl wget zip \ + apt-get update && apt-get install -y gnupg2 git libglib2.0-0 libgl1-mesa-glx \ + curl wget zip \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install python package -RUN pip install -U pip && \ - pip install cython xtcocotools jupyter onnx onnx-simplifier loguru \ - tensorboard==2.5.0 numba progress yacs pthflops pytest \ - scipy pydantic pyyaml imagesize opencv-python thop pandas seaborn - -# Install ymir-exc sdk -RUN pip install ymir-exc - # Copy file from host to docker ADD ./det-yolov5-tmi /app RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ +RUN pip install ymir-exc && pip install -r /app/requirements.txt # Download pretrained weight and font file RUN cd /app && bash data/scripts/download_weights.sh RUN mkdir -p /root/.config/Ultralytics && \ wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf -# setup PYTHONPATH to find local package +# Make PYTHONPATH find local package ENV PYTHONPATH=. 
WORKDIR /app diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index 4b637ec..9c8c061 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -11,24 +11,15 @@ ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV LANG=C.UTF-8 # Install linux package -RUN apt-get update && apt-get install -y gnupg2 git ninja-build libglib2.0-0 libsm6 \ - libxrender-dev libxext6 libgl1-mesa-glx ffmpeg sudo openssh-server \ - libyaml-dev vim tmux tree curl wget zip \ +RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ + libgl1-mesa-glx curl wget zip \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install python package -RUN pip install -U pip && \ - pip install cython xtcocotools onnx onnx-simplifier loguru \ - tensorboard==2.5.0 numba progress yacs pthflops imagesize pydantic pytest \ - scipy pyyaml opencv-python thop pandas seaborn - -# Install ymir-exc sdk -RUN pip install ymir-exc - # Copy file from host to docker ADD ./det-yolov5-tmi /app RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ +RUN pip install ymir-exc && pip install -r /app/requirements.txt # Download pretrained weight and font file RUN cd /app && bash data/scripts/download_weights.sh From 9d16def7a12f5bdebf63263b7136a660c8150f20 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 10 Jun 2022 15:24:56 +0800 Subject: [PATCH 003/150] add many file --- det-mmdetection-tmi/README_ymir.md | 8 + .../mmdet/core/evaluation/eval_hooks.py | 25 ++- .../mmdet/datasets/__init__.py | 3 +- det-mmdetection-tmi/mmdet/datasets/coco.py | 11 +- det-mmdetection-tmi/mmdet/datasets/ymir.py | 201 ++++++++++++++++++ det-mmdetection-tmi/mmdet/utils/util_ymir.py | 149 +++++++++++++ det-mmdetection-tmi/start.py | 95 +++++++++ det-mmdetection-tmi/ymir_log.py | 53 +++++ det-mmdetection-tmi/ymir_train.py | 121 +++++++++++ 9 files changed, 663 insertions(+), 3 deletions(-) create mode 100644 det-mmdetection-tmi/README_ymir.md create mode 100644 
det-mmdetection-tmi/mmdet/datasets/ymir.py create mode 100644 det-mmdetection-tmi/mmdet/utils/util_ymir.py create mode 100644 det-mmdetection-tmi/start.py create mode 100644 det-mmdetection-tmi/ymir_log.py create mode 100644 det-mmdetection-tmi/ymir_train.py diff --git a/det-mmdetection-tmi/README_ymir.md b/det-mmdetection-tmi/README_ymir.md new file mode 100644 index 0000000..de86768 --- /dev/null +++ b/det-mmdetection-tmi/README_ymir.md @@ -0,0 +1,8 @@ +# det-mmdetection-tmi + +`mmdetection` framework for object `det`ection `t`raining/`m`ining/`i`nfer task + +# changelog +- modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format +- modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process +- modify `mmdet/datasets/__init__.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index 7c1fbe9..15c47bc 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -7,7 +7,9 @@ from mmcv.runner import DistEvalHook as BaseDistEvalHook from mmcv.runner import EvalHook as BaseEvalHook from torch.nn.modules.batchnorm import _BatchNorm - +from ymir_exc import monitor +from mmdet.utils.util_ymir import update_training_result_file +import os.path as osp def _calc_dynamic_intervals(start_interval, dynamic_interval_list): assert mmcv.is_list_of(dynamic_interval_list, tuple) @@ -43,6 +45,12 @@ def before_train_epoch(self, runner): self._decide_interval(runner) super().before_train_epoch(runner) + def after_train_epoch(self, runner): + """Report the training process for ymir""" + percent=0.95*(runner.epoch/runner.max_epochs) + monitor.write_monitor_logger(percent=percent) + super().after_train_epoch(runner) + def before_train_iter(self, runner): 
self._decide_interval(runner) super().before_train_iter(runner) @@ -60,6 +68,10 @@ def _do_evaluate(self, runner): # the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) + best_score = runner.meta['hook_msgs'].get( + 'best_score', self.init_value_map[self.rule]) + if self.compare_func(key_score, best_score): + update_training_result_file(key_score) # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, @@ -87,6 +99,12 @@ def before_train_epoch(self, runner): self._decide_interval(runner) super().before_train_epoch(runner) + def after_train_epoch(self, runner): + """Report the training process for ymir""" + percent=0.1+0.8*(runner.epoch/runner.max_epochs) + monitor.write_monitor_logger(percent=percent) + super().after_train_epoch(runner) + def before_train_iter(self, runner): self._decide_interval(runner) super().before_train_iter(runner) @@ -128,3 +146,8 @@ def _do_evaluate(self, runner): # the action to save the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) + + best_score = runner.meta['hook_msgs'].get( + 'best_score', self.init_value_map[self.rule]) + if self.compare_func(key_score, best_score): + update_training_result_file(key_score) diff --git a/det-mmdetection-tmi/mmdet/datasets/__init__.py b/det-mmdetection-tmi/mmdet/datasets/__init__.py index f251d07..ff66046 100644 --- a/det-mmdetection-tmi/mmdet/datasets/__init__.py +++ b/det-mmdetection-tmi/mmdet/datasets/__init__.py @@ -15,6 +15,7 @@ from .voc import VOCDataset from .wider_face import WIDERFaceDataset from .xml_style import XMLDataset +from .ymir import YmirDataset __all__ = [ 'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset', @@ -24,5 +25,5 @@ 'ClassBalancedDataset', 'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline', 'NumClassCheckHook', 'CocoPanopticDataset', 'MultiImageMixDataset', - 'OpenImagesDataset', 'OpenImagesChallengeDataset' 
+ 'OpenImagesDataset', 'OpenImagesChallengeDataset', 'YmirDataset' ] diff --git a/det-mmdetection-tmi/mmdet/datasets/coco.py b/det-mmdetection-tmi/mmdet/datasets/coco.py index efd6949..cde2de7 100644 --- a/det-mmdetection-tmi/mmdet/datasets/coco.py +++ b/det-mmdetection-tmi/mmdet/datasets/coco.py @@ -3,6 +3,7 @@ import io import itertools import logging +import os import os.path as osp import tempfile import warnings @@ -12,7 +13,6 @@ import numpy as np from mmcv.utils import print_log from terminaltables import AsciiTable - from mmdet.core import eval_recalls from .api_wrappers import COCO, COCOeval from .builder import DATASETS @@ -562,6 +562,15 @@ def evaluate(self, results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) + + COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') + if COCO_EVAL_TMP_FILE is not None: + mmcv.dump({name:value for name,value in results_per_category}, COCO_EVAL_TMP_FILE, file_format='json') + else: + raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + + print_log(f'\n write eval result to {COCO_EVAL_TMP_FILE}', logger=logger) + num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) diff --git a/det-mmdetection-tmi/mmdet/datasets/ymir.py b/det-mmdetection-tmi/mmdet/datasets/ymir.py new file mode 100644 index 0000000..5cbbbfa --- /dev/null +++ b/det-mmdetection-tmi/mmdet/datasets/ymir.py @@ -0,0 +1,201 @@ +# Copyright (c) OpenMMLab voc.py. All rights reserved. 
+# wangjiaxin 2022-04-25 + +from collections import OrderedDict +import os.path as osp + +# from PIL import Image +import imagesize + +import json +from .builder import DATASETS +from .api_wrappers import COCO +from .coco import CocoDataset + +@DATASETS.register_module() +class YmirDataset(CocoDataset): + """ + converted dataset by ymir system 1.0.0 + /in/assets: image files directory + /in/annotations: annotation files directory + /in/train-index.tsv: image_file \t annotation_file + /in/val-index.tsv: image_file \t annotation_file + """ + def __init__(self, + min_size=0, + ann_prefix='annotations', + **kwargs): + self.min_size=min_size + self.ann_prefix=ann_prefix + super(YmirDataset, self).__init__(**kwargs) + + def load_annotations(self, ann_file): + """Load annotation from TXT style ann_file. + + Args: + ann_file (str): Path of TXT file. + + Returns: + list[dict]: Annotation info from TXT file. + """ + + images = [] + categories = [] + # category_id is from 1 for coco, not 0 + for i, name in enumerate(self.CLASSES): + categories.append({'supercategory':'none', + 'id': i+1, + 'name': name}) + + annotations = [] + instance_counter = 1 + image_counter = 1 + + with open(ann_file,'r') as fp: + lines=fp.readlines() + + for line in lines: + # split any white space + img_path, ann_path = line.strip().split() + img_path = osp.join(self.data_root, self.img_prefix, img_path) + ann_path = osp.join(self.data_root, self.ann_prefix, ann_path) + # img = Image.open(img_path) + # width, height = img.size + width, height = imagesize.get(img_path) + images.append( + dict(id=image_counter, + file_name=img_path, + ann_path=ann_path, + width=width, + height=height)) + + try: + anns = self.get_txt_ann_info(ann_path) + except Exception as e: + print(f'bad annotation for {ann_path} with {e}') + anns = [] + + for ann in anns: + ann['image_id']=image_counter + ann['id']=instance_counter + annotations.append(ann) + instance_counter+=1 + + image_counter+=1 + + ### pycocotool coco init + 
self.coco = COCO() + self.coco.dataset['type']='instances' + self.coco.dataset['categories']=categories + self.coco.dataset['images']=images + self.coco.dataset['annotations']=annotations + self.coco.createIndex() + + ### mmdetection coco init + # avoid the filter problem in CocoDataset, view coco_api.py for detail + self.coco.img_ann_map = self.coco.imgToAnns + self.coco.cat_img_map = self.coco.catToImgs + + # get valid category_id (in annotation, start from 1, arbitary) + self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES) + # convert category_id to label(train_id, start from 0) + self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} + self.img_ids = self.coco.get_img_ids() + # self.img_ids = list(self.coco.imgs.keys()) + assert len(self.img_ids) > 0, 'image number must > 0' + N=len(self.img_ids) + print(f'load {N} image from YMIR dataset') + + data_infos = [] + total_ann_ids = [] + for i in self.img_ids: + info = self.coco.load_imgs([i])[0] + info['filename'] = info['file_name'] + data_infos.append(info) + ann_ids = self.coco.get_ann_ids(img_ids=[i]) + total_ann_ids.extend(ann_ids) + assert len(set(total_ann_ids)) == len( + total_ann_ids), f"Annotation ids in '{ann_file}' are not unique!" + return data_infos + + def dump(self, ann_file): + with open(ann_file,'w') as fp: + json.dump(self.coco.dataset, fp) + + def get_ann_path_from_img_path(self,img_path): + img_id=osp.splitext(osp.basename(img_path))[0] + return osp.join(self.data_root, self.ann_prefix, img_id+'.txt') + + def get_txt_ann_info(self, txt_path): + """Get annotation from TXT file by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. 
+ """ + + # img_id = self.data_infos[idx]['id'] + # txt_path = osp.splitext(img_path)[0]+'.txt' + # txt_path = self.get_ann_path_from_img_path(img_path) + anns = [] + if osp.exists(txt_path): + with open(txt_path,'r') as fp: + lines=fp.readlines() + else: + lines=[] + for line in lines: + obj=[int(x) for x in line.strip().split(',')] + # YMIR category id starts from 0, coco from 1 + category_id, xmin, ymin, xmax, ymax = obj + bbox = [xmin, ymin, xmax, ymax] + h,w=ymax-ymin,xmax-xmin + ignore = 0 + if self.min_size: + assert not self.test_mode + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + if w < self.min_size or h < self.min_size: + ignore = 1 + + ann = dict( + segmentation=[[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]], + area=w*h, + iscrowd=0, + image_id=None, + bbox=[xmin, ymin, w, h], + category_id=category_id+1, # category id is from 1 for coco + id=None, + ignore=ignore + ) + anns.append(ann) + return anns + + def get_cat_ids(self, idx): + """Get category ids in TXT file by index. + + Args: + idx (int): Index of data. + + Returns: + list[int]: All categories in the image of specified index. 
+ """ + + cat_ids = [] + # img_path = self.data_infos[idx]['file_name'] + # txt_path = self.get_ann_path_from_img_path(img_path) + txt_path = self.data_infos[idx]['ann_path'] + txt_path = osp.join(self.data_root, self.ann_prefix, txt_path) + if osp.exists(txt_path): + with open(txt_path,'r') as fp: + lines = fp.readlines() + else: + lines = [] + + for line in lines: + obj = [int(x) for x in line.strip().split(',')] + # label, xmin, ymin, xmax, ymax = obj + cat_ids.append(obj[0]) + + return cat_ids diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py new file mode 100644 index 0000000..3b5008b --- /dev/null +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -0,0 +1,149 @@ +""" +utils function for ymir and yolov5 +""" +import glob +import os +import os.path as osp +import sys +from enum import IntEnum +from typing import Any, List, Tuple +from urllib.parse import urlparse + +import mmcv +from easydict import EasyDict as edict +from nptyping import NDArray, Shape, UInt8 +from torch.hub import HASH_REGEX, _get_torch_home, download_url_to_file +from ymir_exc import env +from ymir_exc import result_writer as rw + + +class YmirStage(IntEnum): + PREPROCESS = 1 # convert dataset + TASK = 2 # training/mining/infer + POSTPROCESS = 3 # export model + + +BBOX = NDArray[Shape['*,4'], Any] +CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] + + +def get_ymir_process(stage: YmirStage, p: float = 0.0) -> float: + # const value for ymir process + PREPROCESS_PERCENT = 0.1 + TASK_PERCENT = 0.8 + POSTPROCESS_PERCENT = 0.1 + + if p < 0 or p > 1.0: + raise Exception(f'p not in [0,1], p={p}') + + if stage == YmirStage.PREPROCESS: + return PREPROCESS_PERCENT * p + elif stage == YmirStage.TASK: + return PREPROCESS_PERCENT + TASK_PERCENT * p + elif stage == YmirStage.POSTPROCESS: + return PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p + else: + raise NotImplementedError(f'unknown stage {stage}') + + +def get_merged_config() -> edict: 
+ """ + merge ymir_config and executor_config + """ + merged_cfg = edict() + # the hyperparameter information + merged_cfg.param = env.get_executor_config() + + # the ymir path information + merged_cfg.ymir = env.get_current_env() + return merged_cfg + + +def get_weight_file(cfg: edict) -> str: + """ + return the weight file path by priority + find weight file in cfg.param.model_params_path or cfg.param.model_params_path + """ + if cfg.ymir.run_training: + model_params_path = cfg.param.pretrained_model_paths + else: + model_params_path = cfg.param.model_params_path + + model_dir = osp.join(cfg.ymir.input.root_dir, + cfg.ymir.input.models_dir) + model_params_path = [ + p for p in model_params_path if osp.exists(osp.join(model_dir, p))] + + # choose weight file by priority, best.pt > xxx.pt + if 'best.pt' in model_params_path: + return osp.join(model_dir, 'best.pt') + else: + for f in model_params_path: + if f.endswith('.pt'): + return osp.join(model_dir, f) + + return "" + + +def download_weight_file(model: str) -> str: + """ + download weight file from web if not exist. 
+ """ + model_to_url = dict( + faster_rcnn_r50_fpn='https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth', + faster_rcnn_r101_fpn='https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth', + yolox_tiny='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth', + yolox_s='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth', + yolox_l='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', + yolox_x='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth', + yolox_nano='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth' + ) + + url = model_to_url[model] + torch_home = _get_torch_home() + model_dir = os.path.join(torch_home, 'checkpoints') + + os.makedirs(model_dir, exist_ok=True) + parts = urlparse(url) + filename = os.path.basename(parts.path) + cached_file = os.path.join(model_dir, filename) + + if not os.path.exists(cached_file): + sys.stderr.write('Downloading: "{}" to {}\n'.format( + url, cached_file)) + r = HASH_REGEX.search(filename) # r is Optional[Match[str]] + hash_prefix = r.group(1) if r else None + download_url_to_file( + url, cached_file, hash_prefix, progress=True) + + return cached_file + + +def update_training_result_file(key_score): + COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') + if COCO_EVAL_TMP_FILE is None: + raise Exception( + 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + + results_per_category = mmcv.load(COCO_EVAL_TMP_FILE) + + work_dir = 
os.getenv('YMIR_MODELS_DIR') + if work_dir is None or osp.isdir(work_dir): + raise Exception( + f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {work_dir}') + + # assert only one model config file in work_dir + model_config_file = glob.glob(osp.join(work_dir, '*.py'))[0] + weight_files = glob.glob(osp.join(work_dir, 'best_bbox_mAP_epoch_*.pth')) + if len(weight_files) == 0: + weight_files = glob.glob(osp.join(work_dir, 'epoch_*.pth')) + + if len(weight_files) == 0: + raise Exception(f'no weight file found in {work_dir}') + + # sort the weight files by time, use the latest file. + weight_files.sort(key=lambda fn: osp.getmtime(fn)) + model_weight_file = osp.basename(weight_files[-1]) + rw.write_training_result(model_names=[model_weight_file, osp.basename(model_config_file)], + mAP=key_score, + classAPs=results_per_category) \ No newline at end of file diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py new file mode 100644 index 0000000..16e8e89 --- /dev/null +++ b/det-mmdetection-tmi/start.py @@ -0,0 +1,95 @@ +import logging +import os +import os.path as osp +import shutil +import subprocess +import sys + +import cv2 +from easydict import EasyDict as edict +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw + +from mmdet.utils.util_ymir import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, download_weight_file, get_merged_config, + get_weight_file, get_ymir_process) + + +def start() -> int: + cfg = get_merged_config() + + logging.info(f'merged config: {cfg}') + + if cfg.ymir.run_training: + _run_training(cfg) + elif cfg.ymir.run_mining: + _run_mining(cfg) + elif cfg.ymir.run_infer: + _run_infer(cfg) + else: + logging.warning('no task running') + + return 0 + + +def _run_training(cfg: edict) -> None: + """ + function for training task + 1. convert dataset + 2. training model + 3. save model weight/hyperparameter/... 
to design directory + """ + command = 'python3 ymir_train.py' + logging.info(f'start training: {command}') + subprocess.run(command.split(), check=True) + # if task done, write 100% percent log + monitor.write_monitor_logger(percent=1.0) + + +def _run_mining(cfg: edict()) -> None: + logging.info('convert ymir dataset to yolov5 dataset') + out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') + convert_ymir_to_yolov5(cfg, out_dir) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + + command = 'python3 mining/mining_cald.py' + logging.info(f'mining: {command}') + subprocess.run(command.split(), check=True) + monitor.write_monitor_logger(percent=1.0) + + +def _run_infer(cfg: edict) -> None: + # generate data.yaml for infer + logging.info('convert ymir dataset to yolov5 dataset') + out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') + convert_ymir_to_yolov5(cfg, out_dir) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + + N = dr.items_count(env.DatasetType.CANDIDATE) + infer_result = dict() + model = YmirYolov5(cfg) + idx = -1 + + monitor_gap = max(1, N // 100) + for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): + img = cv2.imread(asset_path) + result = model.infer(img) + infer_result[asset_path] = result + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + monitor.write_monitor_logger(percent=percent) + + rw.write_infer_result(infer_result=infer_result) + monitor.write_monitor_logger(percent=1.0) + + +if __name__ == '__main__': + logging.basicConfig(stream=sys.stdout, + format='%(levelname)-8s: [%(asctime)s] %(message)s', + datefmt='%Y%m%d-%H:%M:%S', + level=logging.INFO) + + os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') + sys.exit(start()) diff --git a/det-mmdetection-tmi/ymir_log.py b/det-mmdetection-tmi/ymir_log.py new file mode 100644 index 
0000000..29f2ec8 --- /dev/null +++ b/det-mmdetection-tmi/ymir_log.py @@ -0,0 +1,53 @@ +import time +import os.path as osp +from typing import Generator +from pygtail import Pygtail +from mmcv.util import TORCH_VERSION, digit_version + +if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.1')): + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' + 'TensorboardLoggerHook.') +else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)') + + +def read_log(f: str, wait: bool = True, sleep: float = 0.1) -> Generator[str]: + """ + Basically tail -f with a configurable sleep + """ + with open(f) as logfile: + # logfile.seek(0, os.SEEK_END) + while True: + new_line = logfile.readline() + if new_line: + yield new_line + else: + if wait: + # wait for new line + time.sleep(sleep) + else: + # read all line in file + break + +def write_tensorboard_text(tb_log_file: str, executor_log_file: str) -> None: + global _TENSORBOARD_GLOBAL_STEP + # tb_log_file = osp.join(cfg.ymir.output.tensorboard_dir, 'tensorboard_text.log') + # executor_log_file = cfg.ymir.output.executor_log_file + writer = SummaryWriter(tb_log_file) + + # Pygtail always return the new lines + for line in Pygtail(executor_log_file): + writer.add_text(tag='ymir-executor', text_string=line, global_step=_TENSORBOARD_GLOBAL_STEP) + _TENSORBOARD_GLOBAL_STEP += 1 + + writer.close() \ No newline at end of file diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py new file mode 100644 index 0000000..205bbc7 --- /dev/null +++ b/det-mmdetection-tmi/ymir_train.py @@ -0,0 +1,121 @@ +import glob +import logging +import os +import os.path as osp +import subprocess +import sys + +from easydict import 
EasyDict as edict +from ymir_exc import monitor +from mmdet.utils.util_ymir import get_merged_config, get_weight_file, download_weight_file, get_ymir_process, YmirStage, update_training_result_file + + +def main(cfg: edict) -> int: + # default ymir config + gpu_id = cfg.param.get("gpu_id", '0') + num_gpus = len(gpu_id.split(",")) + if num_gpus == 0: + raise Exception(f'gpu_id = {gpu_id} is not valid, eg: 0 or 2,4') + + classes = cfg.param.class_names + num_classes = len(classes) + model = cfg.param.model + if num_classes==0: + raise Exception('not find class_names in config!') + + weight_file = get_weight_file(cfg) + if not weight_file: + weight_file = download_weight_file(model) + + # user define config + learning_rate = cfg.param.learning_rate + epochs = cfg.param.epochs + + samples_per_gpu = cfg.param.samples_per_gpu + workers_per_gpu = min(4, max(1, samples_per_gpu//2)) + + supported_models = [] + if model.startswith("faster_rcnn"): + files = glob.glob( + osp.join('configs/faster_rcnn/faster_rcnn_*_ymir_coco.py')) + supported_models = ['faster_rcnn_r50_fpn', 'faster_rcnn_r101_fpn'] + elif model.startswith("yolox"): + files = glob.glob(osp.join('configs/yolox/yolox_*_8x8_300e_ymir_coco.py')) + supported_models = ['yolox_nano', 'yolox_tiny', + 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x'] + else: + files = glob.glob(osp.join('configs/*/*_ymir_coco.py')) + supported_models = [osp.basename(f) for f in files] + + assert model in supported_models, f'unknown model {model}, not in {supported_models}' + + # modify base config file + base_config_file = './configs/_base_/datasets/ymir_coco.py' + + modify_dict = dict( + classes=classes, + num_classes=num_classes, + max_epochs=epochs, + lr=learning_rate, + samples_per_gpu=samples_per_gpu, + workers_per_gpu=workers_per_gpu, + data_root=cfg.ymir.input.root_dir, + img_prefix=cfg.ymir.input.assets_dir, + ann_prefix=cfg.ymir.input.annotations_dir, + train_ann_file=cfg.ymir.input.training_index_file, + 
val_ann_file=cfg.ymir.input.val_index_file, + tensorboard_dir=cfg.ymir.output.tensorboard_dir, + work_dir=cfg.ymir.output.models_dir, + checkpoints_path=weight_file + ) + + logging.info(f'modified config is {modify_dict}') + with open(base_config_file, 'r') as fp: + lines = fp.readlines() + + fw = open(base_config_file, 'w') + for line in lines: + for key in modify_dict: + if line.startswith((f"{key}=", f"{key} =")): + value = modify_dict[key] + if isinstance(value, str): + line = f"{key} = '{value}' \n" + else: + line = f"{key} = {value} \n" + break + fw.write(line) + fw.close() + + # train_config_file will use the config in base_config_file + train_config_file = '' + for f in files: + if osp.basename(f).startswith(model): + train_config_file = f + + monitor.write_monitor_logger(percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) + + work_dir = cfg.ymir.output.models_dir + if num_gpus == 1: + cmd = f"python tools/train.py {train_config_file} " + \ + f"--work-dir {work_dir} --gpu-id {gpu_id}" + else: + os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id + cmd = f"./tools/dist_train.sh {train_config_file} {num_gpus} " + \ + f"--work-dir {work_dir}" + + logging.info(f"training command: {cmd}") + subprocess.run(cmd.split(), check=True) + + # eval_hooks will generate training_result_file if current map is best. 
+ # create a fake map = 0 if no training_result_file generate in eval_hooks + if not osp.exists(cfg.ymir.output.training_result_file): + update_training_result_file(0) + + return 0 + +if __name__ == '__main__': + cfg = get_merged_config() + os.environ.setdefault('YMIR_MODELS_DIR','') + os.environ.setdefault('COCO_EVAL_TMP_FILE', '') + os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') + sys.exit(main(cfg)) From 7d8d091a0c1ddd018b948f19a63612ce902d675f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 10 Jun 2022 15:28:58 +0800 Subject: [PATCH 004/150] clone code to /workspace/app instead of /app --- live-code-executor/ymir_start.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index bd1ae27..96680dc 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -15,7 +15,7 @@ def show_ymir_info(executor_config: dict) -> None: def main(): - # step 1. read config.yaml and clone git_url:git_branch to /app + # step 1. read config.yaml and clone git_url:git_branch to /workspace/app executor_config = env.get_executor_config() show_ymir_info(executor_config) @@ -23,14 +23,14 @@ def main(): git_branch = executor_config.get('git_branch', '') if not git_branch: - cmd = f'git clone {git_url} /app' + cmd = f'git clone {git_url} /workspace/app' else: - cmd = f'git clone {git_url} -b {git_branch} /app' + cmd = f'git clone {git_url} -b {git_branch} /workspace/app' logger.info(f'clone code: {cmd}') subprocess.check_output(cmd.split()) - # step 2. read /app/extra-requirements.txt and install it. - pypi_file = '/app/extra-requirements.txt' + # step 2. read /workspace/app/extra-requirements.txt and install it. 
+ pypi_file = '/workspace/app/extra-requirements.txt' if osp.exists(pypi_file): pypi_mirror = executor_config.get('pypi_mirror', '') @@ -42,10 +42,10 @@ def main(): else: logger.info('no python package needs to install') - # step 3. run /app/start.py + # step 3. run /workspace/app/start.py cmd = 'python3 start.py' logger.info(f'run task: {cmd}') - subprocess.check_output(cmd.split(), cwd='/app') + subprocess.check_output(cmd.split(), cwd='/workspace/app') logger.info('live code executor run successfully') return 0 From a79deba854ff25c2945eab0962a37dea5eaaf27e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 10 Jun 2022 17:44:12 +0800 Subject: [PATCH 005/150] update for non-root user, git clone to /workspace/app instead of /app --- live-code-executor/mxnet.dockerfile | 8 ++++++++ live-code-executor/torch.dockerfile | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 7056b80..f66d60a 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -4,6 +4,9 @@ ARG BUILD="runtime" # runtime/devel ARG SYSTEM="ubuntu20.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} +ARG USER_GID=1000 +ARG USER_UID=1000 +ARG USER=ymir ARG MXNET="1.9.1" ENV LANG=C.UTF-8 @@ -29,4 +32,9 @@ COPY ymir_start.py /workspace/ymir_start.py # set up python path ENV PYTHONPATH=. +# Create non-root user and chown /workspace +RUN groupadd --gid $USER_GID $USER \ + && useradd --uid $USER_UID --gid $USER_GID -m $USER --create-home \ + && chown ${USER_GID}:${USER_GID} /workspace + CMD bash /usr/bin/start.sh diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index e2c606d..cd848ab 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -4,6 +4,9 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 not work!!! 
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +ARG USER_GID=1000 +ARG USER_UID=1000 +ARG USER=ymir ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -34,4 +37,9 @@ COPY ymir_start.py /workspace/ymir_start.py # set up python path ENV PYTHONPATH=. +# Create non-root user and chown /workspace +RUN groupadd --gid $USER_GID $USER \ + && useradd --uid $USER_UID --gid $USER_GID -m $USER --create-home \ + && chown ${USER_GID}:${USER_GID} /workspace + CMD bash /usr/bin/start.sh From 26b18106e5afa31bdbae56d13c2b20a3c1fdd301 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 10 Jun 2022 18:41:36 +0800 Subject: [PATCH 006/150] output the subprocess to main process directly --- live-code-executor/ymir_start.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index 96680dc..e807bd4 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -27,7 +27,7 @@ def main(): else: cmd = f'git clone {git_url} -b {git_branch} /workspace/app' logger.info(f'clone code: {cmd}') - subprocess.check_output(cmd.split()) + subprocess.run(cmd.split(), check=True) # step 2. read /workspace/app/extra-requirements.txt and install it. pypi_file = '/workspace/app/extra-requirements.txt' @@ -38,14 +38,14 @@ def main(): cmd += ' -i {pypi_mirror}' if pypi_mirror else '' logger.info(f'install python package: {cmd}') - subprocess.check_output(cmd.split()) + subprocess.run(cmd.split(), check=True) else: logger.info('no python package needs to install') # step 3. 
run /workspace/app/start.py cmd = 'python3 start.py' logger.info(f'run task: {cmd}') - subprocess.check_output(cmd.split(), cwd='/workspace/app') + subprocess.run(cmd.split(), check=True, cwd='/workspace/app') logger.info('live code executor run successfully') return 0 From 9479327ecf67dc0d8f0ef717d845f7d2bb32cbdb Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 13 Jun 2022 14:37:07 +0800 Subject: [PATCH 007/150] sigmoid --> hardswish --- det-yolov5-tmi/models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/models/common.py b/det-yolov5-tmi/models/common.py index 0dae024..5dda9ce 100644 --- a/det-yolov5-tmi/models/common.py +++ b/det-yolov5-tmi/models/common.py @@ -26,7 +26,7 @@ make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import copy_attr, time_sync - +from utils.activations import SiLU, Hardswish def autopad(k, p=None): # kernel, padding # Pad to 'same' @@ -41,7 +41,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, k super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) From db6abd3118bd942f2c3cb898a42a67210bb91ef6 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 11:52:25 +0800 Subject: [PATCH 008/150] fix mining and infer bug --- det-yolov5-tmi/mining/mining_cald.py | 5 +++-- det-yolov5-tmi/models/common.py | 2 +- det-yolov5-tmi/start.py | 17 +++++++++-------- det-yolov5-tmi/utils/ymir_yolov5.py | 15 ++++++--------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/det-yolov5-tmi/mining/mining_cald.py 
b/det-yolov5-tmi/mining/mining_cald.py index 77bfcf6..d93fb43 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -15,7 +15,7 @@ from ymir_exc import result_writer as rw from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5, YmirStage, get_ymir_process +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5, YmirStage, get_ymir_process, get_merged_config def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: @@ -133,7 +133,8 @@ def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: def main(): - miner = MiningCald() + cfg = get_merged_config() + miner = MiningCald(cfg) mining_result = miner.mining() rw.write_mining_result(mining_result=mining_result) diff --git a/det-yolov5-tmi/models/common.py b/det-yolov5-tmi/models/common.py index 5dda9ce..a2ec35a 100644 --- a/det-yolov5-tmi/models/common.py +++ b/det-yolov5-tmi/models/common.py @@ -41,7 +41,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, k super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = nn.Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index d22b3b8..ba06400 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -40,9 +40,9 @@ def _run_training(cfg: edict) -> None: 3. save model weight/hyperparameter/... to design directory """ # 1. 
convert dataset - logging.info('convert ymir dataset to yolov5 dataset') out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg, out_dir) + convert_ymir_to_yolov5(cfg) + logging.info(f'generate {out_dir}/data.yaml') monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) # 2. training model @@ -80,9 +80,10 @@ def _run_training(cfg: edict) -> None: def _run_mining(cfg: edict()) -> None: - logging.info('convert ymir dataset to yolov5 dataset') - out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') - convert_ymir_to_yolov5(cfg, out_dir) + # generate data.yaml for mining + out_dir = cfg.ymir.output.root_dir + convert_ymir_to_yolov5(cfg) + logging.info(f'generate {out_dir}/data.yaml') monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) command = 'python3 mining/mining_cald.py' @@ -93,9 +94,9 @@ def _run_mining(cfg: edict()) -> None: def _run_infer(cfg: edict) -> None: # generate data.yaml for infer - logging.info('convert ymir dataset to yolov5 dataset') - out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') - convert_ymir_to_yolov5(cfg, out_dir) + out_dir = cfg.ymir.output.root_dir + convert_ymir_to_yolov5(cfg) + logging.info(f'generate {out_dir}/data.yaml') monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) N = dr.items_count(env.DatasetType.CANDIDATE) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 64ce9be..9010340 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -61,13 +61,10 @@ def get_merged_config() -> edict: merged_cfg.ymir = env.get_current_env() return merged_cfg -def get_weight_file(cfg: edict, try_download: bool = True) -> str: +def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority - - 1. find weight file in cfg.param.model_params_path or cfg.param.model_params_path - 2. 
if try_download and no weight file offered - for training task, yolov5 will download it from github. + find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ if cfg.ymir.run_training: model_params_path = cfg.param.pretrained_model_paths @@ -120,10 +117,11 @@ def __init__(self, cfg: edict): def init_detector(self, device: torch.device) -> DetectMultiBackend: weights = get_weight_file(self.cfg) + data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') model = DetectMultiBackend(weights=weights, device=device, dnn=False, # not use opencv dnn for onnx inference - data='data.yaml') # dataset.yaml path + data=data_yaml) # dataset.yaml path return model @@ -183,11 +181,10 @@ def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: return anns -def convert_ymir_to_yolov5(cfg: edict, output_root_dir: str) -> None: +def convert_ymir_to_yolov5(cfg: edict) -> None: """ convert ymir format dataset to yolov5 format generate data.yaml for training/mining/infer - output_root_dir: the output root dir """ data = dict(path=cfg.ymir.input.root_dir, train=cfg.ymir.input.training_index_file, @@ -196,7 +193,7 @@ def convert_ymir_to_yolov5(cfg: edict, output_root_dir: str) -> None: nc=len(cfg.param.class_names), names=cfg.param.class_names) - with open(osp.join(output_root_dir, 'data.yaml'), 'w') as fw: + with open(osp.join(cfg.ymir.output.root_dir, 'data.yaml'), 'w') as fw: fw.write(yaml.safe_dump(data)) From 79c72a19bdbf7b05f85161f56346484edae6747d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 12:23:35 +0800 Subject: [PATCH 009/150] revert to /app --- live-code-executor/ymir_start.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index e807bd4..71adf5c 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -15,7 +15,7 @@ def show_ymir_info(executor_config: dict) -> None: def main(): - # step 1. 
read config.yaml and clone git_url:git_branch to /workspace/app + # step 1. read config.yaml and clone git_url:git_branch to /app executor_config = env.get_executor_config() show_ymir_info(executor_config) @@ -23,14 +23,14 @@ def main(): git_branch = executor_config.get('git_branch', '') if not git_branch: - cmd = f'git clone {git_url} /workspace/app' + cmd = f'git clone {git_url} /app' else: - cmd = f'git clone {git_url} -b {git_branch} /workspace/app' + cmd = f'git clone {git_url} -b {git_branch} /app' logger.info(f'clone code: {cmd}') subprocess.run(cmd.split(), check=True) - # step 2. read /workspace/app/extra-requirements.txt and install it. - pypi_file = '/workspace/app/extra-requirements.txt' + # step 2. read /app/extra-requirements.txt and install it. + pypi_file = '/app/extra-requirements.txt' if osp.exists(pypi_file): pypi_mirror = executor_config.get('pypi_mirror', '') @@ -42,10 +42,10 @@ def main(): else: logger.info('no python package needs to install') - # step 3. run /workspace/app/start.py + # step 3. run /app/start.py cmd = 'python3 start.py' logger.info(f'run task: {cmd}') - subprocess.run(cmd.split(), check=True, cwd='/workspace/app') + subprocess.run(cmd.split(), check=True, cwd='/app') logger.info('live code executor run successfully') return 0 From 3f40dac76621cb588ccbb917de5ef2b1772ad64c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 12:24:24 +0800 Subject: [PATCH 010/150] Revert "update for non-root user, git clone to /workspace/app instead of /app" This reverts commit a79deba854ff25c2945eab0962a37dea5eaaf27e. 
--- live-code-executor/mxnet.dockerfile | 8 -------- live-code-executor/torch.dockerfile | 8 -------- 2 files changed, 16 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 449bfc4..e55f478 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -4,9 +4,6 @@ ARG BUILD="runtime" # runtime/devel ARG SYSTEM="ubuntu20.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} -ARG USER_GID=1000 -ARG USER_UID=1000 -ARG USER=ymir ARG MXNET="1.9.1" ENV LANG=C.UTF-8 @@ -41,9 +38,4 @@ COPY ymir_start.py /workspace/ymir_start.py # set up python path ENV PYTHONPATH=. -# Create non-root user and chown /workspace -RUN groupadd --gid $USER_GID $USER \ - && useradd --uid $USER_UID --gid $USER_GID -m $USER --create-home \ - && chown ${USER_GID}:${USER_GID} /workspace - CMD bash /usr/bin/start.sh diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index c2e9486..66de371 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -4,9 +4,6 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 not work!!! FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime -ARG USER_GID=1000 -ARG USER_UID=1000 -ARG USER=ymir ARG SERVER_MODE=prod @@ -43,9 +40,4 @@ COPY ymir_start.py /workspace/ymir_start.py # set up python path ENV PYTHONPATH=. 
-# Create non-root user and chown /workspace -RUN groupadd --gid $USER_GID $USER \ - && useradd --uid $USER_UID --gid $USER_GID -m $USER --create-home \ - && chown ${USER_GID}:${USER_GID} /workspace - CMD bash /usr/bin/start.sh From 059ac5d87ec38a76cdce8af075bb3087772dd87f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 14:19:40 +0800 Subject: [PATCH 011/150] update yolov5 dockerfile --- det-yolov5-tmi/cuda102.dockerfile | 21 +++++++++++++++------ det-yolov5-tmi/cuda111-devel.dockerfile | 21 +++++++++++++++------ det-yolov5-tmi/cuda111.dockerfile | 21 +++++++++++++++------ det-yolov5-tmi/models/common.py | 2 +- det-yolov5-tmi/requirements.txt | 6 +++--- det-yolov5-tmi/utils/ymir_yolov5.py | 13 +++++++++---- 6 files changed, 58 insertions(+), 26 deletions(-) diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index eeaf599..22f7b98 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -3,6 +3,7 @@ ARG CUDA="10.2" ARG CUDNN="7" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +ARG SERVER_MODE=prod ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -15,18 +16,26 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Copy file from host to docker +# install ymir-exc sdk +RUN if [ "${SERVER_MODE}" = "dev" ]; then \ + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi + +# Copy file from host to docker and install requirements ADD ./det-yolov5-tmi /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ -RUN pip install ymir-exc && pip install -r /app/requirements.txt +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ + && pip install -r /app/requirements.txt # Download pretrained weight and font file 
-RUN cd /app && bash data/scripts/download_weights.sh -RUN mkdir -p /root/.config/Ultralytics && \ - wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf +RUN cd /app && bash data/scripts/download_weights.sh \ + && mkdir -p /root/.config/Ultralytics \ + && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf # Make PYTHONPATH find local package ENV PYTHONPATH=. WORKDIR /app +RUN echo "python3 /app/start.py" > /usr/bin/start.sh CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/cuda111-devel.dockerfile b/det-yolov5-tmi/cuda111-devel.dockerfile index 77389b9..cd2eb03 100644 --- a/det-yolov5-tmi/cuda111-devel.dockerfile +++ b/det-yolov5-tmi/cuda111-devel.dockerfile @@ -4,6 +4,7 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel +ARG SERVER_MODE=prod ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -17,18 +18,26 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Copy file from host to docker +# install ymir-exc sdk +RUN if [ "${SERVER_MODE}" = "dev" ]; then \ + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi + +# Copy file from host to docker and install requirements ADD ./det-yolov5-tmi /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ -RUN pip install ymir-exc && pip install -r /app/requirements.txt +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ + && pip install -r /app/requirements.txt # Download pretrained weight and font file -RUN cd /app && bash data/scripts/download_weights.sh -RUN mkdir -p /root/.config/Ultralytics && \ - wget https://ultralytics.com/assets/Arial.ttf -O 
/root/.config/Ultralytics/Arial.ttf +RUN cd /app && bash data/scripts/download_weights.sh \ + && mkdir -p /root/.config/Ultralytics \ + && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf # Make PYTHONPATH find local package ENV PYTHONPATH=. WORKDIR /app +RUN echo "python3 /app/start.py" > /usr/bin/start.sh CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index 9c8c061..db9b53b 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -4,6 +4,7 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +ARG SERVER_MODE=prod ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -16,18 +17,26 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Copy file from host to docker +# install ymir-exc sdk +RUN if [ "${SERVER_MODE}" = "dev" ]; then \ + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi + +# Copy file from host to docker and install requirements ADD ./det-yolov5-tmi /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ -RUN pip install ymir-exc && pip install -r /app/requirements.txt +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ + && pip install -r /app/requirements.txt # Download pretrained weight and font file -RUN cd /app && bash data/scripts/download_weights.sh -RUN mkdir -p /root/.config/Ultralytics && \ - wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf +RUN cd /app && bash data/scripts/download_weights.sh \ + && mkdir -p /root/.config/Ultralytics \ + && wget https://ultralytics.com/assets/Arial.ttf -O 
/root/.config/Ultralytics/Arial.ttf # Make PYTHONPATH find local package ENV PYTHONPATH=. WORKDIR /app +RUN echo "python3 /app/start.py" > /usr/bin/start.sh CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/models/common.py b/det-yolov5-tmi/models/common.py index a2ec35a..d116aa5 100644 --- a/det-yolov5-tmi/models/common.py +++ b/det-yolov5-tmi/models/common.py @@ -26,7 +26,7 @@ make_divisible, non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import copy_attr, time_sync -from utils.activations import SiLU, Hardswish + def autopad(k, p=None): # kernel, padding # Pad to 'same' diff --git a/det-yolov5-tmi/requirements.txt b/det-yolov5-tmi/requirements.txt index 3e65c34..fa1d389 100755 --- a/det-yolov5-tmi/requirements.txt +++ b/det-yolov5-tmi/requirements.txt @@ -37,6 +37,6 @@ onnx-simplifier>=0.3.6 # ONNX simplifier thop # FLOPs computation # Ymir --------------------------------------- -imagesize # fast obtain image size without load image -nptyping # numpy type hint -easydict \ No newline at end of file +imagesize>=1.3.0 # fast obtain image size without load image +nptyping>=2.1.1 # numpy type hint +easydict>=1.9 \ No newline at end of file diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 9010340..68b5854 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -2,6 +2,7 @@ utils function for ymir and yolov5 """ import os.path as osp +import shutil from enum import IntEnum from typing import Any, List, Tuple @@ -186,12 +187,16 @@ def convert_ymir_to_yolov5(cfg: edict) -> None: convert ymir format dataset to yolov5 format generate data.yaml for training/mining/infer """ - data = dict(path=cfg.ymir.input.root_dir, - train=cfg.ymir.input.training_index_file, - val=cfg.ymir.input.val_index_file, - test=cfg.ymir.input.candidate_index_file, + + data = 
dict(path=cfg.ymir.output.root_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) + for split, prefix in zip(['train', 'val', 'test'], ['training', 'val', 'candidate']): + src_file = getattr(cfg.ymir.input, f'{prefix}_index_file') + if osp.exists(src_file): + shutil.copy(src_file, f'{cfg.ymir.output.root_dir}/{split}.tsv') + + data[split] = f'{split}.tsv' with open(osp.join(cfg.ymir.output.root_dir, 'data.yaml'), 'w') as fw: fw.write(yaml.safe_dump(data)) From 62056ea6346ae902760f9733633e518cea260f43 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 18:22:12 +0800 Subject: [PATCH 012/150] add dockerfile for cuda11 --- det-mmdetection-tmi/docker/Dockerfile.cuda11 | 43 ++++++++++++++++++++ det-mmdetection-tmi/start.py | 17 +------- det-mmdetection-tmi/ymir_train.py | 2 +- 3 files changed, 46 insertions(+), 16 deletions(-) create mode 100644 det-mmdetection-tmi/docker/Dockerfile.cuda11 diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda11 b/det-mmdetection-tmi/docker/Dockerfile.cuda11 new file mode 100644 index 0000000..b00c88e --- /dev/null +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda11 @@ -0,0 +1,43 @@ +ARG PYTORCH="1.8.0" +ARG CUDA="11.1" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ARG MMCV="1.4.3" +ARG SERVER_MODE=prod + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +# Set timezone +RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo 'Asia/Shanghai' >/etc/timezone + +# Install apt package +RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install ymir-exc sdk and MMCV +RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ + && if [ "${SERVER_MODE}" = "dev" ]; then \ + pip install --force-reinstall -U 
"git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi \ + && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html \ + && conda clean --all + +# Install det-mmdetection-tmi +ADD det-mmdetection-tmi /app +WORKDIR /app +ENV FORCE_CUDA="1" +RUN pip install --no-cache-dir -r requirements/build.txt \ + && pip install --no-cache-dir -e . \ + && mkdir /img-man \ + && mv *-template.yaml /img-man + +RUN echo "python3 start.py" > /usr/bin/start.sh +CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 16e8e89..553cfb5 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -1,7 +1,5 @@ import logging import os -import os.path as osp -import shutil import subprocess import sys @@ -11,8 +9,8 @@ from ymir_exc import env, monitor from ymir_exc import result_writer as rw -from mmdet.utils.util_ymir import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, download_weight_file, get_merged_config, - get_weight_file, get_ymir_process) +from mmdet.utils.util_ymir import (YmirStage, get_merged_config, + get_ymir_process) def start() -> int: @@ -47,11 +45,6 @@ def _run_training(cfg: edict) -> None: def _run_mining(cfg: edict()) -> None: - logging.info('convert ymir dataset to yolov5 dataset') - out_dir = osp.join(cfg.ymir.output.root_dir, 'yolov5_dataset') - convert_ymir_to_yolov5(cfg, out_dir) - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) - command = 'python3 mining/mining_cald.py' logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) @@ -59,12 +52,6 @@ def _run_mining(cfg: edict()) -> None: def _run_infer(cfg: edict) -> None: - # generate data.yaml for infer - logging.info('convert ymir dataset to yolov5 dataset') - out_dir = osp.join(cfg.ymir.output.root_dir, 
'yolov5_dataset') - convert_ymir_to_yolov5(cfg, out_dir) - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) - N = dr.items_count(env.DatasetType.CANDIDATE) infer_result = dict() model = YmirYolov5(cfg) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 205bbc7..cd64cbd 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -115,7 +115,7 @@ def main(cfg: edict) -> int: if __name__ == '__main__': cfg = get_merged_config() - os.environ.setdefault('YMIR_MODELS_DIR','') + os.environ.setdefault('YMIR_MODELS_DIR',cfg.ymir.output.models_dir) os.environ.setdefault('COCO_EVAL_TMP_FILE', '') os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') sys.exit(main(cfg)) From fec9ca8e2e6364930f246c884ff58e9ac346b8c9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 15 Jun 2022 18:25:10 +0800 Subject: [PATCH 013/150] remove loguru --- det-yolov5-tmi/utils/datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/det-yolov5-tmi/utils/datasets.py b/det-yolov5-tmi/utils/datasets.py index cb36851..d4bf7b9 100755 --- a/det-yolov5-tmi/utils/datasets.py +++ b/det-yolov5-tmi/utils/datasets.py @@ -31,7 +31,6 @@ from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, ymir_xyxy2xywh) from utils.torch_utils import torch_distributed_zero_first -from loguru import logger # Parameters HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'] # include image suffixes From e56ea6e6f1f6963f7d4eda17fadaf206442a3d19 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 16 Jun 2022 15:52:58 +0800 Subject: [PATCH 014/150] update mxnet docker file --- live-code-executor/mxnet.dockerfile | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 
12 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index e55f478..59731e6 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -1,20 +1,31 @@ -ARG CUDA="11.2.0" +ARG CUDA="11.2.1" ARG CUDNN="8" ARG BUILD="runtime" # runtime/devel -ARG SYSTEM="ubuntu20.04" +ARG SYSTEM="ubuntu18.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} ARG MXNET="1.9.1" -ENV LANG=C.UTF-8 - -ARG SERVER_MODE=prod +ARG DEBIAN_FRONTEND="noninteractive" +ARG MINICONDA_URL="https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py39_4.11.0-Linux-x86_64.sh" +ENV LANG=C.UTF-8 +ENV PATH /opt/conda/bin:$PATH # install linux package, needs to fix GPG error first. RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ apt-get update && \ - apt-get install -y git wget curl python3-dev gcc zip libglib2.0-0 libgl1-mesa-glx && \ - wget https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py + apt-get install -y git wget curl zip libglib2.0-0 libgl1-mesa-glx && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + wget "${MINICONDA_URL}" -O miniconda.sh -q && \ + mkdir -p /opt && \ + sh miniconda.sh -b -p /opt/conda && \ + rm miniconda.sh && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail @@ -22,10 +33,10 @@ RUN pip3 install mxnet-cu112==${MXNET} loguru # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi # copy template training/mining/infer config file RUN mkdir -p /img-man From f9ed5952132b26b2d65f82ff6b57648e7c6935ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E4=BD=B3=E6=AC=A3?= Date: Thu, 16 Jun 2022 22:54:35 +0800 Subject: [PATCH 015/150] change miniconda link --- live-code-executor/mxnet.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 59731e6..a82758e 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -6,7 +6,7 @@ ARG SYSTEM="ubuntu18.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} ARG MXNET="1.9.1" ARG DEBIAN_FRONTEND="noninteractive" -ARG MINICONDA_URL="https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py39_4.11.0-Linux-x86_64.sh" +ARG MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_4.11.0-Linux-x86_64.sh" ENV LANG=C.UTF-8 ENV PATH /opt/conda/bin:$PATH From 9972b9c7380fd2a2387f89f7d4125a625760249a Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 17 Jun 2022 09:16:18 +0800 
Subject: [PATCH 016/150] remove cuda111-dev, add empty line --- det-yolov5-tmi/cuda102.dockerfile | 2 +- det-yolov5-tmi/cuda111-devel.dockerfile | 43 ------------------------- det-yolov5-tmi/cuda111.dockerfile | 2 +- 3 files changed, 2 insertions(+), 45 deletions(-) delete mode 100644 det-yolov5-tmi/cuda111-devel.dockerfile diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index 22f7b98..49a29d3 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -38,4 +38,4 @@ ENV PYTHONPATH=. WORKDIR /app RUN echo "python3 /app/start.py" > /usr/bin/start.sh -CMD python3 /app/start.py \ No newline at end of file +CMD bash /usr/bin/start.sh diff --git a/det-yolov5-tmi/cuda111-devel.dockerfile b/det-yolov5-tmi/cuda111-devel.dockerfile deleted file mode 100644 index cd2eb03..0000000 --- a/det-yolov5-tmi/cuda111-devel.dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -ARG PYTORCH="1.8.0" -ARG CUDA="11.1" -ARG CUDNN="8" - -# cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
-FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel -ARG SERVER_MODE=prod - -ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" -ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" -ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" -ENV LANG=C.UTF-8 - -# Install linux package -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ - apt-get update && apt-get install -y gnupg2 git libglib2.0-0 libgl1-mesa-glx \ - curl wget zip \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# install ymir-exc sdk -RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi - -# Copy file from host to docker and install requirements -ADD ./det-yolov5-tmi /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ - && pip install -r /app/requirements.txt - -# Download pretrained weight and font file -RUN cd /app && bash data/scripts/download_weights.sh \ - && mkdir -p /root/.config/Ultralytics \ - && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf - -# Make PYTHONPATH find local package -ENV PYTHONPATH=. - -WORKDIR /app -RUN echo "python3 /app/start.py" > /usr/bin/start.sh -CMD python3 /app/start.py \ No newline at end of file diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index db9b53b..0c6e5dd 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -39,4 +39,4 @@ ENV PYTHONPATH=. 
WORKDIR /app RUN echo "python3 /app/start.py" > /usr/bin/start.sh -CMD python3 /app/start.py \ No newline at end of file +CMD bash /usr/bin/start.sh From ba8a738b3addf97a235f074f4e208270c8e456d7 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Sat, 18 Jun 2022 22:01:42 +0800 Subject: [PATCH 017/150] support ymir cfg-option and args-option --- .../{Dockerfile.cuda11 => Dockerfile.cuda111} | 15 ++-- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 69 +++++++++++++- det-mmdetection-tmi/requirements/runtime.txt | 1 + det-mmdetection-tmi/start.py | 4 +- det-mmdetection-tmi/tools/train.py | 5 +- det-mmdetection-tmi/ymir_train.py | 90 ++++++------------- 6 files changed, 108 insertions(+), 76 deletions(-) rename det-mmdetection-tmi/docker/{Dockerfile.cuda11 => Dockerfile.cuda111} (76%) diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda11 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 similarity index 76% rename from det-mmdetection-tmi/docker/Dockerfile.cuda11 rename to det-mmdetection-tmi/docker/Dockerfile.cuda111 index b00c88e..4b132f9 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda11 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -2,8 +2,9 @@ ARG PYTORCH="1.8.0" ARG CUDA="11.1" ARG CUDNN="8" -FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +# mmcv>=1.3.17, <=1.5.0 ARG MMCV="1.4.3" ARG SERVER_MODE=prod @@ -21,8 +22,8 @@ RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build && rm -rf /var/lib/apt/lists/* # Install ymir-exc sdk and MMCV -RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ - && if [ "${SERVER_MODE}" = "dev" ]; then \ +RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ + if [ "${SERVER_MODE}" = "dev" ]; then \ pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ @@ 
-34,10 +35,10 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ ADD det-mmdetection-tmi /app WORKDIR /app ENV FORCE_CUDA="1" -RUN pip install --no-cache-dir -r requirements/build.txt \ +RUN pip install --no-cache-dir -r requirements/runtime.txt \ && pip install --no-cache-dir -e . \ && mkdir /img-man \ - && mv *-template.yaml /img-man - -RUN echo "python3 start.py" > /usr/bin/start.sh + && mv *-template.yaml /img-man \ + && echo "python3 start.py" > /usr/bin/start.sh + CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 3b5008b..6493f92 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -10,6 +10,8 @@ from urllib.parse import urlparse import mmcv +from mmcv import Config +from mmdet.apis import init_detector, inference_detector from easydict import EasyDict as edict from nptyping import NDArray, Shape, UInt8 from torch.hub import HASH_REGEX, _get_torch_home, download_url_to_file @@ -58,6 +60,57 @@ def get_merged_config() -> edict: merged_cfg.ymir = env.get_current_env() return merged_cfg +def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: + """ + - modify dataset config + - modify model output channel + """ + ### modify dataset config + ymir_ann_files = dict( + train=ymir_cfg.ymir.input.training_index_file, + val=ymir_cfg.ymir.input.val_index_file, + test=ymir_cfg.ymir.input.candidate_index_file + ) + + samples_per_gpu = ymir_cfg.param.samples_per_gpu + workers_per_gpu = ymir_cfg.param.workers_per_gpu + mmdet_cfg.data.samples_per_gpu = samples_per_gpu + mmdet_cfg.data.workers_per_gpu = workers_per_gpu + + for split in ['train','val','test']: + ymir_dataset_cfg=dict(type='YmirDataset', + ann_file=ymir_ann_files[split], + img_prefix=ymir_cfg.ymir.input.assets_dir, + ann_prefix=ymir_cfg.ymir.input.annotations_dir, + classes=ymir_cfg.param.class_names, + 
data_root=ymir_cfg.ymir.input.root_dir, + filter_empty_gt=False + ) + ### modify dataset config + mmdet_dataset_cfg = mmdet_cfg.data[split] + if isinstance(mmdet_dataset_cfg, (list, tuple)): + for x in mmdet_dataset_cfg: + x.update(ymir_dataset_cfg) + else: + src_dataset_type = mmdet_dataset_cfg.type + if src_dataset_type in ['CocoDataset']: + mmdet_dataset_cfg.update(ymir_dataset_cfg) + elif src_dataset_type in ['MultiImageMixDataset','RepeatDataset']: + mmdet_dataset_cfg.dataset.update(ymir_dataset_cfg) + else: + raise Exception(f'unsupported source dataset type {src_dataset_type}') + + ### modify model output channel + mmdet_model_cfg = mmdet_cfg.model.bbox_head + mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) + + ### epochs, checkpoint, tensorboard + mmdet_model_cfg.runner.max_epochs = ymir_cfg.param.max_epochs + mmdet_model_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir + tensorboard_logger = dict(type='TensorboardLoggerHook', + log_dir = ymir_cfg.ymir.output.tensorboard_dir) + mmdet_model_cfg.log_config['hooks'].append(tensorboard_logger) + return mmdet_cfg def get_weight_file(cfg: edict) -> str: """ @@ -146,4 +199,18 @@ def update_training_result_file(key_score): model_weight_file = osp.basename(weight_files[-1]) rw.write_training_result(model_names=[model_weight_file, osp.basename(model_config_file)], mAP=key_score, - classAPs=results_per_category) \ No newline at end of file + classAPs=results_per_category) + +class YmirModel: + def __init__(self, cfg:edict): + self.cfg = cfg + + # Specify the path to model config and checkpoint file + config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' + + # build the model from a config file and a checkpoint file + self.model = init_detector(config_file, checkpoint_file, device='cuda:0') + + def infer(self, img): + return inference_detector(self.model, img) \ No newline at end of file diff 
--git a/det-mmdetection-tmi/requirements/runtime.txt b/det-mmdetection-tmi/requirements/runtime.txt index f7a2cc7..3c93f57 100644 --- a/det-mmdetection-tmi/requirements/runtime.txt +++ b/det-mmdetection-tmi/requirements/runtime.txt @@ -3,3 +3,4 @@ numpy pycocotools six terminaltables +easydict \ No newline at end of file diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 553cfb5..54af3aa 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -10,7 +10,7 @@ from ymir_exc import result_writer as rw from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_ymir_process) + get_ymir_process, YmirModel) def start() -> int: @@ -54,7 +54,7 @@ def _run_mining(cfg: edict()) -> None: def _run_infer(cfg: edict) -> None: N = dr.items_count(env.DatasetType.CANDIDATE) infer_result = dict() - model = YmirYolov5(cfg) + model = YmirModel(cfg) idx = -1 monitor_gap = max(1, N // 100) diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index b9e9981..b454553 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -17,7 +17,7 @@ from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, get_root_logger, setup_multi_processes - +from mmdet.utils.util_ymir import modify_mmdet_config def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') @@ -98,6 +98,9 @@ def main(): args = parse_args() cfg = Config.fromfile(args.config) + # modify mmdet config from file + cfg = modify_mmdet_config(cfg) + if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index cd64cbd..f328eff 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -11,7 +11,7 @@ def main(cfg: edict) -> int: - # default ymir config + ### default ymir config gpu_id = 
cfg.param.get("gpu_id", '0') num_gpus = len(gpu_id.split(",")) if num_gpus == 0: @@ -27,82 +27,42 @@ def main(cfg: edict) -> int: if not weight_file: weight_file = download_weight_file(model) - # user define config + ### user define config learning_rate = cfg.param.learning_rate - epochs = cfg.param.epochs + epochs = cfg.param.max_epochs samples_per_gpu = cfg.param.samples_per_gpu workers_per_gpu = min(4, max(1, samples_per_gpu//2)) - supported_models = [] - if model.startswith("faster_rcnn"): - files = glob.glob( - osp.join('configs/faster_rcnn/faster_rcnn_*_ymir_coco.py')) - supported_models = ['faster_rcnn_r50_fpn', 'faster_rcnn_r101_fpn'] - elif model.startswith("yolox"): - files = glob.glob(osp.join('configs/yolox/yolox_*_8x8_300e_ymir_coco.py')) - supported_models = ['yolox_nano', 'yolox_tiny', - 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x'] - else: - files = glob.glob(osp.join('configs/*/*_ymir_coco.py')) - supported_models = [osp.basename(f) for f in files] - - assert model in supported_models, f'unknown model {model}, not in {supported_models}' - - # modify base config file - base_config_file = './configs/_base_/datasets/ymir_coco.py' - - modify_dict = dict( - classes=classes, - num_classes=num_classes, - max_epochs=epochs, - lr=learning_rate, - samples_per_gpu=samples_per_gpu, - workers_per_gpu=workers_per_gpu, - data_root=cfg.ymir.input.root_dir, - img_prefix=cfg.ymir.input.assets_dir, - ann_prefix=cfg.ymir.input.annotations_dir, - train_ann_file=cfg.ymir.input.training_index_file, - val_ann_file=cfg.ymir.input.val_index_file, - tensorboard_dir=cfg.ymir.output.tensorboard_dir, - work_dir=cfg.ymir.output.models_dir, - checkpoints_path=weight_file - ) - - logging.info(f'modified config is {modify_dict}') - with open(base_config_file, 'r') as fp: - lines = fp.readlines() - - fw = open(base_config_file, 'w') - for line in lines: - for key in modify_dict: - if line.startswith((f"{key}=", f"{key} =")): - value = modify_dict[key] - if isinstance(value, str): - 
line = f"{key} = '{value}' \n" - else: - line = f"{key} = {value} \n" - break - fw.write(line) - fw.close() - - # train_config_file will use the config in base_config_file - train_config_file = '' - for f in files: - if osp.basename(f).startswith(model): - train_config_file = f + ### mmcv args config + config_file = cfg.param.get("config_file") + args_options = cfg.param.get("base_args",None) + cfg_options = cfg.param.get("cfg_options",None) monitor.write_monitor_logger(percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) work_dir = cfg.ymir.output.models_dir - if num_gpus == 1: - cmd = f"python tools/train.py {train_config_file} " + \ + if num_gpus == 0: + # view https://mmdetection.readthedocs.io/en/stable/1_exist_data_model.html#training-on-cpu + os.environ.setdefault('CUDA_VISIBLE_DEVICES',"-1") + cmd = f"python tools/train.py {config_file} " + \ + f"--work-dir {work_dir}" + elif num_gpus == 1: + cmd = f"python tools/train.py {config_file} " + \ f"--work-dir {work_dir} --gpu-id {gpu_id}" else: - os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id - cmd = f"./tools/dist_train.sh {train_config_file} {num_gpus} " + \ + os.environ.setdefault('CUDA_VISIBLE_DEVICES', gpu_id) + port = cfg.param.get('PORT') + os.environ.setdefault('PORT', port) + cmd = f"./tools/dist_train.sh {config_file} {num_gpus} " + \ f"--work-dir {work_dir}" + if args_options: + cmd +=f" {args_options}" + + if cfg_options: + cmd +=f" --cfg-options {cfg_options}" + logging.info(f"training command: {cmd}") subprocess.run(cmd.split(), check=True) @@ -116,6 +76,6 @@ def main(cfg: edict) -> int: if __name__ == '__main__': cfg = get_merged_config() os.environ.setdefault('YMIR_MODELS_DIR',cfg.ymir.output.models_dir) - os.environ.setdefault('COCO_EVAL_TMP_FILE', '') + os.environ.setdefault('COCO_EVAL_TMP_FILE', osp.join(cfg.ymir.output.root_dir,'eval_tmp.json')) os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') sys.exit(main(cfg)) From 748a522640d3c837080af38c602dd48cda53e950 Mon Sep 17 
00:00:00 2001 From: youdaoyzbx Date: Sun, 19 Jun 2022 14:56:20 +0800 Subject: [PATCH 018/150] support extend dataset format --- det-mmdetection-tmi/mmdet/datasets/ymir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-mmdetection-tmi/mmdet/datasets/ymir.py b/det-mmdetection-tmi/mmdet/datasets/ymir.py index 5cbbbfa..42771fb 100644 --- a/det-mmdetection-tmi/mmdet/datasets/ymir.py +++ b/det-mmdetection-tmi/mmdet/datasets/ymir.py @@ -146,7 +146,7 @@ def get_txt_ann_info(self, txt_path): else: lines=[] for line in lines: - obj=[int(x) for x in line.strip().split(',')] + obj=[int(x) for x in line.strip().split(',')[0:5]] # YMIR category id starts from 0, coco from 1 category_id, xmin, ymin, xmax, ymax = obj bbox = [xmin, ymin, xmax, ymax] From ea6a52e027274c09aac661a2d20f369fc73c80c9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Sun, 19 Jun 2022 17:21:43 +0800 Subject: [PATCH 019/150] update config --- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 4 +-- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 25 ++++--------------- det-mmdetection-tmi/start.py | 3 ++- det-mmdetection-tmi/tools/train.py | 9 ++++--- det-mmdetection-tmi/ymir_infer.py | 16 ++++++++++++ det-mmdetection-tmi/ymir_train.py | 16 ++++++------ 6 files changed, 38 insertions(+), 35 deletions(-) create mode 100644 det-mmdetection-tmi/ymir_infer.py diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index 4b132f9..8441b6a 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -17,7 +17,7 @@ RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone # Install apt package -RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ +RUN apt-get update && apt-get install -y gcc ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ && 
apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -40,5 +40,5 @@ RUN pip install --no-cache-dir -r requirements/runtime.txt \ && mkdir /img-man \ && mv *-template.yaml /img-man \ && echo "python3 start.py" > /usr/bin/start.sh - + CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 6493f92..2a4ab09 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -11,7 +11,6 @@ import mmcv from mmcv import Config -from mmdet.apis import init_detector, inference_detector from easydict import EasyDict as edict from nptyping import NDArray, Shape, UInt8 from torch.hub import HASH_REGEX, _get_torch_home, download_url_to_file @@ -76,7 +75,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: workers_per_gpu = ymir_cfg.param.workers_per_gpu mmdet_cfg.data.samples_per_gpu = samples_per_gpu mmdet_cfg.data.workers_per_gpu = workers_per_gpu - + for split in ['train','val','test']: ymir_dataset_cfg=dict(type='YmirDataset', ann_file=ymir_ann_files[split], @@ -105,12 +104,12 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) ### epochs, checkpoint, tensorboard - mmdet_model_cfg.runner.max_epochs = ymir_cfg.param.max_epochs - mmdet_model_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir + mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs + mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir = ymir_cfg.ymir.output.tensorboard_dir) - mmdet_model_cfg.log_config['hooks'].append(tensorboard_logger) - return mmdet_cfg + mmdet_cfg.log_config['hooks'].append(tensorboard_logger) + return mmdet_cfg def get_weight_file(cfg: edict) -> str: """ @@ -200,17 +199,3 @@ def update_training_result_file(key_score): 
rw.write_training_result(model_names=[model_weight_file, osp.basename(model_config_file)], mAP=key_score, classAPs=results_per_category) - -class YmirModel: - def __init__(self, cfg:edict): - self.cfg = cfg - - # Specify the path to model config and checkpoint file - config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' - checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' - - # build the model from a config file and a checkpoint file - self.model = init_detector(config_file, checkpoint_file, device='cuda:0') - - def infer(self, img): - return inference_detector(self.model, img) \ No newline at end of file diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 54af3aa..89ea239 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -10,7 +10,8 @@ from ymir_exc import result_writer as rw from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_ymir_process, YmirModel) + get_ymir_process) +from ymir_infer import YmirModel def start() -> int: diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index b454553..a65e130 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -17,7 +17,7 @@ from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, get_root_logger, setup_multi_processes -from mmdet.utils.util_ymir import modify_mmdet_config +from mmdet.utils.util_ymir import modify_mmdet_config, get_merged_config def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') @@ -96,11 +96,12 @@ def parse_args(): def main(): args = parse_args() - + ymir_cfg = get_merged_config() cfg = Config.fromfile(args.config) + print(cfg) # modify mmdet config from file - cfg = modify_mmdet_config(cfg) - + cfg = modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) + if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) 
diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py new file mode 100644 index 0000000..07dd043 --- /dev/null +++ b/det-mmdetection-tmi/ymir_infer.py @@ -0,0 +1,16 @@ +from mmdet.apis import init_detector, inference_detector +from easydict import EasyDict as edict + +class YmirModel: + def __init__(self, cfg:edict): + self.cfg = cfg + + # Specify the path to model config and checkpoint file + config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' + + # build the model from a config file and a checkpoint file + self.model = init_detector(config_file, checkpoint_file, device='cuda:0') + + def infer(self, img): + return inference_detector(self.model, img) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index f328eff..ec44eec 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -23,16 +23,16 @@ def main(cfg: edict) -> int: if num_classes==0: raise Exception('not find class_names in config!') - weight_file = get_weight_file(cfg) - if not weight_file: - weight_file = download_weight_file(model) + # weight_file = get_weight_file(cfg) + # if not weight_file: + # weight_file = download_weight_file(model) ### user define config - learning_rate = cfg.param.learning_rate - epochs = cfg.param.max_epochs + # learning_rate = cfg.param.learning_rate + # epochs = cfg.param.max_epochs - samples_per_gpu = cfg.param.samples_per_gpu - workers_per_gpu = min(4, max(1, samples_per_gpu//2)) + # samples_per_gpu = cfg.param.samples_per_gpu + # workers_per_gpu = min(4, max(1, samples_per_gpu//2)) ### mmcv args config config_file = cfg.param.get("config_file") @@ -59,7 +59,7 @@ def main(cfg: edict) -> int: if args_options: cmd +=f" {args_options}" - + if cfg_options: cmd +=f" --cfg-options {cfg_options}" From 533fef1db02eb39e4c255bcf5d67ed12b46caf9e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: 
Mon, 20 Jun 2022 16:19:12 +0800 Subject: [PATCH 020/150] update mmdet --- det-mmdetection-tmi/mmdet/apis/train.py | 1 + det-mmdetection-tmi/mmdet/utils/util_ymir.py | 21 ++++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/apis/train.py b/det-mmdetection-tmi/mmdet/apis/train.py index f2c14e9..ebc995d 100644 --- a/det-mmdetection-tmi/mmdet/apis/train.py +++ b/det-mmdetection-tmi/mmdet/apis/train.py @@ -188,6 +188,7 @@ def train_detector(model, dist=distributed, shuffle=False) eval_cfg = cfg.get('evaluation', {}) + eval_cfg['classwise'] = True # Whether to evaluating the AP for each class eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 2a4ab09..a806bd6 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -117,22 +117,23 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path = cfg.param.pretrained_model_paths + model_params_path: List = cfg.param.pretrained_model_paths else: - model_params_path = cfg.param.model_params_path + model_params_path: List = cfg.param.model_params_path model_dir = osp.join(cfg.ymir.input.root_dir, cfg.ymir.input.models_dir) model_params_path = [ - p for p in model_params_path if osp.exists(osp.join(model_dir, p))] + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pth')] - # choose weight file by priority, best.pt > xxx.pt - if 'best.pt' in model_params_path: - return osp.join(model_dir, 'best.pt') - else: - for f in model_params_path: - if f.endswith('.pt'): - return osp.join(model_dir, f) + # choose weight file by 
priority, best_xxx.pth > latest.pth > epoch_xxx.pth + best_pth_files = [f for f in model_params_path if f.startswith('best_')] + if len(best_pth_files) > 0: + return get_newest_file(best_pth_files) + + epoch_pth_files = [f for f in model_params_path if f.startswith('epoch_')] + if len(epoch_pth_files) > 0: + return get_newest_file(epoch_pth_files) return "" From 9d0ef65a4e5b369c66745f5ed432c6bde20686fd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 20 Jun 2022 16:33:43 +0800 Subject: [PATCH 021/150] add gcc and make git clone faster --- live-code-executor/mxnet.dockerfile | 2 +- live-code-executor/ymir_start.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index a82758e..1ff0a66 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -13,7 +13,7 @@ ENV PATH /opt/conda/bin:$PATH # install linux package, needs to fix GPG error first. RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ apt-get update && \ - apt-get install -y git wget curl zip libglib2.0-0 libgl1-mesa-glx && \ + apt-get install -y git gcc wget curl zip libglib2.0-0 libgl1-mesa-glx && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ wget "${MINICONDA_URL}" -O miniconda.sh -q && \ diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index 71adf5c..0ea1bd6 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -23,9 +23,9 @@ def main(): git_branch = executor_config.get('git_branch', '') if not git_branch: - cmd = f'git clone {git_url} /app' + cmd = f'git clone {git_url} --depth 1 /app' else: - cmd = f'git clone {git_url} -b {git_branch} /app' + cmd = f'git clone {git_url} --depth 1 -b {git_branch} /app' logger.info(f'clone code: {cmd}') subprocess.run(cmd.split(), check=True) From ac06c0ffaee2d377b479521d6327e7e2dd2d4e40 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: 
Mon, 20 Jun 2022 17:56:08 +0800 Subject: [PATCH 022/150] add training ability --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index a806bd6..2d67bdf 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -129,11 +129,11 @@ def get_weight_file(cfg: edict) -> str: # choose weight file by priority, best_xxx.pth > latest.pth > epoch_xxx.pth best_pth_files = [f for f in model_params_path if f.startswith('best_')] if len(best_pth_files) > 0: - return get_newest_file(best_pth_files) + return max(best_pth_files, key=os.path.getctime) epoch_pth_files = [f for f in model_params_path if f.startswith('epoch_')] if len(epoch_pth_files) > 0: - return get_newest_file(epoch_pth_files) + return max(epoch_pth_files, key=os.path.getctime) return "" @@ -181,7 +181,7 @@ def update_training_result_file(key_score): results_per_category = mmcv.load(COCO_EVAL_TMP_FILE) work_dir = os.getenv('YMIR_MODELS_DIR') - if work_dir is None or osp.isdir(work_dir): + if work_dir is None or not osp.isdir(work_dir): raise Exception( f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {work_dir}') From 5402b3f4713f5de4021c2d88d1b81282dd432e08 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 21 Jun 2022 15:21:02 +0800 Subject: [PATCH 023/150] mmdet training model --- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 2 +- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 17 ++++------------- det-mmdetection-tmi/requirements/runtime.txt | 6 +++++- det-mmdetection-tmi/training-template.yaml | 7 +++++++ det-mmdetection-tmi/ymir_infer.py | 5 +++-- det-mmdetection-tmi/ymir_train.py | 5 ++--- 6 files changed, 22 insertions(+), 20 deletions(-) create mode 100644 det-mmdetection-tmi/training-template.yaml diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 
b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index 8441b6a..42a9004 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -17,7 +17,7 @@ RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone # Install apt package -RUN apt-get update && apt-get install -y gcc ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ +RUN apt-get update && apt-get install -y build-essential ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 2d67bdf..eb9ad3e 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -104,7 +104,8 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) ### epochs, checkpoint, tensorboard - mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs + if ymir_cfg.param.get('max_epochs',None): + mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir = ymir_cfg.ymir.output.tensorboard_dir) @@ -186,17 +187,7 @@ def update_training_result_file(key_score): f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {work_dir}') # assert only one model config file in work_dir - model_config_file = glob.glob(osp.join(work_dir, '*.py'))[0] - weight_files = glob.glob(osp.join(work_dir, 'best_bbox_mAP_epoch_*.pth')) - if len(weight_files) == 0: - weight_files = glob.glob(osp.join(work_dir, 'epoch_*.pth')) - - if len(weight_files) == 0: - raise Exception(f'no weight file found in {work_dir}') - - # sort the weight files by time, use the latest file. 
- weight_files.sort(key=lambda fn: osp.getmtime(fn)) - model_weight_file = osp.basename(weight_files[-1]) - rw.write_training_result(model_names=[model_weight_file, osp.basename(model_config_file)], + result_files = glob.glob(osp.join(work_dir, '*')) + rw.write_training_result(model_names=[osp.basename(f) for f in result_files], mAP=key_score, classAPs=results_per_category) diff --git a/det-mmdetection-tmi/requirements/runtime.txt b/det-mmdetection-tmi/requirements/runtime.txt index 3c93f57..9754131 100644 --- a/det-mmdetection-tmi/requirements/runtime.txt +++ b/det-mmdetection-tmi/requirements/runtime.txt @@ -3,4 +3,8 @@ numpy pycocotools six terminaltables -easydict \ No newline at end of file +easydict +nptyping +imagesize>=1.3.0 +future +tensorboard>=2.5.0 diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml new file mode 100644 index 0000000..67b9aa3 --- /dev/null +++ b/det-mmdetection-tmi/training-template.yaml @@ -0,0 +1,7 @@ +samples_per_gpu: 2 +workers_per_gpu: 2 +max_epochs: 300 +config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' +args: '' +cfg_options: '' +port: 12345 diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 07dd043..6863f62 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -1,13 +1,14 @@ from mmdet.apis import init_detector, inference_detector from easydict import EasyDict as edict +from mmdet.utils.util_ymir import get_weight_file class YmirModel: def __init__(self, cfg:edict): self.cfg = cfg # Specify the path to model config and checkpoint file - config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' - checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' + config_file = cfg.param.config_file + checkpoint_file = get_weight_file(cfg) # build the model from a config file and a checkpoint file self.model = init_detector(config_file, checkpoint_file, device='cuda:0') diff 
--git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index ec44eec..f2fc959 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -19,7 +19,6 @@ def main(cfg: edict) -> int: classes = cfg.param.class_names num_classes = len(classes) - model = cfg.param.model if num_classes==0: raise Exception('not find class_names in config!') @@ -36,7 +35,7 @@ def main(cfg: edict) -> int: ### mmcv args config config_file = cfg.param.get("config_file") - args_options = cfg.param.get("base_args",None) + args_options = cfg.param.get("args",None) cfg_options = cfg.param.get("cfg_options",None) monitor.write_monitor_logger(percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) @@ -52,7 +51,7 @@ def main(cfg: edict) -> int: f"--work-dir {work_dir} --gpu-id {gpu_id}" else: os.environ.setdefault('CUDA_VISIBLE_DEVICES', gpu_id) - port = cfg.param.get('PORT') + port = cfg.param.get('port') os.environ.setdefault('PORT', port) cmd = f"./tools/dist_train.sh {config_file} {num_gpus} " + \ f"--work-dir {work_dir}" From 1aed247e897f57d9ba3fb78d9eed395d632c94cd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 21 Jun 2022 17:25:45 +0800 Subject: [PATCH 024/150] add infer --- det-mmdetection-tmi/infer-template.yaml | 7 ++ .../mmdet/core/evaluation/eval_hooks.py | 12 ++- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 94 +++++++------------ det-mmdetection-tmi/start.py | 20 +++- det-mmdetection-tmi/training-template.yaml | 2 +- det-mmdetection-tmi/ymir_infer.py | 60 +++++++++++- det-mmdetection-tmi/ymir_train.py | 48 ++++------ 7 files changed, 141 insertions(+), 102 deletions(-) create mode 100644 det-mmdetection-tmi/infer-template.yaml diff --git a/det-mmdetection-tmi/infer-template.yaml b/det-mmdetection-tmi/infer-template.yaml new file mode 100644 index 0000000..8be36b9 --- /dev/null +++ b/det-mmdetection-tmi/infer-template.yaml @@ -0,0 +1,7 @@ +samples_per_gpu: 2 +workers_per_gpu: 2 +max_epochs: 300 +config_file: 
'configs/yolox/yolox_nano_8x8_300e_coco.py' +args_options: '' +cfg_options: '' +port: 12345 diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index 15c47bc..89bc7bb 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -8,8 +8,10 @@ from mmcv.runner import EvalHook as BaseEvalHook from torch.nn.modules.batchnorm import _BatchNorm from ymir_exc import monitor -from mmdet.utils.util_ymir import update_training_result_file -import os.path as osp + +from mmdet.utils.util_ymir import (YmirStage, get_ymir_process, + update_training_result_file) + def _calc_dynamic_intervals(start_interval, dynamic_interval_list): assert mmcv.is_list_of(dynamic_interval_list, tuple) @@ -47,7 +49,8 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" - percent=0.95*(runner.epoch/runner.max_epochs) + percent = get_ymir_process( + stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) @@ -101,7 +104,8 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" - percent=0.1+0.8*(runner.epoch/runner.max_epochs) + percent = get_ymir_process( + stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index eb9ad3e..79982e0 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -4,16 +4,13 @@ import glob import os import os.path as osp -import sys from enum import IntEnum -from typing import Any, List, Tuple -from urllib.parse import urlparse +from typing import Any, List import mmcv -from mmcv 
import Config from easydict import EasyDict as edict +from mmcv import Config from nptyping import NDArray, Shape, UInt8 -from torch.hub import HASH_REGEX, _get_torch_home, download_url_to_file from ymir_exc import env from ymir_exc import result_writer as rw @@ -59,12 +56,14 @@ def get_merged_config() -> edict: merged_cfg.ymir = env.get_current_env() return merged_cfg + def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: """ - modify dataset config - modify model output channel + - modify epochs, checkpoint, tensorboard config """ - ### modify dataset config + # modify dataset config ymir_ann_files = dict( train=ymir_cfg.ymir.input.training_index_file, val=ymir_cfg.ymir.input.val_index_file, @@ -76,17 +75,20 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_cfg.data.samples_per_gpu = samples_per_gpu mmdet_cfg.data.workers_per_gpu = workers_per_gpu - for split in ['train','val','test']: - ymir_dataset_cfg=dict(type='YmirDataset', - ann_file=ymir_ann_files[split], - img_prefix=ymir_cfg.ymir.input.assets_dir, - ann_prefix=ymir_cfg.ymir.input.annotations_dir, - classes=ymir_cfg.param.class_names, - data_root=ymir_cfg.ymir.input.root_dir, - filter_empty_gt=False - ) - ### modify dataset config - mmdet_dataset_cfg = mmdet_cfg.data[split] + for split in ['train', 'val', 'test']: + ymir_dataset_cfg = dict(type='YmirDataset', + ann_file=ymir_ann_files[split], + img_prefix=ymir_cfg.ymir.input.assets_dir, + ann_prefix=ymir_cfg.ymir.input.annotations_dir, + classes=ymir_cfg.param.class_names, + data_root=ymir_cfg.ymir.input.root_dir, + filter_empty_gt=False + ) + # modify dataset config for `split` + mmdet_dataset_cfg = mmdet_cfg.data.get(split, None) + if mmdet_dataset_cfg is None: + continue + if isinstance(mmdet_dataset_cfg, (list, tuple)): for x in mmdet_dataset_cfg: x.update(ymir_dataset_cfg) @@ -94,24 +96,26 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: src_dataset_type = mmdet_dataset_cfg.type 
if src_dataset_type in ['CocoDataset']: mmdet_dataset_cfg.update(ymir_dataset_cfg) - elif src_dataset_type in ['MultiImageMixDataset','RepeatDataset']: + elif src_dataset_type in ['MultiImageMixDataset', 'RepeatDataset']: mmdet_dataset_cfg.dataset.update(ymir_dataset_cfg) else: - raise Exception(f'unsupported source dataset type {src_dataset_type}') + raise Exception( + f'unsupported source dataset type {src_dataset_type}') - ### modify model output channel + # modify model output channel mmdet_model_cfg = mmdet_cfg.model.bbox_head mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) - ### epochs, checkpoint, tensorboard - if ymir_cfg.param.get('max_epochs',None): + # modify epochs, checkpoint, tensorboard config + if ymir_cfg.param.get('max_epochs', None): mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir tensorboard_logger = dict(type='TensorboardLoggerHook', - log_dir = ymir_cfg.ymir.output.tensorboard_dir) + log_dir=ymir_cfg.ymir.output.tensorboard_dir) mmdet_cfg.log_config['hooks'].append(tensorboard_logger) return mmdet_cfg + def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority @@ -122,55 +126,21 @@ def get_weight_file(cfg: edict) -> str: else: model_params_path: List = cfg.param.model_params_path - model_dir = osp.join(cfg.ymir.input.root_dir, - cfg.ymir.input.models_dir) + model_dir = cfg.ymir.input.models_dir model_params_path = [ - osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pth')] + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.pth','.pt'))] # choose weight file by priority, best_xxx.pth > latest.pth > epoch_xxx.pth - best_pth_files = [f for f in model_params_path if f.startswith('best_')] + best_pth_files = [f for f in model_params_path if osp.basename(f).startswith('best_')] if len(best_pth_files) > 0: return 
max(best_pth_files, key=os.path.getctime) - epoch_pth_files = [f for f in model_params_path if f.startswith('epoch_')] + epoch_pth_files = [f for f in model_params_path if osp.basename(f).startswith('epoch_')] if len(epoch_pth_files) > 0: return max(epoch_pth_files, key=os.path.getctime) - return "" - -def download_weight_file(model: str) -> str: - """ - download weight file from web if not exist. - """ - model_to_url = dict( - faster_rcnn_r50_fpn='https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth', - faster_rcnn_r101_fpn='https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth', - yolox_tiny='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth', - yolox_s='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth', - yolox_l='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth', - yolox_x='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth', - yolox_nano='https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth' - ) - - url = model_to_url[model] - torch_home = _get_torch_home() - model_dir = os.path.join(torch_home, 'checkpoints') - - os.makedirs(model_dir, exist_ok=True) - parts = urlparse(url) - filename = os.path.basename(parts.path) - cached_file = os.path.join(model_dir, filename) - - if not os.path.exists(cached_file): - sys.stderr.write('Downloading: "{}" to {}\n'.format( - url, cached_file)) - r = HASH_REGEX.search(filename) # r is Optional[Match[str]] - hash_prefix = r.group(1) if r else None - download_url_to_file( 
- url, cached_file, hash_prefix, progress=True) - - return cached_file + return "" def update_training_result_file(key_score): diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 89ea239..44babba 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -1,9 +1,11 @@ +import glob import logging import os import subprocess import sys import cv2 +import yaml from easydict import EasyDict as edict from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor @@ -11,7 +13,7 @@ from mmdet.utils.util_ymir import (YmirStage, get_merged_config, get_ymir_process) -from ymir_infer import YmirModel +from ymir_infer import YmirModel, mmdet_result_to_ymir def start() -> int: @@ -41,6 +43,19 @@ def _run_training(cfg: edict) -> None: command = 'python3 ymir_train.py' logging.info(f'start training: {command}') subprocess.run(command.split(), check=True) + + work_dir = cfg.ymir.output.models_dir + result_files = glob.glob(os.path.join(work_dir, '*')) + + training_result_file = cfg.ymir.output.training_result_file + with open(training_result_file, 'r') as fp: + best_result = yaml.safe_load(fp) + + # save the last checkpoint + rw.write_training_result(model_names=[os.path.basename(f) for f in result_files], + mAP=best_result['map'], + classAPs=best_result['class_aps']) + # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) @@ -58,11 +73,12 @@ def _run_infer(cfg: edict) -> None: model = YmirModel(cfg) idx = -1 + # write infer result monitor_gap = max(1, N // 100) for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): img = cv2.imread(asset_path) result = model.infer(img) - infer_result[asset_path] = result + infer_result[asset_path] = mmdet_result_to_ymir(result, cfg.param.class_names) idx += 1 if idx % monitor_gap == 0: diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 67b9aa3..8be36b9 100644 --- 
a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -2,6 +2,6 @@ samples_per_gpu: 2 workers_per_gpu: 2 max_epochs: 300 config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' -args: '' +args_options: '' cfg_options: '' port: 12345 diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 6863f62..47e47ed 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -1,17 +1,71 @@ -from mmdet.apis import init_detector, inference_detector +import argparse +from typing import Any, List + +import numpy as np from easydict import EasyDict as edict +from mmcv import DictAction +from nptyping import NDArray, Shape +from ymir_exc import result_writer as rw + +from mmdet.apis import inference_detector, init_detector from mmdet.utils.util_ymir import get_weight_file +DETECTION_RESULT = NDArray[Shape['*,5'], Any] + + +def parse_option(cfg_options: str) -> dict: + parser = argparse.ArgumentParser(description='parse cfg options') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + + args = parser.parse_args(f'--cfg-options {cfg_options}'.split()) + return args.cfg_options + + +def mmdet_result_to_ymir(results: List[DETECTION_RESULT], + class_names: List[str]) -> List[rw.Annotation]: + ann_list = [] + for idx, result in enumerate(results): + for line in result: + if any(np.isinf(line)): + continue + x1, y1, x2, y2, score = line + ann = rw.Annotation( + class_name=class_names[idx], + score=score, + box=rw.Box(x=round(x1), + y=round(y1), + w=round(x2-x1), + h=round(y2-y1)) + ) + ann_list.append(ann) + return ann_list + + class YmirModel: - def __init__(self, cfg:edict): + def __init__(self, cfg: edict): self.cfg = cfg # Specify the path to model config and checkpoint file config_file = cfg.param.config_file checkpoint_file = get_weight_file(cfg) + cfg_options = parse_option( + cfg.param.cfg_options) if cfg.param.cfg_options else None + # current infer can only use one gpu!!! 
+ gpu_ids = cfg.param.gpu_id + gpu_id = gpu_ids.split(',')[0] # build the model from a config file and a checkpoint file - self.model = init_detector(config_file, checkpoint_file, device='cuda:0') + self.model = init_detector( + config_file, checkpoint_file, device=f'cuda:{gpu_id}', cfg_options=cfg_options) def infer(self, img): return inference_detector(self.model, img) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index f2fc959..a39e64b 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -1,4 +1,3 @@ -import glob import logging import os import os.path as osp @@ -7,11 +6,13 @@ from easydict import EasyDict as edict from ymir_exc import monitor -from mmdet.utils.util_ymir import get_merged_config, get_weight_file, download_weight_file, get_ymir_process, YmirStage, update_training_result_file + +from mmdet.utils.util_ymir import (YmirStage, get_merged_config, + get_ymir_process) def main(cfg: edict) -> int: - ### default ymir config + # default ymir config gpu_id = cfg.param.get("gpu_id", '0') num_gpus = len(gpu_id.split(",")) if num_gpus == 0: @@ -19,31 +20,21 @@ def main(cfg: edict) -> int: classes = cfg.param.class_names num_classes = len(classes) - if num_classes==0: + if num_classes == 0: raise Exception('not find class_names in config!') - # weight_file = get_weight_file(cfg) - # if not weight_file: - # weight_file = download_weight_file(model) - - ### user define config - # learning_rate = cfg.param.learning_rate - # epochs = cfg.param.max_epochs - - # samples_per_gpu = cfg.param.samples_per_gpu - # workers_per_gpu = min(4, max(1, samples_per_gpu//2)) - - ### mmcv args config + # mmcv args config config_file = cfg.param.get("config_file") - args_options = cfg.param.get("args",None) - cfg_options = cfg.param.get("cfg_options",None) + args_options = cfg.param.get("args_options", None) + cfg_options = cfg.param.get("cfg_options", None) - 
monitor.write_monitor_logger(percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) + monitor.write_monitor_logger( + percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) work_dir = cfg.ymir.output.models_dir if num_gpus == 0: # view https://mmdetection.readthedocs.io/en/stable/1_exist_data_model.html#training-on-cpu - os.environ.setdefault('CUDA_VISIBLE_DEVICES',"-1") + os.environ.setdefault('CUDA_VISIBLE_DEVICES', "-1") cmd = f"python tools/train.py {config_file} " + \ f"--work-dir {work_dir}" elif num_gpus == 1: @@ -57,24 +48,21 @@ def main(cfg: edict) -> int: f"--work-dir {work_dir}" if args_options: - cmd +=f" {args_options}" + cmd += f" {args_options}" if cfg_options: - cmd +=f" --cfg-options {cfg_options}" + cmd += f" --cfg-options {cfg_options}" logging.info(f"training command: {cmd}") subprocess.run(cmd.split(), check=True) - - # eval_hooks will generate training_result_file if current map is best. - # create a fake map = 0 if no training_result_file generate in eval_hooks - if not osp.exists(cfg.ymir.output.training_result_file): - update_training_result_file(0) - + logging.info(f"training finished") return 0 + if __name__ == '__main__': cfg = get_merged_config() - os.environ.setdefault('YMIR_MODELS_DIR',cfg.ymir.output.models_dir) - os.environ.setdefault('COCO_EVAL_TMP_FILE', osp.join(cfg.ymir.output.root_dir,'eval_tmp.json')) + os.environ.setdefault('YMIR_MODELS_DIR', cfg.ymir.output.models_dir) + os.environ.setdefault('COCO_EVAL_TMP_FILE', osp.join( + cfg.ymir.output.root_dir, 'eval_tmp.json')) os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') sys.exit(main(cfg)) From 0ba4479b790470da9e61d035ec4739f3ff6db479 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 21 Jun 2022 17:32:21 +0800 Subject: [PATCH 025/150] update readme --- det-mmdetection-tmi/README_ymir.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/det-mmdetection-tmi/README_ymir.md b/det-mmdetection-tmi/README_ymir.md index de86768..90a84b0 100644 --- 
a/det-mmdetection-tmi/README_ymir.md +++ b/det-mmdetection-tmi/README_ymir.md @@ -6,3 +6,7 @@ - modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format - modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process - modify `mmdet/datasets/__init__.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. +- modify `mmdet/apis/train.py`, set `eval_cfg['classwise'] = True` for class-wise evaluation +- add `mmdet/utils/util_ymir.py` for ymir training/infer/mining +- add `ymir_infer.py` for infer and mining +- add `ymir_train.py` modify `tools/train.py` to update the mmcv config for training From a8a8eddec68b6634d2e4b0ef154295a101b59a7a Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 22 Jun 2022 18:12:21 +0800 Subject: [PATCH 026/150] update evaluation --- det-mmdetection-tmi/mmdet/apis/train.py | 1 - det-mmdetection-tmi/mmdet/utils/util_ymir.py | 23 +++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/apis/train.py b/det-mmdetection-tmi/mmdet/apis/train.py index ebc995d..f2c14e9 100644 --- a/det-mmdetection-tmi/mmdet/apis/train.py +++ b/det-mmdetection-tmi/mmdet/apis/train.py @@ -188,7 +188,6 @@ def train_detector(model, dist=distributed, shuffle=False) eval_cfg = cfg.get('evaluation', {}) - eval_cfg['classwise'] = True # Whether to evaluating the AP for each class eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' eval_hook = DistEvalHook if distributed else EvalHook # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 79982e0..c04013c 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -70,6 +70,8 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: 
test=ymir_cfg.ymir.input.candidate_index_file ) + # validation may augment the image and use more gpu + # so set smaller samples_per_gpu for validation samples_per_gpu = ymir_cfg.param.samples_per_gpu workers_per_gpu = ymir_cfg.param.workers_per_gpu mmdet_cfg.data.samples_per_gpu = samples_per_gpu @@ -82,7 +84,11 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: ann_prefix=ymir_cfg.ymir.input.annotations_dir, classes=ymir_cfg.param.class_names, data_root=ymir_cfg.ymir.input.root_dir, - filter_empty_gt=False + filter_empty_gt=False, + samples_per_gpu=samples_per_gpu if split == 'train' else max( + 1, samples_per_gpu//2), + workers_per_gpu=workers_per_gpu if split == 'train' else max( + 1, workers_per_gpu//2) ) # modify dataset config for `split` mmdet_dataset_cfg = mmdet_cfg.data.get(split, None) @@ -113,6 +119,12 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir=ymir_cfg.ymir.output.tensorboard_dir) mmdet_cfg.log_config['hooks'].append(tensorboard_logger) + + # modify evaluation and interval + interval = max(1, mmdet_cfg.runner.max_epochs//30) + mmdet_cfg.evaluation.interval = interval + # Whether to evaluate the AP for each class + mmdet_cfg.evaluation.classwise = True return mmdet_cfg @@ -128,18 +140,19 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: model_dir = cfg.ymir.input.models_dir model_params_path = [ - osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.pth','.pt'))] + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.pth', '.pt'))] # choose weight file by priority, best_xxx.pth > latest.pth > epoch_xxx.pth - best_pth_files = [f for f in model_params_path if osp.basename(f).startswith('best_')] + best_pth_files = [ + f for f in model_params_path if osp.basename(f).startswith('best_')] if len(best_pth_files) > 0: return max(best_pth_files,
key=os.path.getctime) - epoch_pth_files = [f for f in model_params_path if f.startswith('epoch_')] + epoch_pth_files = [ + f for f in model_params_path if osp.basename(f).startswith('epoch_')] if len(epoch_pth_files) > 0: return max(epoch_pth_files, key=os.path.getctime) - return "" From 1bff027b7407643e238a6342bc7ae93ae4666962 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 24 Jun 2022 11:02:36 +0800 Subject: [PATCH 027/150] update --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index c04013c..96aa821 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -2,6 +2,7 @@ utils function for ymir and yolov5 """ import glob +import logging import os import os.path as osp from enum import IntEnum @@ -157,6 +158,7 @@ def get_weight_file(cfg: edict) -> str: def update_training_result_file(key_score): + logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') if COCO_EVAL_TMP_FILE is None: raise Exception( From 5f435e974ee1f98350b4d871b2c0345a2dea93c3 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 24 Jun 2022 11:09:31 +0800 Subject: [PATCH 028/150] fix training bug --- det-yolov4-training/convert_label_ark2txt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov4-training/convert_label_ark2txt.py b/det-yolov4-training/convert_label_ark2txt.py index 509e0b2..0304ba9 100755 --- a/det-yolov4-training/convert_label_ark2txt.py +++ b/det-yolov4-training/convert_label_ark2txt.py @@ -40,7 +40,7 @@ def _convert_annotations(index_file_path: str, dst_annotations_dir: str) -> None output_list = [] for each_line in txt_content: - each_line = [int(each) for each in each_line.split(",")] + each_line = [int(each) for each in each_line.split(",")[0:5]] cls, xmin, ymin, xmax, ymax, *_ = each_line xmin =
max(0, xmin) From 6f1e49a45fd8806473ed22f27fce281b3ee022d5 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 24 Jun 2022 16:17:28 +0800 Subject: [PATCH 029/150] add README.MD --- README.MD | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 README.MD diff --git a/README.MD b/README.MD new file mode 100644 index 0000000..aa0d4ae --- /dev/null +++ b/README.MD @@ -0,0 +1,64 @@ +# ymir-executor 使用文档 + +## det-yolov4-training + +- yolov4的训练镜像,采用mxnet与darknet框架,默认cuda版本为`10.1`,无法直接在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,并修改其它依赖。 + +## det-yolov4-mining + +- yolov4挖掘与推理镜像,与det-yolov4-training对应 + +## det-yolov5-tmi + +- yolov5训练、挖掘及推理镜像,训练时会从github上下载权重 + +- yolov5-FAQ + + - 权重下载出错:提前将权重下载好并复制到镜像中 + +## live-code-executor + +- 可以通过`git_url`, `git_branch`从网上clone代码到镜像并运行 + + - 参考[live-code](https://github.com/IndustryEssentials/ymir-remote-git) + +## det-mmdetection-tmi + +- mmdetection 训练、挖掘及推理镜像,目前还没开发完 + +# 如何制作自己的ymir-executor + +- [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) + +# FAQ + +- apt 或 pip 安装慢或出错 + + - 采用国内源,如在docker file 中添加如下命令 + + ``` + RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list + + RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple + ``` + +- docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 + + - 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. + + ``` + cd ymir-executor + + docker build -t ymir-executor/yolov5 . -f det-yolov5-tmi/cuda111.dockerfile + ``` + +- 镜像运行完`/in`与`/out`目录中的文件被清理 + + - ymir系统为节省空间,会在任务结束后删除其中不必要的文件,如果不想删除,可以在部署ymir时,修改文件`ymir/command/mir/tools/command_run_in_out.py`,注释其中的`_cleanup(work_dir=work_dir)`。注意需要重新构建后端镜像 + + ``` + cd ymir + docker build -t industryessentials/ymir-backend --build-arg PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple --build-arg SERVER_MODE='dev' -f Dockerfile.backend . 
+ + docker-compose down -v && docker-compose up -d + ``` From 2f4ed3eae50a631f727cf5e2e717de8743fa952c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 24 Jun 2022 16:19:28 +0800 Subject: [PATCH 030/150] add README.MD --- README.MD | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.MD b/README.MD index aa0d4ae..81e0077 100644 --- a/README.MD +++ b/README.MD @@ -20,16 +20,19 @@ - 可以通过`git_url`, `git_branch`从网上clone代码到镜像并运行 - - 参考[live-code](https://github.com/IndustryEssentials/ymir-remote-git) +- 参考 [live-code](https://github.com/IndustryEssentials/ymir-remote-git) ## det-mmdetection-tmi - mmdetection 训练、挖掘及推理镜像,目前还没开发完 -# 如何制作自己的ymir-executor + +## 如何制作自己的ymir-executor - [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) +--- + # FAQ - apt 或 pip 安装慢或出错 From bbf824b067f4c63e1f4ed8ad888d7f368c4dde9f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 24 Jun 2022 18:58:37 +0800 Subject: [PATCH 031/150] fix yolov5 nptyping problem --- det-yolov5-tmi/requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/requirements.txt b/det-yolov5-tmi/requirements.txt index fa1d389..70af2f8 100755 --- a/det-yolov5-tmi/requirements.txt +++ b/det-yolov5-tmi/requirements.txt @@ -2,7 +2,7 @@ # Base ---------------------------------------- matplotlib>=3.2.2 -numpy>=1.18.5 +numpy>=1.20.0 opencv-python>=4.1.2 Pillow>=7.1.2 PyYAML>=5.3.1 @@ -39,4 +39,5 @@ thop # FLOPs computation # Ymir --------------------------------------- imagesize>=1.3.0 # fast obtain image size without load image nptyping>=2.1.1 # numpy type hint -easydict>=1.9 \ No newline at end of file +typing-extensions>=4.2.0 +easydict>=1.9 From 8882e6088eeee0470d1f6c195169fea83395df5a Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 27 Jun 2022 16:49:54 +0800 Subject: [PATCH 032/150] fix extra tag --- det-yolov5-tmi/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/det-yolov5-tmi/utils/datasets.py b/det-yolov5-tmi/utils/datasets.py index d4bf7b9..28a25b9 100755 --- a/det-yolov5-tmi/utils/datasets.py +++ b/det-yolov5-tmi/utils/datasets.py @@ -903,7 +903,7 @@ def verify_image_label(args): if os.path.isfile(lb_file): nf = 1 # label found with open(lb_file) as f: - lb = [x.split(',') for x in f.read().strip().splitlines() if len(x)] + lb = [x.split(',')[0:5] for x in f.read().strip().splitlines() if len(x)] nl = len(lb) if nl: From 78c06edaa93a700a38a2e66aba1efbb71018e6f3 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 28 Jun 2022 18:08:03 +0800 Subject: [PATCH 033/150] no merge, support for ymir1.2.0 --- det-yolov5-tmi/infer-template.yaml | 8 +++--- det-yolov5-tmi/mining-template.yaml | 8 +++--- det-yolov5-tmi/start.py | 9 ++++-- det-yolov5-tmi/train.py | 6 ++-- det-yolov5-tmi/training-template.yaml | 10 ++++--- det-yolov5-tmi/utils/metrics.py | 2 +- det-yolov5-tmi/utils/ymir_yolov5.py | 40 +++++++++++++-------------- 7 files changed, 46 insertions(+), 37 deletions(-) diff --git a/det-yolov5-tmi/infer-template.yaml b/det-yolov5-tmi/infer-template.yaml index 7574512..89dcc96 100644 --- a/det-yolov5-tmi/infer-template.yaml +++ b/det-yolov5-tmi/infer-template.yaml @@ -2,10 +2,10 @@ # after build image, it should at /img-man/infer-template.yaml # key: gpu_id, task_id, model_params_path, class_names should be preserved -gpu_id: '0' -task_id: 'default-infer-task' -model_params_path: [] -class_names: [] +# gpu_id: '0' +# task_id: 'default-infer-task' +# model_params_path: [] +# class_names: [] img_size: 640 conf_thres: 0.25 diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 5f2a3b2..20106dc 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -2,10 +2,10 @@ # after build image, it should at /img-man/mining-template.yaml # key: gpu_id, task_id, model_params_path, class_names should be preserved -gpu_id: '0' -task_id: 'default-training-task' 
-model_params_path: [] -class_names: [] +# gpu_id: '0' +# task_id: 'default-training-task' +# model_params_path: [] +# class_names: [] img_size: 640 conf_thres: 0.25 diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index ba06400..7b687e9 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -50,6 +50,8 @@ def _run_training(cfg: edict) -> None: batch_size = cfg.param.batch_size model = cfg.param.model img_size = cfg.param.img_size + save_period = cfg.param.save_period + args_options = cfg.param.args_options weights = get_weight_file(cfg) if not weights: # download pretrained weight @@ -59,8 +61,11 @@ def _run_training(cfg: edict) -> None: command = f'python3 train.py --epochs {epochs} ' + \ f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ - f'--img-size {img_size} --hyp data/hyps/hyp.scratch-low.yaml ' + \ - '--exist-ok' + f'--img-size {img_size} ' + \ + f'--save-period {save_period}' + if args_options: + command += f" {args_options}" + logging.info(f'start training: {command}') subprocess.run(command.split(), check=True) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 6dd190e..7fcbbce 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -413,12 +413,12 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Save last, best and delete torch.save(ckpt, last) - write_ymir_training_result(ymir_cfg, results, maps, rewrite=False) if best_fitness == fi: torch.save(ckpt, best) - write_ymir_training_result(ymir_cfg, results, maps, rewrite=True) if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') + weight_file = str(w / f'epoch{epoch}.pt') + write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, weight_file=weight_file) del ckpt callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) @@ -465,6 +465,8 @@ def train(hyp, 
# path/to/hyp.yaml or hyp dictionary LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") torch.cuda.empty_cache() + # save the best and last weight file with other files in models_dir + write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') return results diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index 8cacec8..72356b7 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -2,13 +2,15 @@ # after build image, it should at /img-man/training-template.yaml # key: gpu_id, task_id, pretrained_model_paths, class_names should be preserved -gpu_id: '0' -task_id: 'default-training-task' -pretrained_model_paths: [] -class_names: [] +# gpu_id: '0' +# task_id: 'default-training-task' +# pretrained_model_paths: [] +# class_names: [] model: 'yolov5s' batch_size: 16 epochs: 300 img_size: 640 opset: 11 +args_options: '--exist-ok' +save_period: 10 diff --git a/det-yolov5-tmi/utils/metrics.py b/det-yolov5-tmi/utils/metrics.py index 857fa5d..48db16f 100644 --- a/det-yolov5-tmi/utils/metrics.py +++ b/det-yolov5-tmi/utils/metrics.py @@ -14,7 +14,7 @@ def fitness(x): # Model fitness as a weighted combination of metrics - w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + w = [0.0, 0.0, 1.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (x[:, :4] * w).sum(1) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 68b5854..ffcb39d 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -1,6 +1,7 @@ """ utils function for ymir and yolov5 """ +import glob import os.path as osp import shutil from enum import IntEnum @@ -62,6 +63,7 @@ def get_merged_config() -> edict: merged_cfg.ymir = env.get_current_env() return merged_cfg + def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority @@ -202,31 +204,29 @@ def 
convert_ymir_to_yolov5(cfg: edict) -> None: fw.write(yaml.safe_dump(data)) -def write_ymir_training_result(cfg: edict, results: Tuple, maps: NDArray, rewrite=False) -> int: +def write_ymir_training_result(cfg: edict, + map50: float, + epoch: int, + weight_file: str) -> int: """ cfg: ymir config results: (mp, mr, map50, map, loss) maps: map@0.5:0.95 for all classes - rewrite: set true to ensure write the best result + epoch: stage + weight_file: saved weight files, empty weight_file will save all files """ - if not rewrite: - training_result_file = cfg.ymir.output.training_result_file - if osp.exists(training_result_file): - return 0 - model = cfg.param.model - class_names = cfg.param.class_names - mp = results[0] # mean of precision - mr = results[1] # mean of recall - map50 = results[2] # mean of ap@0.5 - map = results[3] # mean of ap@0.5:0.95 - # use `rw.write_training_result` to save training result - rw.write_training_result(model_names=[f'{model}.yaml', 'best.pt', 'last.pt', 'best.onnx'], - mAP=float(map), - mAP50=float(map50), - precision=float(mp), - recall=float(mr), - classAPs={class_name: v - for class_name, v in zip(class_names, maps.tolist())}) + if weight_file: + rw.write_model_stage(stage_name=f"{model}_{epoch}", + files=[weight_file], + mAP=float(map50)) + else: + # save other files with + files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*')) + if not f.endswith('.pt')] + ['last.pt', 'best.pt'] + + rw.write_model_stage(stage_name=f"{model}_last_and_best", + files=files, + mAP=float(map50)) return 0 From 8a01ee0c7e57c34bdfbb51f42e70ef3e83df0e59 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 30 Jun 2022 18:03:50 +0800 Subject: [PATCH 034/150] fix spell error --- det-yolov5-tmi/utils/ymir_yolov5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index ffcb39d..db6e183 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ 
b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -70,7 +70,7 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path = cfg.param.pretrained_model_paths + model_params_path = cfg.param.get('pretrained_model_params',[]) else: model_params_path = cfg.param.model_params_path From 9b825acaa928a3929d47b457faecda0f8635a0b8 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 30 Jun 2022 18:05:36 +0800 Subject: [PATCH 035/150] fix spell error --- det-yolov5-tmi/training-template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index 72356b7..c6d0ee4 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -1,10 +1,10 @@ # training template for your executor app # after build image, it should at /img-man/training-template.yaml -# key: gpu_id, task_id, pretrained_model_paths, class_names should be preserved +# key: gpu_id, task_id, pretrained_model_params, class_names should be preserved # gpu_id: '0' # task_id: 'default-training-task' -# pretrained_model_paths: [] +# pretrained_model_params: [] # class_names: [] model: 'yolov5s' From 22f3e4587fe7e7f675ba061f6dfb8c754312bb97 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 30 Jun 2022 18:56:32 +0800 Subject: [PATCH 036/150] fix abs_path file not found error --- det-yolov5-tmi/utils/ymir_yolov5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index db6e183..492822f 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -219,7 +219,7 @@ def write_ymir_training_result(cfg: edict, # use `rw.write_training_result` to save training result if weight_file: rw.write_model_stage(stage_name=f"{model}_{epoch}", - files=[weight_file], + 
files=[osp.basename(weight_file)], mAP=float(map50)) else: # save other files with From 9ebc439c9531fb79cdab48561020ddb89fe9a0dd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 4 Jul 2022 11:42:35 +0800 Subject: [PATCH 037/150] infer && mining --- det-yolov5-tmi/start.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 7b687e9..12cdcc2 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -22,12 +22,10 @@ def start() -> int: if cfg.ymir.run_training: _run_training(cfg) - elif cfg.ymir.run_mining: + if cfg.ymir.run_mining: _run_mining(cfg) - elif cfg.ymir.run_infer: + if cfg.ymir.run_infer: _run_infer(cfg) - else: - logging.warning('no task running') return 0 From 94cefe7cf41736ce180ce7b04120b89fc421bf27 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 4 Jul 2022 16:51:41 +0800 Subject: [PATCH 038/150] training or mining && infer --- det-yolov5-tmi/start.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 12cdcc2..fba6632 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -22,10 +22,11 @@ def start() -> int: if cfg.ymir.run_training: _run_training(cfg) - if cfg.ymir.run_mining: - _run_mining(cfg) - if cfg.ymir.run_infer: - _run_infer(cfg) + else: + if cfg.ymir.run_mining: + _run_mining(cfg) + if cfg.ymir.run_infer: + _run_infer(cfg) return 0 From 035fb311aea1ae93570f0d20a8d9df643146f2e6 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 4 Jul 2022 17:21:34 +0800 Subject: [PATCH 039/150] write file from /in to /out --- det-yolov4-training/cfg/coco.data | 4 ++-- det-yolov4-training/convert_label_ark2txt.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/det-yolov4-training/cfg/coco.data b/det-yolov4-training/cfg/coco.data index 95f0887..78903a4 100755 --- a/det-yolov4-training/cfg/coco.data +++ b/det-yolov4-training/cfg/coco.data @@ -1,5 
+1,5 @@ classes= 1 -train = /in/train-index-assets.tsv -valid = /in/val-index-assets.tsv +train = /out/train-index-assets.tsv +valid = /out/val-index-assets.tsv names = /out/coco.names backup = /out/models diff --git a/det-yolov4-training/convert_label_ark2txt.py b/det-yolov4-training/convert_label_ark2txt.py index 0304ba9..1043b53 100755 --- a/det-yolov4-training/convert_label_ark2txt.py +++ b/det-yolov4-training/convert_label_ark2txt.py @@ -81,7 +81,7 @@ def _create_image_index_file(src_index_path: str, dst_index_path: str) -> None: if __name__ == "__main__": - _create_image_index_file(src_index_path='/in/train-index.tsv', dst_index_path='/in/train-index-assets.tsv') - _create_image_index_file(src_index_path='/in/val-index.tsv', dst_index_path='/in/val-index-assets.tsv') - _convert_annotations(index_file_path='/in/train-index.tsv', dst_annotations_dir='/in/tmp_labels') - _convert_annotations(index_file_path='/in/val-index.tsv', dst_annotations_dir='/in/tmp_labels') + _create_image_index_file(src_index_path='/in/train-index.tsv', dst_index_path='/out/train-index-assets.tsv') + _create_image_index_file(src_index_path='/in/val-index.tsv', dst_index_path='/out/val-index-assets.tsv') + _convert_annotations(index_file_path='/in/train-index.tsv', dst_annotations_dir='/out/tmp_labels') + _convert_annotations(index_file_path='/in/val-index.tsv', dst_annotations_dir='/out/tmp_labels') From dba2d4c6b5680d2748cc7cfd566a55d1e8d1d2d9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 4 Jul 2022 19:21:07 +0800 Subject: [PATCH 040/150] update docker file and c --- det-yolov4-training/Dockerfile | 2 +- det-yolov4-training/src/utils.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/det-yolov4-training/Dockerfile b/det-yolov4-training/Dockerfile index 1d8ce14..6e6c4c9 100644 --- a/det-yolov4-training/Dockerfile +++ b/det-yolov4-training/Dockerfile @@ -1,7 +1,7 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ARG 
PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple WORKDIR /darknet -RUN apt-get update +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update RUN apt install -y software-properties-common wget RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update diff --git a/det-yolov4-training/src/utils.c b/det-yolov4-training/src/utils.c index c7c90aa..10b12cf 100755 --- a/det-yolov4-training/src/utils.c +++ b/det-yolov4-training/src/utils.c @@ -271,7 +271,7 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace(output_path, "/images/train2014/", "/labels/train2014/", output_path); // COCO find_replace(output_path, "/images/val2014/", "/labels/val2014/", output_path); // COCO - find_replace(output_path, "/in/assets/", "/in/tmp_labels/", output_path); + find_replace(output_path, "/in/assets/", "/out/tmp_labels/", output_path); find_replace(output_path, "\\JPEGImages\\", "\\labels\\", output_path); // PascalVOC //find_replace(output_path, "/images/", "/labels/", output_path); // COCO From 7a5dd796b7597a9ee1be556a4455b79becbb9344 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 5 Jul 2022 19:31:22 +0800 Subject: [PATCH 041/150] update readme --- README.MD | 68 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/README.MD b/README.MD index 81e0077..ac81320 100644 --- a/README.MD +++ b/README.MD @@ -14,7 +14,7 @@ - yolov5-FAQ - - 权重下载出错:提前将权重下载好并复制到镜像中 + - 权重下载出错:提前将权重下载好并复制到镜像中或导入预训练模型 ## live-code-executor @@ -31,6 +31,12 @@ - [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) +## 如何导入预训练模型 + +- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) + + - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 + --- # FAQ @@ -39,29 +45,61 @@ - 采用国内源,如在docker file 中添加如下命令 - ``` - RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list 
+ ``` + RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list - RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple - ``` + RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple + ``` - docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 - 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. - ``` - cd ymir-executor + ``` + cd ymir-executor - docker build -t ymir-executor/yolov5 . -f det-yolov5-tmi/cuda111.dockerfile - ``` + docker build -t ymir-executor/yolov5 . -f det-yolov5-tmi/cuda111.dockerfile + ``` - 镜像运行完`/in`与`/out`目录中的文件被清理 - - ymir系统为节省空间,会在任务结束后删除其中不必要的文件,如果不想删除,可以在部署ymir时,修改文件`ymir/command/mir/tools/command_run_in_out.py`,注释其中的`_cleanup(work_dir=work_dir)`。注意需要重新构建后端镜像 + - ymir系统为节省空间,会在任务`成功结束`后删除其中不必要的文件,如果不想删除,可以在部署ymir时,修改文件`ymir/command/mir/tools/command_run_in_out.py`,注释其中的`_cleanup(work_dir=work_dir)`。注意需要重新构建后端镜像 + + ``` + cd ymir + docker build -t industryessentials/ymir-backend --build-arg PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple --build-arg SERVER_MODE='dev' -f Dockerfile.backend . 
+ + docker-compose down -v && docker-compose up -d + ``` + +- 训练镜像如何调试 + + - 先通过失败任务的tensorboard链接拿到任务id,如`t000000100000175245d1656933456` + + - 进入ymir部署目录 `ymir-workplace/sandbox/work_dir/TaskTypeTraining/t000000100000175245d1656933456/sub_task/t000000100000175245d1656933456`, `ls` 可以看到以下结果 + + ``` + # ls + in out task_config.yaml + ``` + + - 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 + + ``` + docker run -it --gpus all -v $PWD/in:/in -v $PWD/out:/out -v : bash + + # 以/home/ymir/ymir-workplace作为ymir部署目录为例 + docker run -it --gpus all -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash + ``` + + - 推理与挖掘镜像调试同理,注意对应目录均为`ymir-workplace/sandbox/work_dir/TaskTypeMining` + +- 模型精度/速度如何权衡与提升 + + - 模型精度与数据集大小、数据集质量、学习率、batch size、 迭代次数、模型结构、数据增强方式、损失函数等相关,在此不做展开,详情参考: + + - [Object Detection in 20 Years: A Survey](https://arxiv.org/abs/1905.05055) - ``` - cd ymir - docker build -t industryessentials/ymir-backend --build-arg PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple --build-arg SERVER_MODE='dev' -f Dockerfile.backend . + - [Paper with Code: Object Detection](https://paperswithcode.com/task/object-detection) - docker-compose down -v && docker-compose up -d - ``` + - [awesome object detection](https://github.com/amusi/awesome-object-detection) From 41fef8c971b8721e57f54be1c01419dcd4a2117e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 5 Jul 2022 19:36:08 +0800 Subject: [PATCH 042/150] Revert "fix yolov5 nptyping problem" This reverts commit bbf824b067f4c63e1f4ed8ad888d7f368c4dde9f. 
--- det-yolov5-tmi/requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/det-yolov5-tmi/requirements.txt b/det-yolov5-tmi/requirements.txt index 70af2f8..fa1d389 100755 --- a/det-yolov5-tmi/requirements.txt +++ b/det-yolov5-tmi/requirements.txt @@ -2,7 +2,7 @@ # Base ---------------------------------------- matplotlib>=3.2.2 -numpy>=1.20.0 +numpy>=1.18.5 opencv-python>=4.1.2 Pillow>=7.1.2 PyYAML>=5.3.1 @@ -39,5 +39,4 @@ thop # FLOPs computation # Ymir --------------------------------------- imagesize>=1.3.0 # fast obtain image size without load image nptyping>=2.1.1 # numpy type hint -typing-extensions>=4.2.0 -easydict>=1.9 +easydict>=1.9 \ No newline at end of file From 1a4af5e29b2cb341cdaabc5f26be10e9ecb8aa4c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 5 Jul 2022 19:42:39 +0800 Subject: [PATCH 043/150] revert and redo --- det-yolov5-tmi/requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/requirements.txt b/det-yolov5-tmi/requirements.txt index fa1d389..70af2f8 100755 --- a/det-yolov5-tmi/requirements.txt +++ b/det-yolov5-tmi/requirements.txt @@ -2,7 +2,7 @@ # Base ---------------------------------------- matplotlib>=3.2.2 -numpy>=1.18.5 +numpy>=1.20.0 opencv-python>=4.1.2 Pillow>=7.1.2 PyYAML>=5.3.1 @@ -39,4 +39,5 @@ thop # FLOPs computation # Ymir --------------------------------------- imagesize>=1.3.0 # fast obtain image size without load image nptyping>=2.1.1 # numpy type hint -easydict>=1.9 \ No newline at end of file +typing-extensions>=4.2.0 +easydict>=1.9 From 3b3e97886dd0993511fc0e46e7ef6d9ac0025ace Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 6 Jul 2022 11:48:52 +0800 Subject: [PATCH 044/150] update readme --- README.MD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.MD b/README.MD index ac81320..bcba683 100644 --- a/README.MD +++ b/README.MD @@ -14,7 +14,7 @@ - yolov5-FAQ - - 权重下载出错:提前将权重下载好并复制到镜像中或导入预训练模型 + - 
镜像训练时权重下载出错或慢:提前将权重下载好并复制到镜像`/app`目录下或通过ymir导入预训练模型,在训练时进行加载。 ## live-code-executor From 7b201d13ee4a2b66d1d2e2a36ca9cbface9ff305 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 6 Jul 2022 15:15:14 +0800 Subject: [PATCH 045/150] add cuda112 dockerfile for yolov4 --- det-yolov4-mining/cuda112.dockerfile | 15 +++++++++++++++ det-yolov4-training/Dockerfile | 2 +- det-yolov4-training/cuda112.dockerfile | 23 +++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 det-yolov4-mining/cuda112.dockerfile create mode 100644 det-yolov4-training/cuda112.dockerfile diff --git a/det-yolov4-mining/cuda112.dockerfile b/det-yolov4-mining/cuda112.dockerfile new file mode 100644 index 0000000..871b00f --- /dev/null +++ b/det-yolov4-mining/cuda112.dockerfile @@ -0,0 +1,15 @@ +FROM industryessentials/ymir-executor:cuda112-yolov4-training + +RUN apt-get update && apt-get install -y --no-install-recommends libsm6 libxext6 libfontconfig1 libxrender1 libgl1-mesa-glx \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN pip3 install --upgrade pip setuptools wheel && pip3 install opencv-python pyyaml scipy tqdm && rm -rf /root/.cache/pip3 + +COPY . 
/app +WORKDIR /app +RUN cp ./start.sh /usr/bin/start.sh && \ + mkdir -p /img-man && \ + cp ./mining-template.yaml /img-man/mining-template.yaml && \ + cp ./infer-template.yaml /img-man/infer-template.yaml && \ + cp ./README.md /img-man/readme.md +CMD sh /usr/bin/start.sh diff --git a/det-yolov4-training/Dockerfile b/det-yolov4-training/Dockerfile index 6e6c4c9..61ce1f6 100644 --- a/det-yolov4-training/Dockerfile +++ b/det-yolov4-training/Dockerfile @@ -1,6 +1,7 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple WORKDIR /darknet +RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update RUN apt install -y software-properties-common wget RUN add-apt-repository ppa:deadsnakes/ppa @@ -12,7 +13,6 @@ RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six -RUN echo '\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse\n' >> /etc/apt/sources.list ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev COPY . 
/darknet diff --git a/det-yolov4-training/cuda112.dockerfile b/det-yolov4-training/cuda112.dockerfile new file mode 100644 index 0000000..3e6884b --- /dev/null +++ b/det-yolov4-training/cuda112.dockerfile @@ -0,0 +1,23 @@ +FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 +ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple +WORKDIR /darknet +RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update +RUN apt install -y software-properties-common wget +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update +RUN apt install -y python3.7 python3-distutils +RUN wget https://bootstrap.pypa.io/get-pip.py +RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137 +RUN rm /usr/bin/python3 +RUN ln -s /usr/bin/python3.7 /usr/bin/python3 +RUN python3 get-pip.py +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six + +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update && apt-get install -y libopencv-dev +COPY . 
/darknet +RUN cp /darknet/make_train_test_darknet.sh /usr/bin/start.sh +RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml +RUN make -j +CMD bash /usr/bin/start.sh From 4c99d813e360cec9d16cc8a4448ae2b8e9c305a9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 6 Jul 2022 16:45:29 +0800 Subject: [PATCH 046/150] update mmdet for ymir1.2.0 --- det-mmdetection-tmi/docker/Dockerfile | 25 -------- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 46 ++++++++++++++ det-mmdetection-tmi/docker/Dockerfile.cuda111 | 4 +- .../mmdet/core/evaluation/eval_hooks.py | 56 ++++++++++++----- det-mmdetection-tmi/mmdet/datasets/coco.py | 17 +++-- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 63 ++++++++++++++----- det-mmdetection-tmi/start.py | 31 ++++----- det-mmdetection-tmi/training-template.yaml | 1 + det-mmdetection-tmi/ymir_train.py | 11 ++-- 9 files changed, 162 insertions(+), 92 deletions(-) delete mode 100644 det-mmdetection-tmi/docker/Dockerfile create mode 100644 det-mmdetection-tmi/docker/Dockerfile.cuda102 diff --git a/det-mmdetection-tmi/docker/Dockerfile b/det-mmdetection-tmi/docker/Dockerfile deleted file mode 100644 index 5ee7a37..0000000 --- a/det-mmdetection-tmi/docker/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -ARG PYTORCH="1.6.0" -ARG CUDA="10.1" -ARG CUDNN="7" - -FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel - -ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" -ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" -ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" - -RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Install MMCV -RUN pip install --no-cache-dir --upgrade pip wheel setuptools -RUN pip install --no-cache-dir mmcv-full==1.3.17 -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html - -# Install MMDetection -RUN conda clean --all -RUN git clone 
https://github.com/open-mmlab/mmdetection.git /mmdetection -WORKDIR /mmdetection -ENV FORCE_CUDA="1" -RUN pip install --no-cache-dir -r requirements/build.txt -RUN pip install --no-cache-dir -e . diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 new file mode 100644 index 0000000..6110bf6 --- /dev/null +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -0,0 +1,46 @@ +ARG PYTORCH="1.8.1" +ARG CUDA="10.2" +ARG CUDNN="7" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +# mmcv>=1.3.17, <=1.5.0 +ARG MMCV="1.4.3" +ARG SERVER_MODE=prod + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 + +# Set timezone +RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo 'Asia/Shanghai' >/etc/timezone + +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ + && apt-get update \ + && apt-get install -y build-essential ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) +RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ + if [ "${SERVER_MODE}" = "dev" ]; then \ + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + else \ + pip install ymir-exc; \ + fi \ + && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html \ + && conda clean --all + +# Install det-mmdetection-tmi +COPY . /app/ +WORKDIR /app +ENV FORCE_CUDA="1" +RUN pip install --no-cache-dir -r requirements/runtime.txt \ + && pip install --no-cache-dir -e . 
\ + && mkdir /img-man \ + && mv *-template.yaml /img-man \ + && echo "cd /app && python3 start.py" > /usr/bin/start.sh + +CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index 42a9004..f969e4f 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -32,13 +32,13 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ && conda clean --all # Install det-mmdetection-tmi -ADD det-mmdetection-tmi /app +COPY . /app/ WORKDIR /app ENV FORCE_CUDA="1" RUN pip install --no-cache-dir -r requirements/runtime.txt \ && pip install --no-cache-dir -e . \ && mkdir /img-man \ && mv *-template.yaml /img-man \ - && echo "python3 start.py" > /usr/bin/start.sh + && echo "cd /app && python3 start.py" > /usr/bin/start.sh CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index 89bc7bb..6e63d43 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -49,15 +49,27 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" - percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) - monitor.write_monitor_logger(percent=percent) + if self.by_epoch: + monitor_interval = max(1, runner.max_epochs//1000) + if runner.epoch % monitor_interval == 0: + percent = get_ymir_process( + stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) + monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) def before_train_iter(self, runner): self._decide_interval(runner) super().before_train_iter(runner) + def after_train_iter(self, runner): + if not self.by_epoch: + monitor_interval = max(1, runner.max_iters//1000) + if runner.iter % monitor_interval == 0: + percent 
= get_ymir_process( + stage=YmirStage.TASK, p=runner.iter/runner.max_iters) + monitor.write_monitor_logger(percent=percent) + super().after_train_iter(runner) + def _do_evaluate(self, runner): """perform evaluation and save ckpt.""" if not self._should_evaluate(runner): @@ -67,14 +79,15 @@ def _do_evaluate(self, runner): results = single_gpu_test(runner.model, self.dataloader, show=False) runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) + update_training_result_file(last=False, key_score=key_score) # the key_score may be `None` so it needs to skip the action to save # the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) - best_score = runner.meta['hook_msgs'].get( - 'best_score', self.init_value_map[self.rule]) - if self.compare_func(key_score, best_score): - update_training_result_file(key_score) + # best_score = runner.meta['hook_msgs'].get( + # 'best_score', self.init_value_map[self.rule]) + # if self.compare_func(key_score, best_score): + # update_training_result_file(key_score) # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, @@ -104,15 +117,27 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" - percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) - monitor.write_monitor_logger(percent=percent) + if self.by_epoch and runner.rank == 0: + monitor_interval = max(1, runner.max_epochs//1000) + if runner.epoch % monitor_interval == 0: + percent = get_ymir_process( + stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) + monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) def before_train_iter(self, runner): self._decide_interval(runner) super().before_train_iter(runner) + def after_train_iter(self, runner): + if not self.by_epoch and runner.rank == 0: + monitor_interval = max(1, runner.max_iters//1000) + if 
runner.iter % monitor_interval == 0: + percent = get_ymir_process( + stage=YmirStage.TASK, p=runner.iter/runner.max_iters) + monitor.write_monitor_logger(percent=percent) + super().after_train_iter(runner) + def _do_evaluate(self, runner): """perform evaluation and save ckpt.""" # Synchronization of BatchNorm's buffer (running_mean @@ -145,13 +170,14 @@ def _do_evaluate(self, runner): print('\n') runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) - + update_training_result_file(last=False, key_score=key_score) # the key_score may be `None` so it needs to skip # the action to save the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) - best_score = runner.meta['hook_msgs'].get( - 'best_score', self.init_value_map[self.rule]) - if self.compare_func(key_score, best_score): - update_training_result_file(key_score) + # best_score = runner.meta['hook_msgs'].get( + # 'best_score', self.init_value_map[self.rule]) + # if self.compare_func(key_score, best_score): + # update_training_result_file(key_score) + diff --git a/det-mmdetection-tmi/mmdet/datasets/coco.py b/det-mmdetection-tmi/mmdet/datasets/coco.py index cde2de7..ffe83d4 100644 --- a/det-mmdetection-tmi/mmdet/datasets/coco.py +++ b/det-mmdetection-tmi/mmdet/datasets/coco.py @@ -562,15 +562,6 @@ def evaluate(self, results_per_category.append( (f'{nm["name"]}', f'{float(ap):0.3f}')) - - COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') - if COCO_EVAL_TMP_FILE is not None: - mmcv.dump({name:value for name,value in results_per_category}, COCO_EVAL_TMP_FILE, file_format='json') - else: - raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') - - print_log(f'\n write eval result to {COCO_EVAL_TMP_FILE}', logger=logger) - num_columns = min(6, len(results_per_category) * 2) results_flatten = list( itertools.chain(*results_per_category)) @@ -601,4 +592,12 @@ def evaluate(self, 
f'{ap[4]:.3f} {ap[5]:.3f}') if tmp_dir is not None: tmp_dir.cleanup() + + COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') + if COCO_EVAL_TMP_FILE is not None: + mmcv.dump(eval_results, COCO_EVAL_TMP_FILE, file_format='json') + else: + raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + + print_log(f'\n write eval result to {COCO_EVAL_TMP_FILE}', logger=logger) return eval_results diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 96aa821..bf2d31c 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -3,6 +3,7 @@ """ import glob import logging +import yaml import os import os.path as osp from enum import IntEnum @@ -85,11 +86,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: ann_prefix=ymir_cfg.ymir.input.annotations_dir, classes=ymir_cfg.param.class_names, data_root=ymir_cfg.ymir.input.root_dir, - filter_empty_gt=False, - samples_per_gpu=samples_per_gpu if split == 'train' else max( - 1, samples_per_gpu//2), - workers_per_gpu=workers_per_gpu if split == 'train' else max( - 1, workers_per_gpu//2) + filter_empty_gt=False ) # modify dataset config for `split` mmdet_dataset_cfg = mmdet_cfg.data.get(split, None) @@ -101,7 +98,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: x.update(ymir_dataset_cfg) else: src_dataset_type = mmdet_dataset_cfg.type - if src_dataset_type in ['CocoDataset']: + if src_dataset_type in ['CocoDataset', 'YmirDataset']: mmdet_dataset_cfg.update(ymir_dataset_cfg) elif src_dataset_type in ['MultiImageMixDataset', 'RepeatDataset']: mmdet_dataset_cfg.dataset.update(ymir_dataset_cfg) @@ -119,13 +116,17 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir tensorboard_logger = dict(type='TensorboardLoggerHook', 
log_dir=ymir_cfg.ymir.output.tensorboard_dir) - mmdet_cfg.log_config['hooks'].append(tensorboard_logger) + if len(mmdet_cfg.log_config['hooks']) <= 1: + mmdet_cfg.log_config['hooks'].append(tensorboard_logger) + else: + mmdet_cfg.log_config['hooks'][1].update(tensorboard_logger) # modify evaluation and interval - interval = max(1, mmdet_cfg.runner.max_epoch//30) + interval = max(1, mmdet_cfg.runner.max_epochs//30) mmdet_cfg.evaluation.interval = interval + mmdet_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') # Whether to evaluating the AP for each class - mmdet_cfg.evaluation.classwise = True + # mmdet_cfg.evaluation.classwise = True return mmdet_cfg @@ -150,21 +151,23 @@ def get_weight_file(cfg: edict) -> str: return max(best_pth_files, key=os.path.getctime) epoch_pth_files = [ - f for f in model_params_path if osp.basename(f).startswith('epoch_')] + f for f in model_params_path if osp.basename(f).startswith(('epoch_', 'iter_'))] if len(epoch_pth_files) > 0: return max(epoch_pth_files, key=os.path.getctime) return "" -def update_training_result_file(key_score): - logging.info(f'key_score is {key_score}') +def update_training_result_file(last=False, key_score=None): + if key_score: + logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') if COCO_EVAL_TMP_FILE is None: raise Exception( 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') - results_per_category = mmcv.load(COCO_EVAL_TMP_FILE) + eval_result = mmcv.load(COCO_EVAL_TMP_FILE) + map = eval_result['bbox_mAP_50'] work_dir = os.getenv('YMIR_MODELS_DIR') if work_dir is None or not osp.isdir(work_dir): @@ -172,7 +175,33 @@ def update_training_result_file(key_score): f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {work_dir}') # assert only one model config file in work_dir - result_files = glob.glob(osp.join(work_dir, '*')) - rw.write_training_result(model_names=[osp.basename(f) for f in 
result_files], - mAP=key_score, - classAPs=results_per_category) + result_files = [osp.basename(f) for f in glob.glob( + osp.join(work_dir, '*')) if osp.basename(f) != 'result.yaml'] + + if last: + # save all output file + rw.write_model_stage(files=result_files, + mAP=float(map), + stage_name='last') + else: + # save newest weight file in format epoch_xxx.pth or iter_xxx.pth + weight_files = [osp.join(work_dir, f) for f in result_files if f.startswith( + ('iter_', 'epoch_')) and f.endswith('.pth')] + + if len(weight_files) > 0: + newest_weight_file = osp.basename( + max(weight_files, key=os.path.getctime)) + + stage_name = osp.splitext(newest_weight_file)[0] + training_result_file = osp.join(work_dir, 'result.yaml') + if osp.exists(training_result_file): + with open(training_result_file, 'r') as f: + training_result = yaml.safe_load(f) + model_stages = training_result.get('model_stages', {}) + else: + model_stages = {} + + if stage_name not in model_stages: + rw.write_model_stage(files=[newest_weight_file], + mAP=float(map), + stage_name=stage_name) diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 44babba..9fd6a7c 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -16,17 +16,16 @@ from ymir_infer import YmirModel, mmdet_result_to_ymir -def start() -> int: - cfg = get_merged_config() - +def start(cfg: edict) -> int: logging.info(f'merged config: {cfg}') if cfg.ymir.run_training: _run_training(cfg) - elif cfg.ymir.run_mining: - _run_mining(cfg) - elif cfg.ymir.run_infer: - _run_infer(cfg) + elif cfg.ymir.run_mining or cfg.ymir.run_infer: + if cfg.ymir.run_mining: + _run_mining(cfg) + if cfg.ymir.run_infer: + _run_infer(cfg) else: logging.warning('no task running') @@ -44,18 +43,6 @@ def _run_training(cfg: edict) -> None: logging.info(f'start training: {command}') subprocess.run(command.split(), check=True) - work_dir = cfg.ymir.output.models_dir - result_files = glob.glob(os.path.join(work_dir, '*')) - - 
training_result_file = cfg.ymir.output.training_result_file - with open(training_result_file, 'r') as fp: - best_result = yaml.safe_load(fp) - - # save the last checkpoint - rw.write_training_result(model_names=[os.path.basename(f) for f in result_files], - mAP=best_result['map'], - classAPs=best_result['class_aps']) - # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) @@ -95,5 +82,9 @@ def _run_infer(cfg: edict) -> None: datefmt='%Y%m%d-%H:%M:%S', level=logging.INFO) + cfg = get_merged_config() + os.environ.setdefault('YMIR_MODELS_DIR', cfg.ymir.output.models_dir) + os.environ.setdefault('COCO_EVAL_TMP_FILE', os.path.join( + cfg.ymir.output.root_dir, 'eval_tmp.json')) os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') - sys.exit(start()) + sys.exit(start(cfg)) diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 8be36b9..9b97e21 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -4,4 +4,5 @@ max_epochs: 300 config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' args_options: '' cfg_options: '' +metric: 'bbox_mAP_50' port: 12345 diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index a39e64b..e3f3537 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -8,7 +8,7 @@ from ymir_exc import monitor from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_ymir_process) + get_ymir_process, update_training_result_file) def main(cfg: edict) -> int: @@ -35,15 +35,15 @@ def main(cfg: edict) -> int: if num_gpus == 0: # view https://mmdetection.readthedocs.io/en/stable/1_exist_data_model.html#training-on-cpu os.environ.setdefault('CUDA_VISIBLE_DEVICES', "-1") - cmd = f"python tools/train.py {config_file} " + \ + cmd = f"python3 tools/train.py {config_file} " + \ f"--work-dir {work_dir}" elif num_gpus == 1: - cmd = f"python 
tools/train.py {config_file} " + \ + cmd = f"python3 tools/train.py {config_file} " + \ f"--work-dir {work_dir} --gpu-id {gpu_id}" else: os.environ.setdefault('CUDA_VISIBLE_DEVICES', gpu_id) port = cfg.param.get('port') - os.environ.setdefault('PORT', port) + os.environ.setdefault('PORT', str(port)) cmd = f"./tools/dist_train.sh {config_file} {num_gpus} " + \ f"--work-dir {work_dir}" @@ -55,6 +55,9 @@ def main(cfg: edict) -> int: logging.info(f"training command: {cmd}") subprocess.run(cmd.split(), check=True) + + # save the last checkpoint + update_training_result_file(last=True) logging.info(f"training finished") return 0 From 2cbb95e00a8a5480b9d8ca088be7c00db53cc484 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 10:36:41 +0800 Subject: [PATCH 047/150] update training --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 5 +++-- det-mmdetection-tmi/training-template.yaml | 2 +- det-mmdetection-tmi/ymir_infer.py | 20 +++++++++++++++++++- det-mmdetection-tmi/ymir_train.py | 2 +- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index bf2d31c..d3c6e97 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -136,7 +136,7 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path: List = cfg.param.pretrained_model_paths + model_params_path: List = cfg.param.pretrained_model_params else: model_params_path: List = cfg.param.model_params_path @@ -167,7 +167,8 @@ def update_training_result_file(last=False, key_score=None): 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') eval_result = mmcv.load(COCO_EVAL_TMP_FILE) - map = eval_result['bbox_mAP_50'] + # eval_result may be empty dict {}. 
+ map = eval_result.get('bbox_mAP_50',0) work_dir = os.getenv('YMIR_MODELS_DIR') if work_dir is None or not osp.isdir(work_dir): diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 9b97e21..a56133d 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -4,5 +4,5 @@ max_epochs: 300 config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' args_options: '' cfg_options: '' -metric: 'bbox_mAP_50' +metric: 'bbox' port: 12345 diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 47e47ed..7fc8892 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -1,4 +1,5 @@ import argparse +import os.path as osp from typing import Any, List import numpy as np @@ -49,13 +50,27 @@ def mmdet_result_to_ymir(results: List[DETECTION_RESULT], ann_list.append(ann) return ann_list +def get_config_file(cfg): + if cfg.ymir.run_training: + model_params_path: List = cfg.param.pretrained_model_params + else: + model_params_path: List = cfg.param.model_params_path + + model_dir = cfg.ymir.input.models_dir + config_files = [ + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.py'))] + + if len(config_files) > 0: + return config_files[0] + else: + return None class YmirModel: def __init__(self, cfg: edict): self.cfg = cfg # Specify the path to model config and checkpoint file - config_file = cfg.param.config_file + config_file = get_config_file(cfg) checkpoint_file = get_weight_file(cfg) cfg_options = parse_option( cfg.param.cfg_options) if cfg.param.cfg_options else None @@ -69,3 +84,6 @@ def __init__(self, cfg: edict): def infer(self, img): return inference_detector(self.model, img) + + def mining(self): + pass diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index e3f3537..3a3b3de 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ 
b/det-mmdetection-tmi/ymir_train.py @@ -44,7 +44,7 @@ def main(cfg: edict) -> int: os.environ.setdefault('CUDA_VISIBLE_DEVICES', gpu_id) port = cfg.param.get('port') os.environ.setdefault('PORT', str(port)) - cmd = f"./tools/dist_train.sh {config_file} {num_gpus} " + \ + cmd = f"bash ./tools/dist_train.sh {config_file} {num_gpus} " + \ f"--work-dir {work_dir}" if args_options: From 9bdc59f41538c7a7fa3790961ecd0a64599514eb Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 11:34:40 +0800 Subject: [PATCH 048/150] fix py3.7 and ymir_exc problem --- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 8 ++++---- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 6110bf6..653b03a 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -12,7 +12,8 @@ ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV LANG=C.UTF-8 - +ENV FORCE_CUDA="1" +ENV PYTHONPATH=. 
# Set timezone RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone @@ -24,9 +25,9 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ && rm -rf /var/lib/apt/lists/* # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) -RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ +RUN pip install --no-cache-dir --upgrade pip wheel setuptools pydantic tensorboardX pyyaml && \ if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install --ignore-requires-python --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi \ @@ -36,7 +37,6 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ # Install det-mmdetection-tmi COPY . /app/ WORKDIR /app -ENV FORCE_CUDA="1" RUN pip install --no-cache-dir -r requirements/runtime.txt \ && pip install --no-cache-dir -e . \ && mkdir /img-man \ diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index f969e4f..d0c24c6 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -11,7 +11,8 @@ ARG SERVER_MODE=prod ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" - +ENV FORCE_CUDA="1" +ENV PYTHONPATH=. # Set timezone RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone @@ -34,7 +35,6 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ # Install det-mmdetection-tmi COPY . 
/app/ WORKDIR /app -ENV FORCE_CUDA="1" RUN pip install --no-cache-dir -r requirements/runtime.txt \ && pip install --no-cache-dir -e . \ && mkdir /img-man \ From 5d14372d3697609928df560715472499edc26090 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 15:38:52 +0800 Subject: [PATCH 049/150] add mining and infer --- det-mmdetection-tmi/start.py | 60 ++--- det-mmdetection-tmi/ymir_infer.py | 53 ++++- det-mmdetection-tmi/ymir_log.py | 53 ----- det-mmdetection-tmi/ymir_mining.py | 357 +++++++++++++++++++++++++++++ det-mmdetection-tmi/ymir_train.py | 1 - 5 files changed, 420 insertions(+), 104 deletions(-) delete mode 100644 det-mmdetection-tmi/ymir_log.py create mode 100644 det-mmdetection-tmi/ymir_mining.py diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 9fd6a7c..686d451 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -1,80 +1,54 @@ -import glob import logging import os import subprocess import sys -import cv2 -import yaml from easydict import EasyDict as edict -from ymir_exc import dataset_reader as dr -from ymir_exc import env, monitor -from ymir_exc import result_writer as rw -from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_ymir_process) -from ymir_infer import YmirModel, mmdet_result_to_ymir +from mmdet.utils.util_ymir import get_merged_config +from ymir_exc import monitor def start(cfg: edict) -> int: logging.info(f'merged config: {cfg}') if cfg.ymir.run_training: - _run_training(cfg) + _run_training() elif cfg.ymir.run_mining or cfg.ymir.run_infer: if cfg.ymir.run_mining: - _run_mining(cfg) + _run_mining() if cfg.ymir.run_infer: - _run_infer(cfg) + _run_infer() else: logging.warning('no task running') return 0 -def _run_training(cfg: edict) -> None: - """ - function for training task - 1. convert dataset - 2. training model - 3. save model weight/hyperparameter/... 
to design directory - """ +def _run_training() -> None: command = 'python3 ymir_train.py' logging.info(f'start training: {command}') subprocess.run(command.split(), check=True) # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) + logging.info(f"training finished") - -def _run_mining(cfg: edict()) -> None: - command = 'python3 mining/mining_cald.py' - logging.info(f'mining: {command}') +def _run_mining() -> None: + command = 'python3 ymir_mining.py' + logging.info(f'start mining: {command}') subprocess.run(command.split(), check=True) - monitor.write_monitor_logger(percent=1.0) - - -def _run_infer(cfg: edict) -> None: - N = dr.items_count(env.DatasetType.CANDIDATE) - infer_result = dict() - model = YmirModel(cfg) - idx = -1 - # write infer result - monitor_gap = max(1, N // 100) - for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): - img = cv2.imread(asset_path) - result = model.infer(img) - infer_result[asset_path] = mmdet_result_to_ymir(result, cfg.param.class_names) - idx += 1 + monitor.write_monitor_logger(percent=1.0) + logging.info(f"mining finished") - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) - monitor.write_monitor_logger(percent=percent) +def _run_infer() -> None: + command = 'python3 ymir_infer.py' + logging.info(f'start infer: {command}') + subprocess.run(command.split(), check=True) - rw.write_infer_result(infer_result=infer_result) monitor.write_monitor_logger(percent=1.0) - + logging.info(f"infer finished") if __name__ == '__main__': logging.basicConfig(stream=sys.stdout, diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 7fc8892..84df374 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -1,15 +1,22 @@ import argparse import os.path as osp +import sys +import warnings from typing import Any, List +import cv2 import numpy as np from easydict import EasyDict as edict from mmcv 
import DictAction from nptyping import NDArray, Shape -from ymir_exc import result_writer as rw +from tqdm import tqdm from mmdet.apis import inference_detector, init_detector -from mmdet.utils.util_ymir import get_weight_file +from mmdet.utils.util_ymir import (YmirStage, get_merged_config, + get_weight_file, get_ymir_process) +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw DETECTION_RESULT = NDArray[Shape['*,5'], Any] @@ -50,6 +57,7 @@ def mmdet_result_to_ymir(results: List[DETECTION_RESULT], ann_list.append(ann) return ann_list + def get_config_file(cfg): if cfg.ymir.run_training: model_params_path: List = cfg.param.pretrained_model_params @@ -61,9 +69,13 @@ def get_config_file(cfg): osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.py'))] if len(config_files) > 0: + if len(config_files) > 1: + warnings.warn(f'multiple config file found! use {config_files[0]}') return config_files[0] else: - return None + raise Exception( + f'no config_file found in {model_dir} and {model_params_path}') + class YmirModel: def __init__(self, cfg: edict): @@ -72,8 +84,8 @@ def __init__(self, cfg: edict): # Specify the path to model config and checkpoint file config_file = get_config_file(cfg) checkpoint_file = get_weight_file(cfg) - cfg_options = parse_option( - cfg.param.cfg_options) if cfg.param.cfg_options else None + options = cfg.param.get('cfg_options', None) + cfg_options = parse_option(options) if options else None # current infer can only use one gpu!!! 
gpu_ids = cfg.param.gpu_id @@ -85,5 +97,32 @@ def __init__(self, cfg: edict): def infer(self, img): return inference_detector(self.model, img) - def mining(self): - pass + +def main(): + cfg = get_merged_config() + + N = dr.items_count(env.DatasetType.CANDIDATE) + infer_result = dict() + model = YmirModel(cfg) + idx = -1 + + # write infer result + monitor_gap = max(1, N // 100) + for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): + img = cv2.imread(asset_path) + result = model.infer(img) + infer_result[asset_path] = mmdet_result_to_ymir( + result, cfg.param.class_names) + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + monitor.write_monitor_logger(percent=percent) + + rw.write_infer_result(infer_result=infer_result) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/det-mmdetection-tmi/ymir_log.py b/det-mmdetection-tmi/ymir_log.py deleted file mode 100644 index 29f2ec8..0000000 --- a/det-mmdetection-tmi/ymir_log.py +++ /dev/null @@ -1,53 +0,0 @@ -import time -import os.path as osp -from typing import Generator -from pygtail import Pygtail -from mmcv.util import TORCH_VERSION, digit_version - -if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.1')): - try: - from tensorboardX import SummaryWriter - except ImportError: - raise ImportError('Please install tensorboardX to use ' - 'TensorboardLoggerHook.') -else: - try: - from torch.utils.tensorboard import SummaryWriter - except ImportError: - raise ImportError( - 'Please run "pip install future tensorboard" to install ' - 'the dependencies to use torch.utils.tensorboard ' - '(applicable to PyTorch 1.1 or higher)') - - -def read_log(f: str, wait: bool = True, sleep: float = 0.1) -> Generator[str]: - """ - Basically tail -f with a configurable sleep - """ - with open(f) as logfile: - # logfile.seek(0, os.SEEK_END) - while True: - new_line = logfile.readline() - if new_line: - 
yield new_line - else: - if wait: - # wait for new line - time.sleep(sleep) - else: - # read all line in file - break - -def write_tensorboard_text(tb_log_file: str, executor_log_file: str) -> None: - global _TENSORBOARD_GLOBAL_STEP - # tb_log_file = osp.join(cfg.ymir.output.tensorboard_dir, 'tensorboard_text.log') - # executor_log_file = cfg.ymir.output.executor_log_file - writer = SummaryWriter(tb_log_file) - - # Pygtail always return the new lines - for line in Pygtail(executor_log_file): - writer.add_text(tag='ymir-executor', text_string=line, global_step=_TENSORBOARD_GLOBAL_STEP) - _TENSORBOARD_GLOBAL_STEP += 1 - - writer.close() \ No newline at end of file diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py new file mode 100644 index 0000000..16379df --- /dev/null +++ b/det-mmdetection-tmi/ymir_mining.py @@ -0,0 +1,357 @@ +""" +data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout +official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py +""" +import random +import sys +from typing import Any, Dict, List, Tuple + +import cv2 +import numpy as np +from nptyping import NDArray +from scipy.stats import entropy +from tqdm import tqdm + +from mmdet.utils.util_ymir import (BBOX, CV_IMAGE, YmirStage, + get_merged_config, get_ymir_process) +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw +from ymir_infer import YmirModel + + +def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: + ''' + Find intersection of every box combination between two sets of box + boxes1: bounding boxes 1, a tensor of dimensions (n1, 4) + boxes2: bounding boxes 2, a tensor of dimensions (n2, 4) + + Out: Intersection each of boxes1 with respect to each of boxes2, + a tensor of dimensions (n1, n2) + ''' + n1 = boxes1.shape[0] + n2 = boxes2.shape[0] + max_xy = np.minimum(np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + 
np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + + min_xy = np.maximum(np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) + inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) + return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) + + +def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ + -> Tuple[CV_IMAGE, BBOX]: + """ + image: opencv image, [height,width,channels] + bbox: numpy.ndarray, [N,4] --> [x1,y1,x2,y2] + """ + image = image.copy() + + width = image.shape[1] + # Flip image horizontally + image = image[:, ::-1, :] + if len(bbox) > 0: + bbox = bbox.copy() + # Flip bbox horizontally + bbox[:, [0, 2]] = width - bbox[:, [2, 0]] + return image, bbox + + +def cutout(image: CV_IMAGE, bbox: BBOX, cut_num: int = 2, fill_val: int = 0, + bbox_remove_thres: float = 0.4, bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: + ''' + Cutout augmentation + image: A PIL image + boxes: bounding boxes, a tensor of dimensions (#objects, 4) + labels: labels of object, a tensor of dimensions (#objects) + fill_val: Value filled in cut out + bbox_remove_thres: Theshold to remove bbox cut by cutout + + Out: new image, new_boxes, new_labels + ''' + image = image.copy() + bbox = bbox.copy() + + if len(bbox) == 0: + return image, bbox + + original_h, original_w, original_channel = image.shape + count = 0 + for _ in range(50): + # Random cutout size: [0.15, 0.5] of original dimension + cutout_size_h = random.uniform(0.05 * original_h, 0.2 * original_h) + cutout_size_w = random.uniform(0.05 * original_w, 0.2 * original_w) + + # Random position for cutout + left = random.uniform(0, original_w - cutout_size_w) + right = left + cutout_size_w + top = random.uniform(0, original_h - cutout_size_h) + bottom = top + cutout_size_h + cutout = np.array( + [[float(left), float(top), float(right), float(bottom)]]) + + # Calculate intersect between cutout and bounding boxes + overlap_size = intersect(cutout, 
bbox) + area_boxes = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) + ratio = overlap_size / (area_boxes + 1e-14) + # If all boxes have Iou greater than bbox_remove_thres, try again + if ratio.max() > bbox_remove_thres or ratio.max() < bbox_min_thres: + continue + + image[int(top):int(bottom), int(left):int(right), :] = fill_val + count += 1 + if count >= cut_num: + break + return image, bbox + + +def rotate(image: CV_IMAGE, bbox: BBOX, rot: float = 5) -> Tuple[CV_IMAGE, BBOX]: + image = image.copy() + bbox = bbox.copy() + h, w, c = image.shape + center = np.array([w / 2.0, h / 2.0]) + s = max(h, w) * 1.0 + trans = get_affine_transform(center, s, rot, [w, h]) + if len(bbox) > 0: + for i in range(bbox.shape[0]): + x1, y1 = affine_transform(bbox[i, :2], trans) + x2, y2 = affine_transform(bbox[i, 2:], trans) + x3, y3 = affine_transform(bbox[i, [2, 1]], trans) + x4, y4 = affine_transform(bbox[i, [0, 3]], trans) + bbox[i, :2] = [min(x1, x2, x3, x4), min(y1, y2, y3, y4)] + bbox[i, 2:] = [max(x1, x2, x3, x4), max(y1, y2, y3, y4)] + image = cv2.warpAffine(image, trans, (w, h), flags=cv2.INTER_LINEAR) + return image, bbox + + +def get_3rd_point(a: NDArray, b: NDArray) -> NDArray: + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_dir(src_point: NDArray, rot_rad: float) -> List: + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result + + +def transform_preds(coords: NDArray, center: NDArray, scale: Any, rot: float, output_size: List) -> NDArray: + trans = get_affine_transform(center, scale, rot, output_size, inv=True) + target_coords = affine_transform(coords, trans) + return target_coords + + +def get_affine_transform(center: NDArray, + scale: Any, + rot: float, + output_size: List, + shift: NDArray = np.array([0, 0], dtype=np.float32), + inv: bool = False) -> NDArray: + if not 
isinstance(scale, np.ndarray) and not isinstance(scale, list): + scale = np.array([scale, scale], dtype=np.float32) + + scale_tmp = scale + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = get_dir([0, src_w * -0.5], rot_rad) + dst_dir = np.array([0, dst_w * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def affine_transform(pt: NDArray, t: NDArray) -> NDArray: + new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def resize(img: CV_IMAGE, boxes: BBOX, ratio: float = 0.8) -> Tuple[CV_IMAGE, BBOX]: + """ + ratio: <= 1.0 + """ + assert ratio <= 1.0, f'resize ratio {ratio} must <= 1.0' + + h, w, _ = img.shape + ow = int(w * ratio) + oh = int(h * ratio) + resize_img = cv2.resize(img, (ow, oh)) + new_img = np.zeros_like(img) + new_img[:oh, :ow] = resize_img + + if len(boxes) == 0: + return new_img, boxes + else: + return new_img, boxes * ratio + + +def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: + """ + args: + boxes1: np.array, (N, 4), xyxy + boxes2: np.array, (M, 4), xyxy + return: + iou: np.array, (N, M) + """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + iner_area = intersect(boxes1, boxes2) + area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) + area2 = area2.reshape(1, -1).repeat(area1.shape[0], 
axis=0) + iou = iner_area / (area1 + area2 - iner_area + 1e-14) + return iou + + +def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: + if len(result) > 0: + bboxes = result[:, :4].astype(np.int32) + conf = result[:, 4] + class_id = result[:, 5] + else: + bboxes = np.zeros(shape=(0, 4), dtype=np.int32) + conf = np.zeros(shape=(0, 1), dtype=np.float32) + class_id = np.zeros(shape=(0, 1), dtype=np.int32) + + return bboxes, conf, class_id + + +class YmirMining(YmirModel): + def mining(self): + N = dr.items_count(env.DatasetType.CANDIDATE) + monitor_gap = max(1, N // 100) + idx = -1 + beta = 1.3 + mining_result = [] + for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): + img = cv2.imread(asset_path) + # xyxy,conf,cls + result = self.predict(img) + bboxes, conf, _ = split_result(result) + if len(result) == 0: + # no result for the image without augmentation + mining_result.append((asset_path, -beta)) + continue + + consistency = 0.0 + aug_bboxes_dict, aug_results_dict = self.aug_predict(img, bboxes) + for key in aug_results_dict: + # no result for the image with augmentation f'{key}' + if len(aug_results_dict[key]) == 0: + consistency += beta + continue + + bboxes_key, conf_key, _ = split_result(aug_results_dict[key]) + cls_scores_aug = 1 - conf_key + cls_scores = 1 - conf + + consistency_per_aug = 2.0 + ious = get_ious(bboxes_key, aug_bboxes_dict[key]) + aug_idxs = np.argmax(ious, axis=0) + for origin_idx, aug_idx in enumerate(aug_idxs): + max_iou = ious[aug_idx, origin_idx] + if max_iou == 0: + consistency_per_aug = min(consistency_per_aug, beta) + p = cls_scores_aug[aug_idx] + q = cls_scores[origin_idx] + m = (p + q) / 2. 
+ js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + if js < 0: + js = 0 + consistency_box = max_iou + consistency_cls = 0.5 * \ + (conf[origin_idx] + conf_key[aug_idx]) * (1 - js) + consistency_per_inst = abs( + consistency_box + consistency_cls - beta) + consistency_per_aug = min( + consistency_per_aug, consistency_per_inst.item()) + + consistency += consistency_per_aug + + consistency /= len(aug_results_dict) + + mining_result.append((asset_path, consistency)) + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + monitor.write_monitor_logger(percent=percent) + + return mining_result + + def predict(self, img: CV_IMAGE) -> NDArray: + """ + predict single image and return bbox information + img: opencv BGR, uint8 format + """ + results = self.infer(img) + + xyxy_conf_idx_list=[] + for idx, result in enumerate(results): + for line in result: + if any(np.isinf(line)): + continue + x1, y1, x2, y2, score = line + xyxy_conf_idx_list.append([x1, y1, x2, y2, score, idx]) + + if len(xyxy_conf_idx_list) == 0: + return np.zeros(shape=(0, 6), dtype=np.float32) + else: + return np.array(xyxy_conf_idx_list, dtype=np.float32) + + def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], Dict[str, NDArray]]: + """ + for different augmentation methods: flip, cutout, rotate and resize + augment the image and bbox and use model to predict them. + + return the predict result and augment bbox. 
+ """ + aug_dict = dict(flip=horizontal_flip, + cutout=cutout, + rotate=rotate, + resize=resize) + + aug_bboxes = dict() + aug_results = dict() + for key in aug_dict: + aug_img, aug_bbox = aug_dict[key](image, bboxes) + + aug_result = self.predict(aug_img) + aug_bboxes[key] = aug_bbox + aug_results[key] = aug_result + + return aug_bboxes, aug_results + + +def main(): + cfg = get_merged_config() + miner = YmirMining(cfg) + mining_result = miner.mining() + rw.write_mining_result(mining_result=mining_result) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 3a3b3de..ace0c27 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -58,7 +58,6 @@ def main(cfg: edict) -> int: # save the last checkpoint update_training_result_file(last=True) - logging.info(f"training finished") return 0 From 8cbf2e9332407f3428899de74645c1460f2d52b7 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 15:40:03 +0800 Subject: [PATCH 050/150] update dockerfile --- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 653b03a..841fe7b 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -25,9 +25,9 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ && rm -rf /var/lib/apt/lists/* # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) -RUN pip install --no-cache-dir --upgrade pip wheel setuptools pydantic tensorboardX pyyaml && \ +RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --ignore-requires-python --force-reinstall -U 
"git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi \ From 0daa2cd591db41c06549d955adae7d6db57dbd5d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 16:01:44 +0800 Subject: [PATCH 051/150] empty mining template --- det-mmdetection-tmi/README_ymir.md | 15 +++++++++++++-- det-mmdetection-tmi/infer-template.yaml | 12 ++++++------ det-mmdetection-tmi/mining-template.yaml | 7 +++++++ 3 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 det-mmdetection-tmi/mining-template.yaml diff --git a/det-mmdetection-tmi/README_ymir.md b/det-mmdetection-tmi/README_ymir.md index 90a84b0..194bd03 100644 --- a/det-mmdetection-tmi/README_ymir.md +++ b/det-mmdetection-tmi/README_ymir.md @@ -2,11 +2,22 @@ `mmdetection` framework for object `det`ection `t`raining/`m`ining/`i`nfer task +# build docker image + +``` +docker build -t ymir-executor/mmdet:cuda102-tmi -build-arg SERVER_MODE=dev -f docker/Dockerfile.cuda102 . + +docker build -t ymir-executor/mmdet:cuda111-tmi -build-arg SERVER_MODE=dev -f docker/Dockerfile.cuda111 . +``` + # changelog - modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format - modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process - modify `mmdet/datasets/__init__.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. 
-- modify `mmdet/apis/train.py`, set `eval_cfg['classwise'] = True` for class-wise evaluation - add `mmdet/utils/util_ymir.py` for ymir training/infer/mining -- add `ymir_infer.py` for infer and mining +- add `ymir_infer.py` for infer +- add `ymir_mining.py` for mining - add `ymir_train.py` modify `tools/train.py` to update the mmcv config for training +- add `start.py`, the entrypoint for docker image + + diff --git a/det-mmdetection-tmi/infer-template.yaml b/det-mmdetection-tmi/infer-template.yaml index 8be36b9..7dd411c 100644 --- a/det-mmdetection-tmi/infer-template.yaml +++ b/det-mmdetection-tmi/infer-template.yaml @@ -1,7 +1,7 @@ -samples_per_gpu: 2 -workers_per_gpu: 2 -max_epochs: 300 -config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' -args_options: '' +# samples_per_gpu: 2 +# workers_per_gpu: 2 +# max_epochs: 300 +# config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' +# args_options: '' cfg_options: '' -port: 12345 +# port: 12345 diff --git a/det-mmdetection-tmi/mining-template.yaml b/det-mmdetection-tmi/mining-template.yaml new file mode 100644 index 0000000..7dd411c --- /dev/null +++ b/det-mmdetection-tmi/mining-template.yaml @@ -0,0 +1,7 @@ +# samples_per_gpu: 2 +# workers_per_gpu: 2 +# max_epochs: 300 +# config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' +# args_options: '' +cfg_options: '' +# port: 12345 From 9b346f25f793dc73db2059d3c0dd561b78d72fa3 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 16:25:53 +0800 Subject: [PATCH 052/150] model_path --> model_params --- live-code-executor/img-man/training-template.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/live-code-executor/img-man/training-template.yaml b/live-code-executor/img-man/training-template.yaml index 79b1356..865b40b 100644 --- a/live-code-executor/img-man/training-template.yaml +++ b/live-code-executor/img-man/training-template.yaml @@ -4,5 +4,5 @@ gpu_id: '0' task_id: 'default-training-task' -pretrained_model_paths: [] 
+pretrained_model_params: [] class_names: [] From 84eb49fddcf3bf1e37bd37125140f52cdd076391 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 18:08:15 +0800 Subject: [PATCH 053/150] add opencv --- live-code-executor/mxnet.dockerfile | 2 +- live-code-executor/torch.dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 1ff0a66..6a09472 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -29,7 +29,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index 66de371..4b7d735 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru + pip install loguru opencv # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ From 61554ae9d337fee97605d32e3c05c16cff4ea007 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 18:12:46 +0800 Subject: [PATCH 054/150] add opencv --- live-code-executor/mxnet.dockerfile | 2 +- live-code-executor/torch.dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 6a09472..58601dc 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -29,7 +29,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view 
https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru opencv +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index 4b7d735..32f4883 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru opencv + pip install loguru opencv-python # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ From f1dd6c61ac33b3d3c33af6567a72573001bd23ee Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 18:18:46 +0800 Subject: [PATCH 055/150] udpate opencv version --- live-code-executor/mxnet.dockerfile | 2 +- live-code-executor/torch.dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index 58601dc..a738f9b 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -29,7 +29,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==4.1.2.30 # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index 32f4883..aa17ce2 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru opencv-python + pip 
install loguru opencv-python==4.1.2.30 # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ From 50e6864f67a8ff8f9ba399649611a374e1ca788e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E4=BD=B3=E6=AC=A3?= Date: Thu, 7 Jul 2022 18:32:16 +0800 Subject: [PATCH 056/150] Update torch.dockerfile --- live-code-executor/torch.dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index aa17ce2..61526d5 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -6,7 +6,7 @@ ARG CUDNN="8" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime ARG SERVER_MODE=prod - +ARG OPENCV="4.1.2.30" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" @@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru opencv-python==4.1.2.30 + pip install loguru opencv-python==${OPENCV} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ From 6a164cae6f7841debae5a905f8b1cac79a9401aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E4=BD=B3=E6=AC=A3?= Date: Thu, 7 Jul 2022 18:32:49 +0800 Subject: [PATCH 057/150] Update mxnet.dockerfile --- live-code-executor/mxnet.dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index a738f9b..fd258af 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -5,6 +5,7 @@ ARG SYSTEM="ubuntu18.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} ARG MXNET="1.9.1" +ARG OPENCV="4.1.2.30" ARG DEBIAN_FRONTEND="noninteractive" ARG MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_4.11.0-Linux-x86_64.sh" @@ -29,8 +30,7 @@ RUN apt-key adv 
--keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==4.1.2.30 - +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==${OPENCV} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ From 3ef0d4356300aea96d9c3502fa4eaf7d9b9f5561 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 7 Jul 2022 18:51:23 +0800 Subject: [PATCH 058/150] add opencv and numpy arg --- live-code-executor/mxnet.dockerfile | 3 ++- live-code-executor/torch.dockerfile | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index fd258af..e1ff9c4 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -6,6 +6,7 @@ ARG SYSTEM="ubuntu18.04" FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-${BUILD}-${SYSTEM} ARG MXNET="1.9.1" ARG OPENCV="4.1.2.30" +ARG NUMPY="1.20.0" ARG DEBIAN_FRONTEND="noninteractive" ARG MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_4.11.0-Linux-x86_64.sh" @@ -30,7 +31,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==${OPENCV} +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==${OPENCV} numpy=${NUMPY} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ diff --git a/live-code-executor/torch.dockerfile 
b/live-code-executor/torch.dockerfile index 61526d5..806f471 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -7,6 +7,7 @@ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime ARG SERVER_MODE=prod ARG OPENCV="4.1.2.30" +ARG NUMPY="1.20.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" @@ -20,7 +21,7 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru opencv-python==${OPENCV} + pip install loguru opencv-python==${OPENCV} numpy=${NUMPY} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ From cffc5d0033b6d7acd1f26b8bcc1f40e85895848b Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 6 Jul 2022 15:15:14 +0800 Subject: [PATCH 059/150] add cuda112 dockerfile for yolov4 --- det-yolov4-mining/cuda112.dockerfile | 15 +++++++++++++++ det-yolov4-training/Dockerfile | 2 +- det-yolov4-training/cuda112.dockerfile | 23 +++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 det-yolov4-mining/cuda112.dockerfile create mode 100644 det-yolov4-training/cuda112.dockerfile diff --git a/det-yolov4-mining/cuda112.dockerfile b/det-yolov4-mining/cuda112.dockerfile new file mode 100644 index 0000000..871b00f --- /dev/null +++ b/det-yolov4-mining/cuda112.dockerfile @@ -0,0 +1,15 @@ +FROM industryessentials/ymir-executor:cuda112-yolov4-training + +RUN apt-get update && apt-get install -y --no-install-recommends libsm6 libxext6 libfontconfig1 libxrender1 libgl1-mesa-glx \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN pip3 install --upgrade pip setuptools wheel && pip3 install opencv-python pyyaml scipy tqdm && rm -rf /root/.cache/pip3 + +COPY . 
/app +WORKDIR /app +RUN cp ./start.sh /usr/bin/start.sh && \ + mkdir -p /img-man && \ + cp ./mining-template.yaml /img-man/mining-template.yaml && \ + cp ./infer-template.yaml /img-man/infer-template.yaml && \ + cp ./README.md /img-man/readme.md +CMD sh /usr/bin/start.sh diff --git a/det-yolov4-training/Dockerfile b/det-yolov4-training/Dockerfile index 6e6c4c9..61ce1f6 100644 --- a/det-yolov4-training/Dockerfile +++ b/det-yolov4-training/Dockerfile @@ -1,6 +1,7 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple WORKDIR /darknet +RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update RUN apt install -y software-properties-common wget RUN add-apt-repository ppa:deadsnakes/ppa @@ -12,7 +13,6 @@ RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six -RUN echo '\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\ndeb https://mirrors.ustc.edu.cn/ubuntu/ bionic-security main restricted universe multiverse\n' >> /etc/apt/sources.list ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev COPY . 
/darknet diff --git a/det-yolov4-training/cuda112.dockerfile b/det-yolov4-training/cuda112.dockerfile new file mode 100644 index 0000000..3e6884b --- /dev/null +++ b/det-yolov4-training/cuda112.dockerfile @@ -0,0 +1,23 @@ +FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 +ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple +WORKDIR /darknet +RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update +RUN apt install -y software-properties-common wget +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update +RUN apt install -y python3.7 python3-distutils +RUN wget https://bootstrap.pypa.io/get-pip.py +RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137 +RUN rm /usr/bin/python3 +RUN ln -s /usr/bin/python3.7 /usr/bin/python3 +RUN python3 get-pip.py +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six + +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update && apt-get install -y libopencv-dev +COPY . 
/darknet +RUN cp /darknet/make_train_test_darknet.sh /usr/bin/start.sh +RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml +RUN make -j +CMD bash /usr/bin/start.sh From 80084c65e720d0b4d060e04b7c0e3ffaa3a5a781 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 8 Jul 2022 13:32:18 +0800 Subject: [PATCH 060/150] update master --- README.MD | 11 ++++++++++- det-yolov5-tmi/.dockerignore | 2 +- live-code-executor/mxnet.dockerfile | 3 ++- live-code-executor/torch.dockerfile | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/README.MD b/README.MD index bcba683..dafee8a 100644 --- a/README.MD +++ b/README.MD @@ -2,7 +2,16 @@ ## det-yolov4-training -- yolov4的训练镜像,采用mxnet与darknet框架,默认cuda版本为`10.1`,无法直接在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,并修改其它依赖。 +- yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法直接在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 进行构建。 + + ``` + cd det-yolov4-training + # cuda101-yolov4-training + docker build -t ymir-executor/yolov4:cuda101-training -f Dockerfile . + + # cuda112-yolov4-training + docker build -t ymir-executor/yolov4:cuda112-training -f cuda112.dockerfile . + ``` ## det-yolov4-mining diff --git a/det-yolov5-tmi/.dockerignore b/det-yolov5-tmi/.dockerignore index af51ccc..bee6b98 100644 --- a/det-yolov5-tmi/.dockerignore +++ b/det-yolov5-tmi/.dockerignore @@ -12,7 +12,7 @@ data/samples/* *.jpg # Neural Network weights ----------------------------------------------------------------------------------------------- -**/*.pt +#**/*.pt **/*.pth **/*.onnx **/*.engine diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index e1ff9c4..cd2ed5d 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -15,7 +15,8 @@ ENV PATH /opt/conda/bin:$PATH # install linux package, needs to fix GPG error first. 
RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ apt-get update && \ - apt-get install -y git gcc wget curl zip libglib2.0-0 libgl1-mesa-glx && \ + apt-get install -y git gcc wget curl zip libglib2.0-0 libgl1-mesa-glx \ + libsm6 libxext6 libxrender-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ wget "${MINICONDA_URL}" -O miniconda.sh -q && \ diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index 806f471..88fe0eb 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -15,7 +15,7 @@ ENV LANG=C.UTF-8 # install linux package RUN apt-get update && apt-get install -y git curl wget zip gcc \ - libglib2.0-0 libgl1-mesa-glx \ + libglib2.0-0 libgl1-mesa-glx libsm6 libxext6 libxrender-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 6bdd3e529b1d0dafcf1349d3ab7bfa226f3883fc Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 11:54:47 +0800 Subject: [PATCH 061/150] update dockerfile --- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 3 +-- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 841fe7b..62ea15e 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -27,7 +27,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install 
"git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi \ @@ -38,7 +38,6 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ COPY . /app/ WORKDIR /app RUN pip install --no-cache-dir -r requirements/runtime.txt \ - && pip install --no-cache-dir -e . \ && mkdir /img-man \ && mv *-template.yaml /img-man \ && echo "cd /app && python3 start.py" > /usr/bin/start.sh diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index d0c24c6..08fe8f4 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y build-essential ffmpeg libsm6 libxext6 # Install ymir-exc sdk and MMCV RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi \ @@ -36,7 +36,6 @@ RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ COPY . /app/ WORKDIR /app RUN pip install --no-cache-dir -r requirements/runtime.txt \ - && pip install --no-cache-dir -e . 
\ && mkdir /img-man \ && mv *-template.yaml /img-man \ && echo "cd /app && python3 start.py" > /usr/bin/start.sh From 5c1a6f5f95a5e979931ef547d1791563c7cba21e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 12:06:53 +0800 Subject: [PATCH 062/150] format doc --- .../mmdet/core/evaluation/eval_hooks.py | 1 - det-mmdetection-tmi/mmdet/datasets/coco.py | 6 +- det-mmdetection-tmi/mmdet/datasets/ymir.py | 62 ++++++++++--------- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 4 +- det-mmdetection-tmi/start.py | 9 ++- det-mmdetection-tmi/tools/train.py | 1 + det-mmdetection-tmi/ymir_mining.py | 2 +- 7 files changed, 46 insertions(+), 39 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index 6e63d43..dff0705 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -180,4 +180,3 @@ def _do_evaluate(self, runner): # 'best_score', self.init_value_map[self.rule]) # if self.compare_func(key_score, best_score): # update_training_result_file(key_score) - diff --git a/det-mmdetection-tmi/mmdet/datasets/coco.py b/det-mmdetection-tmi/mmdet/datasets/coco.py index ffe83d4..7de1cdb 100644 --- a/det-mmdetection-tmi/mmdet/datasets/coco.py +++ b/det-mmdetection-tmi/mmdet/datasets/coco.py @@ -597,7 +597,9 @@ def evaluate(self, if COCO_EVAL_TMP_FILE is not None: mmcv.dump(eval_results, COCO_EVAL_TMP_FILE, file_format='json') else: - raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + raise Exception( + 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') - print_log(f'\n write eval result to {COCO_EVAL_TMP_FILE}', logger=logger) + print_log( + f'\n write eval result to {COCO_EVAL_TMP_FILE}', logger=logger) return eval_results diff --git a/det-mmdetection-tmi/mmdet/datasets/ymir.py 
b/det-mmdetection-tmi/mmdet/datasets/ymir.py index 42771fb..1276310 100644 --- a/det-mmdetection-tmi/mmdet/datasets/ymir.py +++ b/det-mmdetection-tmi/mmdet/datasets/ymir.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab voc.py. All rights reserved. # wangjiaxin 2022-04-25 -from collections import OrderedDict import os.path as osp # from PIL import Image @@ -12,21 +11,23 @@ from .api_wrappers import COCO from .coco import CocoDataset + @DATASETS.register_module() class YmirDataset(CocoDataset): """ - converted dataset by ymir system 1.0.0 + converted dataset by ymir system 1.0.0 + /in/assets: image files directory /in/annotations: annotation files directory /in/train-index.tsv: image_file \t annotation_file /in/val-index.tsv: image_file \t annotation_file """ + def __init__(self, min_size=0, ann_prefix='annotations', **kwargs): - self.min_size=min_size - self.ann_prefix=ann_prefix + self.min_size = min_size + self.ann_prefix = ann_prefix super(YmirDataset, self).__init__(**kwargs) def load_annotations(self, ann_file): @@ -43,16 +44,16 @@ def load_annotations(self, ann_file): categories = [] # category_id is from 1 for coco, not 0 for i, name in enumerate(self.CLASSES): - categories.append({'supercategory':'none', + categories.append({'supercategory': 'none', 'id': i+1, - 'name': name}) + 'name': name}) annotations = [] instance_counter = 1 image_counter = 1 - with open(ann_file,'r') as fp: - lines=fp.readlines() + with open(ann_file, 'r') as fp: + lines = fp.readlines() for line in lines: # split any white space @@ -76,22 +77,22 @@ def load_annotations(self, ann_file): anns = [] for ann in anns: - ann['image_id']=image_counter - ann['id']=instance_counter + ann['image_id'] = image_counter + ann['id'] = instance_counter annotations.append(ann) - instance_counter+=1 + instance_counter += 1 - image_counter+=1 + image_counter += 1 - ### pycocotool coco init + # pycocotool coco init self.coco = COCO() - self.coco.dataset['type']='instances' - 
self.coco.dataset['categories']=categories - self.coco.dataset['images']=images - self.coco.dataset['annotations']=annotations + self.coco.dataset['type'] = 'instances' + self.coco.dataset['categories'] = categories + self.coco.dataset['images'] = images + self.coco.dataset['annotations'] = annotations self.coco.createIndex() - ### mmdetection coco init + # mmdetection coco init # avoid the filter problem in CocoDataset, view coco_api.py for detail self.coco.img_ann_map = self.coco.imgToAnns self.coco.cat_img_map = self.coco.catToImgs @@ -103,7 +104,7 @@ def load_annotations(self, ann_file): self.img_ids = self.coco.get_img_ids() # self.img_ids = list(self.coco.imgs.keys()) assert len(self.img_ids) > 0, 'image number must > 0' - N=len(self.img_ids) + N = len(self.img_ids) print(f'load {N} image from YMIR dataset') data_infos = [] @@ -119,11 +120,11 @@ def load_annotations(self, ann_file): return data_infos def dump(self, ann_file): - with open(ann_file,'w') as fp: + with open(ann_file, 'w') as fp: json.dump(self.coco.dataset, fp) - def get_ann_path_from_img_path(self,img_path): - img_id=osp.splitext(osp.basename(img_path))[0] + def get_ann_path_from_img_path(self, img_path): + img_id = osp.splitext(osp.basename(img_path))[0] return osp.join(self.data_root, self.ann_prefix, img_id+'.txt') def get_txt_ann_info(self, txt_path): @@ -141,16 +142,16 @@ def get_txt_ann_info(self, txt_path): # txt_path = self.get_ann_path_from_img_path(img_path) anns = [] if osp.exists(txt_path): - with open(txt_path,'r') as fp: - lines=fp.readlines() + with open(txt_path, 'r') as fp: + lines = fp.readlines() else: - lines=[] + lines = [] for line in lines: - obj=[int(x) for x in line.strip().split(',')[0:5]] + obj = [int(x) for x in line.strip().split(',')[0:5]] # YMIR category id starts from 0, coco from 1 category_id, xmin, ymin, xmax, ymax = obj bbox = [xmin, ymin, xmax, ymax] - h,w=ymax-ymin,xmax-xmin + h, w = ymax-ymin, xmax-xmin ignore = 0 if self.min_size: assert not self.test_mode 
@@ -160,12 +161,13 @@ def get_txt_ann_info(self, txt_path): ignore = 1 ann = dict( - segmentation=[[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]], + segmentation=[ + [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]], area=w*h, iscrowd=0, image_id=None, bbox=[xmin, ymin, w, h], - category_id=category_id+1, # category id is from 1 for coco + category_id=category_id+1, # category id is from 1 for coco id=None, ignore=ignore ) @@ -188,7 +190,7 @@ def get_cat_ids(self, idx): txt_path = self.data_infos[idx]['ann_path'] txt_path = osp.join(self.data_root, self.ann_prefix, txt_path) if osp.exists(txt_path): - with open(txt_path,'r') as fp: + with open(txt_path, 'r') as fp: lines = fp.readlines() else: lines = [] diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index d3c6e97..faf39e0 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -158,7 +158,7 @@ def get_weight_file(cfg: edict) -> str: return "" -def update_training_result_file(last=False, key_score=None): +def update_training_result_file(last: bool = False, key_score=None): if key_score: logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') @@ -168,7 +168,7 @@ def update_training_result_file(last=False, key_score=None): eval_result = mmcv.load(COCO_EVAL_TMP_FILE) # eval_result may be empty dict {}. 
- map = eval_result.get('bbox_mAP_50',0) + map = eval_result.get('bbox_mAP_50', 0) work_dir = os.getenv('YMIR_MODELS_DIR') if work_dir is None or not osp.isdir(work_dir): diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 686d451..e4b1398 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -32,7 +32,8 @@ def _run_training() -> None: # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) - logging.info(f"training finished") + logging.info("training finished") + def _run_mining() -> None: command = 'python3 ymir_mining.py' @@ -40,7 +41,8 @@ def _run_mining() -> None: subprocess.run(command.split(), check=True) monitor.write_monitor_logger(percent=1.0) - logging.info(f"mining finished") + logging.info("mining finished") + def _run_infer() -> None: command = 'python3 ymir_infer.py' @@ -48,7 +50,8 @@ def _run_infer() -> None: subprocess.run(command.split(), check=True) monitor.write_monitor_logger(percent=1.0) - logging.info(f"infer finished") + logging.info("infer finished") + if __name__ == '__main__': logging.basicConfig(stream=sys.stdout, diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index a65e130..74121ff 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -19,6 +19,7 @@ from mmdet.utils import collect_env, get_root_logger, setup_multi_processes from mmdet.utils.util_ymir import modify_mmdet_config, get_merged_config + def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') parser.add_argument('config', help='train config file path') diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py index 16379df..f4bea0c 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining.py @@ -307,7 +307,7 @@ def predict(self, img: CV_IMAGE) -> NDArray: """ results = self.infer(img) - xyxy_conf_idx_list=[] + xyxy_conf_idx_list = [] for idx, 
result in enumerate(results): for line in result: if any(np.isinf(line)): From 0a6f11c9e9c7b82319b63d4652dff3a6b99e4fd3 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 12:17:11 +0800 Subject: [PATCH 063/150] update .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 5563689..6dbd818 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# dockerfile for China +*.dockerfile.cn +det-mmdetection-tmi/docker/*.cn + *.png *.jpg *.img From 49906e3068b2319982fedd591a7f6042c3a4e6b5 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 15:50:47 +0800 Subject: [PATCH 064/150] DDP training --- det-yolov5-tmi/start.py | 36 +++++++++++++++++++++++---- det-yolov5-tmi/training-template.yaml | 2 ++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index fba6632..d59fa8a 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -51,17 +51,43 @@ def _run_training(cfg: edict) -> None: img_size = cfg.param.img_size save_period = cfg.param.save_period args_options = cfg.param.args_options + gpu_id = str(cfg.param.gpu_id) + gpu_count = len(gpu_id.split(',')) if gpu_id else 0 + port = int(cfg.param.port) + sync_bn = cfg.param.sync_bn weights = get_weight_file(cfg) if not weights: # download pretrained weight weights = download_weight_file(model) models_dir = cfg.ymir.output.models_dir - command = f'python3 train.py --epochs {epochs} ' + \ - f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ - f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ - f'--img-size {img_size} ' + \ - f'--save-period {save_period}' + + if gpu_count == 0: + command = f'python3 train.py --epochs {epochs} ' + \ + f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ + f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ + f'--img-size {img_size} ' + \ + f'--save-period 
{save_period} ' + \ + f'--devices cpu' + elif gpu_count == 1: + command = f'python3 train.py --epochs {epochs} ' + \ + f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ + f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ + f'--img-size {img_size} ' + \ + f'--save-period {save_period} ' + \ + f'--devices {gpu_id}' + else: + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} ' + \ + f'--master_port {port} train.py --epochs {epochs} ' + \ + f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ + f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ + f'--img-size {img_size} ' + \ + f'--save-period {save_period} ' + \ + f'--devices {gpu_id}' + + if sync_bn: + command += " --sync-bn" + if args_options: command += f" {args_options}" diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index c6d0ee4..b01bdc1 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -14,3 +14,5 @@ img_size: 640 opset: 11 args_options: '--exist-ok' save_period: 10 +port: 29500 # work for multi-gpu only +sync_bn: False # work for multi-gpu only From a1177dcab0916d1ef745b80f27eff3b0dfe3734a Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 16:15:22 +0800 Subject: [PATCH 065/150] remove *.pt in dockerignore file --- det-yolov5-tmi/.dockerignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/.dockerignore b/det-yolov5-tmi/.dockerignore index af51ccc..bee6b98 100644 --- a/det-yolov5-tmi/.dockerignore +++ b/det-yolov5-tmi/.dockerignore @@ -12,7 +12,7 @@ data/samples/* *.jpg # Neural Network weights ----------------------------------------------------------------------------------------------- -**/*.pt +#**/*.pt **/*.pth **/*.onnx **/*.engine From 3d342d05cab9479ce3fcdee7253614ea497038f4 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 11 Jul 2022 17:13:34 +0800 
Subject: [PATCH 066/150] add conf_threshold for infer --- det-mmdetection-tmi/infer-template.yaml | 1 + det-mmdetection-tmi/mmdet/utils/util_ymir.py | 3 ++- det-mmdetection-tmi/ymir_infer.py | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/infer-template.yaml b/det-mmdetection-tmi/infer-template.yaml index 7dd411c..cc2f1e7 100644 --- a/det-mmdetection-tmi/infer-template.yaml +++ b/det-mmdetection-tmi/infer-template.yaml @@ -4,4 +4,5 @@ # config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' # args_options: '' cfg_options: '' +conf_threshold: 0.2 # port: 12345 diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index faf39e0..eff0aba 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -203,6 +203,7 @@ def update_training_result_file(last: bool = False, key_score=None): model_stages = {} if stage_name not in model_stages: - rw.write_model_stage(files=[newest_weight_file], + config_files = [f for f in result_files if f.endswith('.py')] + rw.write_model_stage(files=[newest_weight_file] + config_files, mAP=float(map), stage_name=stage_name) diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 84df374..b4716e2 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -108,11 +108,15 @@ def main(): # write infer result monitor_gap = max(1, N // 100) + conf_threshold = float(cfg.param.conf_threshold) for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): img = cv2.imread(asset_path) result = model.infer(img) - infer_result[asset_path] = mmdet_result_to_ymir( + raw_anns = mmdet_result_to_ymir( result, cfg.param.class_names) + + infer_result[asset_path] = [ + ann for ann in raw_anns if ann.score >= conf_threshold] idx += 1 if idx % monitor_gap == 0: From ba3b7f3a17b1e9f51e3c6db90d53b524152885d9 Mon Sep 17 00:00:00 2001 From: 
youdaoyzbx Date: Wed, 13 Jul 2022 14:00:14 +0800 Subject: [PATCH 067/150] update dockerfile and commit id --- live-code-executor/mxnet.dockerfile | 4 ++-- live-code-executor/torch.dockerfile | 4 ++-- live-code-executor/ymir_start.py | 19 +++++++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index e1ff9c4..e04bd4b 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -31,10 +31,10 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && # Install python package # view https://mxnet.apache.org/versions/1.9.1/get_started for detail -RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==${OPENCV} numpy=${NUMPY} +RUN pip3 install mxnet-cu112==${MXNET} loguru opencv-python==${OPENCV} numpy==${NUMPY} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index 806f471..a71476f 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -21,11 +21,11 @@ RUN apt-get update && apt-get install -y git curl wget zip gcc \ # Install python package RUN pip install -U pip && \ - pip install loguru opencv-python==${OPENCV} numpy=${NUMPY} + pip install loguru opencv-python==${OPENCV} numpy==${NUMPY} # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install 
"git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index 0ea1bd6..918320c 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -20,15 +20,22 @@ def main(): show_ymir_info(executor_config) git_url = executor_config['git_url'] - git_branch = executor_config.get('git_branch', '') + # commit id, tag or branch + git_id = executor_config.get('git_branch', '') - if not git_branch: - cmd = f'git clone {git_url} --depth 1 /app' - else: - cmd = f'git clone {git_url} --depth 1 -b {git_branch} /app' - logger.info(f'clone code: {cmd}') + cmd = f'git clone {git_url} /app' subprocess.run(cmd.split(), check=True) + if not git_id: + result = subprocess.run('git rev-parse HEAD', check=True, shell=True, + capture_output=True, encoding='utf-8', cwd='/app') + # remove '\n' + git_id = result.stdout.strip() + else: + subprocess.run(f'git checkout {git_id}', check=True, shell=True, cwd='/app') + + logger.info(f'clone code with {git_id}: {cmd}') + # step 2. read /app/extra-requirements.txt and install it. 
pypi_file = '/app/extra-requirements.txt' if osp.exists(pypi_file): From c3e55a41ad47869e981838378349a0bdac50550d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 15 Jul 2022 16:32:32 +0800 Subject: [PATCH 068/150] fix bug --- det-yolov5-tmi/start.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index d59fa8a..7f66691 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -54,7 +54,7 @@ def _run_training(cfg: edict) -> None: gpu_id = str(cfg.param.gpu_id) gpu_count = len(gpu_id.split(',')) if gpu_id else 0 port = int(cfg.param.port) - sync_bn = cfg.param.sync_bn + sync_bn = cfg.param.get('sync_bn', False) weights = get_weight_file(cfg) if not weights: # download pretrained weight @@ -68,14 +68,14 @@ def _run_training(cfg: edict) -> None: f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ f'--img-size {img_size} ' + \ f'--save-period {save_period} ' + \ - f'--devices cpu' + f'--device cpu' elif gpu_count == 1: command = f'python3 train.py --epochs {epochs} ' + \ f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ f'--img-size {img_size} ' + \ f'--save-period {save_period} ' + \ - f'--devices {gpu_id}' + f'--device {gpu_id}' else: command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} ' + \ f'--master_port {port} train.py --epochs {epochs} ' + \ @@ -83,7 +83,7 @@ def _run_training(cfg: edict) -> None: f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ f'--img-size {img_size} ' + \ f'--save-period {save_period} ' + \ - f'--devices {gpu_id}' + f'--device {gpu_id}' if sync_bn: command += " --sync-bn" From 9f8c1ac26c26b5eb82184d9f7f8ed99189eeb5e2 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Sat, 16 Jul 2022 10:12:23 +0800 Subject: [PATCH 069/150] swap config and add export_format --- det-yolov5-tmi/start.py | 2 +- 
det-yolov5-tmi/training-template.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 7f66691..9e82742 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -53,7 +53,7 @@ def _run_training(cfg: edict) -> None: args_options = cfg.param.args_options gpu_id = str(cfg.param.gpu_id) gpu_count = len(gpu_id.split(',')) if gpu_id else 0 - port = int(cfg.param.port) + port = int(cfg.param.get('port', 29500)) sync_bn = cfg.param.get('sync_bn', False) weights = get_weight_file(cfg) if not weights: diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index b01bdc1..763f66a 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -7,6 +7,8 @@ # pretrained_model_params: [] # class_names: [] +shm_size: '32G' +export_format: 'ark:raw' model: 'yolov5s' batch_size: 16 epochs: 300 @@ -14,5 +16,5 @@ img_size: 640 opset: 11 args_options: '--exist-ok' save_period: 10 -port: 29500 # work for multi-gpu only sync_bn: False # work for multi-gpu only +port: 29500 # work for multi-gpu only From a3ffc83bac8b788fa6c688bc3dbf5e8c77fa9149 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Sat, 16 Jul 2022 10:52:11 +0800 Subject: [PATCH 070/150] update mmdet --- det-mmdetection-tmi/README_ymir.md | 8 +++++--- .../mmdet/core/evaluation/eval_hooks.py | 10 +++++----- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 2 +- det-mmdetection-tmi/ymir_train.py | 19 ++++++++++++++++--- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/det-mmdetection-tmi/README_ymir.md b/det-mmdetection-tmi/README_ymir.md index 194bd03..1281e7f 100644 --- a/det-mmdetection-tmi/README_ymir.md +++ b/det-mmdetection-tmi/README_ymir.md @@ -13,11 +13,13 @@ docker build -t ymir-executor/mmdet:cuda111-tmi -build-arg SERVER_MODE=dev -f do # changelog - modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` 
with json format - modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process -- modify `mmdet/datasets/__init__.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. +- modify `mmdet/datasets/__init__.py, mmdet/datasets/coco.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. +- modify `requirements/runtime.txt` to add new dependent package. - add `mmdet/utils/util_ymir.py` for ymir training/infer/mining - add `ymir_infer.py` for infer - add `ymir_mining.py` for mining - add `ymir_train.py` modify `tools/train.py` to update the mmcv config for training - add `start.py`, the entrypoint for docker image - - +- add `training-template.yaml, infer-template.yaml, mining-template.yaml` for ymir pre-defined hyper-parameters. +- add `docker/Dockerfile.cuda102, docker/Dockerfile.cuda111` to build docker image +- remove `docker/Dockerfile` to avoid misuse diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index dff0705..dc40801 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -10,7 +10,7 @@ from ymir_exc import monitor from mmdet.utils.util_ymir import (YmirStage, get_ymir_process, - update_training_result_file) + write_ymir_training_result) def _calc_dynamic_intervals(start_interval, dynamic_interval_list): @@ -79,7 +79,7 @@ def _do_evaluate(self, runner): results = single_gpu_test(runner.model, self.dataloader, show=False) runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) - update_training_result_file(last=False, key_score=key_score) + write_ymir_training_result(last=False, key_score=key_score) # the key_score may be `None` so it needs to skip the action to save # the best checkpoint if self.save_best and key_score: @@ -87,7 +87,7 @@ def _do_evaluate(self, 
runner): # best_score = runner.meta['hook_msgs'].get( # 'best_score', self.init_value_map[self.rule]) # if self.compare_func(key_score, best_score): - # update_training_result_file(key_score) + # write_ymir_training_result(key_score) # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, @@ -170,7 +170,7 @@ def _do_evaluate(self, runner): print('\n') runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) key_score = self.evaluate(runner, results) - update_training_result_file(last=False, key_score=key_score) + write_ymir_training_result(last=False, key_score=key_score) # the key_score may be `None` so it needs to skip # the action to save the best checkpoint if self.save_best and key_score: @@ -179,4 +179,4 @@ def _do_evaluate(self, runner): # best_score = runner.meta['hook_msgs'].get( # 'best_score', self.init_value_map[self.rule]) # if self.compare_func(key_score, best_score): - # update_training_result_file(key_score) + # write_ymir_training_result(key_score) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index eff0aba..2c232e2 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -158,7 +158,7 @@ def get_weight_file(cfg: edict) -> str: return "" -def update_training_result_file(last: bool = False, key_score=None): +def write_ymir_training_result(last: bool = False, key_score=None): if key_score: logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index ace0c27..7a57946 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -7,8 +7,8 @@ from easydict import EasyDict as edict from ymir_exc import monitor -from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_ymir_process, update_training_result_file) +from mmdet.utils.util_ymir import 
(YmirStage, get_merged_config, get_weight_file, + get_ymir_process, write_ymir_training_result) def main(cfg: edict) -> int: @@ -28,6 +28,19 @@ def main(cfg: edict) -> int: args_options = cfg.param.get("args_options", None) cfg_options = cfg.param.get("cfg_options", None) + if args_options.find('--resume-from') == -1 and \ + cfg_options.find('load_from') == -1 and \ + cfg_options.find('resume_from') == -1: + + weight_file = get_weight_file(cfg) + if weight_file: + if cfg_options: + cfg_options += f' load_from={weight_file}' + else: + cfg_options = f'load_from={weight_file}' + else: + logging.warning('no weight file used for training!') + monitor.write_monitor_logger( percent=get_ymir_process(YmirStage.PREPROCESS, p=0.2)) @@ -57,7 +70,7 @@ def main(cfg: edict) -> int: subprocess.run(cmd.split(), check=True) # save the last checkpoint - update_training_result_file(last=True) + write_ymir_training_result(last=True) return 0 From 4bfe8c3a5e9f9cc4d8eaa43b01bfe3687bbc5041 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Sat, 16 Jul 2022 11:12:31 +0800 Subject: [PATCH 071/150] fix none error --- det-mmdetection-tmi/ymir_train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 7a57946..96d1a69 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -28,9 +28,9 @@ def main(cfg: edict) -> int: args_options = cfg.param.get("args_options", None) cfg_options = cfg.param.get("cfg_options", None) - if args_options.find('--resume-from') == -1 and \ - cfg_options.find('load_from') == -1 and \ - cfg_options.find('resume_from') == -1: + if (args_options is None or args_options.find('--resume-from') == -1) and \ + (cfg_options is None or (cfg_options.find('load_from') == -1 and + cfg_options.find('resume_from') == -1)): weight_file = get_weight_file(cfg) if weight_file: From 6326d07bb456d163e750bd935f0c6964882c62b3 Mon Sep 17 00:00:00 2001 From: 
youdaoyzbx Date: Sat, 16 Jul 2022 11:14:16 +0800 Subject: [PATCH 072/150] add comment --- det-mmdetection-tmi/ymir_train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 96d1a69..31c2375 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -28,6 +28,7 @@ def main(cfg: edict) -> int: args_options = cfg.param.get("args_options", None) cfg_options = cfg.param.get("cfg_options", None) + # auto load offered weight file if not set by user! if (args_options is None or args_options.find('--resume-from') == -1) and \ (cfg_options is None or (cfg_options.find('load_from') == -1 and cfg_options.find('resume_from') == -1)): From e87e7ce151599fdb6e857fbcab6678c6261210f2 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 18 Jul 2022 12:33:14 +0800 Subject: [PATCH 073/150] update git clone for live code --- live-code-executor/ymir_start.py | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index 918320c..bd4f2dc 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -23,18 +23,26 @@ def main(): # commit id, tag or branch git_id = executor_config.get('git_branch', '') - cmd = f'git clone {git_url} /app' + # https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/ + cmd = f'git clone --filter=blob:none {git_url} /app' + logger.info(f'running: {cmd}') subprocess.run(cmd.split(), check=True) if not git_id: - result = subprocess.run('git rev-parse HEAD', check=True, shell=True, - capture_output=True, encoding='utf-8', cwd='/app') - # remove '\n' - git_id = result.stdout.strip() + # logger.warning(f'no commit_id/tag/branch offered for {git_url}') + raise Exception(f'no commit_id/tag/branch offered for {git_url}') else: - subprocess.run(f'git checkout {git_id}', check=True, shell=True, cwd='/app') + cmd =
f'git checkout {git_id}' + logger.info(f'running: {cmd}') + subprocess.run(cmd.split(), check=True, cwd='/app') + + result = subprocess.run('git rev-parse HEAD', check=True, shell=True, + capture_output=True, encoding='utf-8', cwd='/app') + + commit_id = result.stdout.strip() # remove '\n' + subprocess.run(f'echo {commit_id} > /out/models/git_commit_id.txt', check=True, shell=True) + logger.info(f'clone code {git_url} with commit id {commit_id}') - logger.info(f'clone code with {git_id}: {cmd}') # step 2. read /app/extra-requirements.txt and install it. pypi_file = '/app/extra-requirements.txt' From a701af6f4a6e559cb5e3a0e671ecb8b5f9ca67fd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 09:28:03 +0800 Subject: [PATCH 074/150] support ymir1.0.0+ --- det-yolov5-tmi/cuda102.dockerfile | 2 + det-yolov5-tmi/cuda111.dockerfile | 2 + det-yolov5-tmi/train.py | 57 +++++++++++++++---------- det-yolov5-tmi/utils/ymir_yolov5.py | 64 +++++++++++++++++++++++++++-- 4 files changed, 99 insertions(+), 26 deletions(-) diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index 49a29d3..3afe7e4 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -4,11 +4,13 @@ ARG CUDNN="7" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime ARG SERVER_MODE=prod +ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV LANG=C.UTF-8 +ENV YMIR_VERSION=${YMIR} # Install linux package RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index 0c6e5dd..ca19784 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -5,11 +5,13 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime ARG SERVER_MODE=prod +ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV LANG=C.UTF-8 +ENV YMIR_VERSION=$YMIR # Install linux package RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 7fcbbce..449c85d 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -12,6 +12,7 @@ $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch """ +from ymir_exc import monitor import argparse import math import os @@ -23,6 +24,7 @@ from pathlib import Path import numpy as np +from packaging.version import Version import torch import torch.distributed as dist import torch.nn as nn @@ -38,26 +40,25 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -import val # for end-of-epoch mAP -from models.experimental import attempt_load -from models.yolo import Model -from utils.autoanchor import check_anchors -from utils.autobatch import check_train_batch_size -from utils.callbacks import Callbacks -from utils.datasets import create_dataloader -from utils.downloads import attempt_download +from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config, write_old_ymir_training_result +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from utils.plots import plot_evolve, plot_labels +from utils.metrics import fitness +from utils.loss import ComputeLoss +from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.loggers import Loggers from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, 
get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.loggers import Loggers -from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.loss import ComputeLoss -from utils.metrics import fitness -from utils.plots import plot_evolve, plot_labels -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config -from ymir_exc import monitor +from utils.downloads import attempt_download +from utils.datasets import create_dataloader +from utils.callbacks import Callbacks +from utils.autobatch import check_train_batch_size +from utils.autoanchor import check_anchors +from models.yolo import Model +from models.experimental import attempt_load +import val # for end-of-epoch mAP LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -73,9 +74,15 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze ymir_cfg = opt.ymir_cfg - opt.ymir_cfg = '' # yaml cannot dump edict, remove it here + opt.ymir_cfg = '' # yaml cannot dump edict, remove it here log_dir = Path(ymir_cfg.ymir.output.tensorboard_dir) + YMIR_VERSION = os.environ.get('YMIR_VERSION', '1.2.0') + if Version(YMIR_VERSION) >= Version('1.2.0'): + latest_ymir = True + else: + latest_ymir = False + # Directories w = save_dir # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir @@ -184,7 +191,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary if opt.cos_lr: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] else: - lf = lambda x: 
(1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA @@ -296,7 +303,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # ymir monitor if epoch % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=epoch/(epochs-start_epoch+1)) + percent = get_ymir_process(stage=YmirStage.TASK, p=epoch / (epochs - start_epoch + 1)) monitor.write_monitor_logger(percent=percent) # Update image weights (optional, single-GPU only) @@ -418,7 +425,10 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') weight_file = str(w / f'epoch{epoch}.pt') - write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, weight_file=weight_file) + if latest_ymir: + write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, weight_file=weight_file) + else: + write_old_ymir_training_result(ymir_cfg, results, maps, rewrite=True) del ckpt callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) @@ -466,7 +476,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary torch.cuda.empty_cache() # save the best and last weight file with other files in models_dir - write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') + if RANK in [-1, 0]: + if latest_ymir: + write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') + else: + write_old_ymir_training_result(ymir_cfg, (), np.array([0]), rewrite=False) return results @@ -541,7 +555,6 @@ def main(opt, callbacks=Callbacks()): ymir_cfg = get_merged_config() opt.ymir_cfg = ymir_cfg - # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py 
b/det-yolov5-tmi/utils/ymir_yolov5.py index 492822f..bc3fe7e 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -5,7 +5,7 @@ import os.path as osp import shutil from enum import IntEnum -from typing import Any, List, Tuple +from typing import Any, Dict, List, Tuple import numpy as np import torch @@ -70,7 +70,7 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path = cfg.param.get('pretrained_model_params',[]) + model_params_path = cfg.param.get('pretrained_model_params', []) else: model_params_path = cfg.param.model_params_path @@ -209,9 +209,9 @@ def write_ymir_training_result(cfg: edict, epoch: int, weight_file: str) -> int: """ + for ymir>=1.2.0 cfg: ymir config - results: (mp, mr, map50, map, loss) - maps: map@0.5:0.95 for all classes + map50: map50 epoch: stage weight_file: saved weight files, empty weight_file will save all files """ @@ -230,3 +230,59 @@ def write_ymir_training_result(cfg: edict, files=files, mAP=float(map50)) return 0 + + +def write_training_result(model: List[str], map: float, class_aps: Dict[str, float], **kwargs: dict) -> None: + """ + for 1.0.0 <= ymir <=1.1.0 + """ + training_result = { + 'model': model, + 'map': map, + 'class_aps': class_aps, + } + training_result.update(kwargs) + + env_config = env.get_current_env() + with open(env_config.output.training_result_file, 'w') as f: + yaml.safe_dump(training_result, f) + + +def write_old_ymir_training_result(cfg: edict, results: Tuple, maps: NDArray, rewrite=False) -> int: + """ + for 1.0.0 <= ymir <=1.1.0 + cfg: ymir config + results: (mp, mr, map50, map, loss) + maps: map@0.5:0.95 for all classes + rewrite: set true to ensure write the best result + """ + + if not rewrite: + training_result_file = cfg.ymir.output.training_result_file + if osp.exists(training_result_file): + with open(cfg.ymir.output.training_result_file, 'r') as 
f: + training_result = yaml.safe_load(stream=f) + + files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] + + training_result['model_names'] = files + ['best.onnx'] + write_training_result(**training_result) + + return 0 + + class_names = cfg.param.class_names + mp = results[0] # mean of precision + mr = results[1] # mean of recall + map50 = results[2] # mean of ap@0.5 + map = results[3] # mean of ap@0.5:0.95 + + files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] + # use `rw.write_training_result` to save training result + write_training_result(model=files + ['best.onnx'], + map=float(map), + map50=float(map50), + precision=float(mp), + recall=float(mr), + class_aps={class_name: v + for class_name, v in zip(class_names, maps.tolist())}) + return 0 From d2beb2967aadd5e173dd87fff2d4eb1d11edb5cf Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 09:44:20 +0800 Subject: [PATCH 075/150] update --- live-code-executor/ymir_start.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index bd4f2dc..d2c5415 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -21,28 +21,20 @@ def main(): git_url = executor_config['git_url'] # commit id, tag or branch - git_id = executor_config.get('git_branch', '') + git_revision = executor_config.get('git_branch', '') # https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/ cmd = f'git clone --filter=blob:none {git_url} /app' logger.info(f'running: {cmd}') subprocess.run(cmd.split(), check=True) - if not git_id: + if not git_revision: # logger.warning(f'no commid_id/tag/branch offered for {git_url}') raise Exception(f'no commid_id/tag/branch offered for {git_url}') - else: - cmd = f'git checkout {git_id}' - logger.info(f'running: {cmd}') - subprocess.run(cmd.split(), check=True, cwd='/app') - 
- result = subprocess.run('git rev-parse HEAD', check=True, shell=True, - capture_output=True, encoding='utf-8', cwd='/app') - - commit_id = result.stdout.strip() # remove '\n' - subprocess.run(f'echo {commit_id} > /out/models/git_commit_id.txt', check=True, shell=True) - logger.info(f'clone code {git_url} with commit id {commit_id}') + cmd = f'git checkout {git_revision}' + logger.info(f'running: {cmd}') + subprocess.run(cmd.split(), check=True, cwd='/app') # step 2. read /app/extra-requirements.txt and install it. pypi_file = '/app/extra-requirements.txt' From 298637dfb3031d72886746a621901b1bd6ddbb22 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 13:56:09 +0800 Subject: [PATCH 076/150] update doc --- README.MD | 60 +++++++++++++++++++++++++++++++++++++++++++----------- debug.png | Bin 0 -> 43913 bytes 2 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 debug.png diff --git a/README.MD b/README.MD index dafee8a..3618622 100644 --- a/README.MD +++ b/README.MD @@ -17,23 +17,51 @@ - yolov4挖掘与推理镜像,与det-yolov4-training对应 +``` +cd det-yolov4-mining + +docker build -t ymir-executor/yolov4:cuda101-mi -f Dockerfile . + +docker build -t ymir-executor/yolov4:cuda112-mi -f cuda112.dockerfile . +``` + ## det-yolov5-tmi -- yolov5训练、挖掘及推理镜像,训练时会从github上下载权重 +- [修改说明](./det-yolov5-tmi/README_yolov5.md) + +- yolov5训练、挖掘及推理镜像,镜像构建时会从github上下载权重, 如果访问github不稳定, 建议提前将模型权重下载并在构建时复制到镜像中. -- yolov5-FAQ +``` +cd det-yolov5-tmi +docker build -t ymir-executor/ymir1.1.0:cuda102-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda102.dockerfile . - - 镜像训练时权重下载出错或慢:提前将权重下载好并复制到镜像`/app`目录下或通过ymir导入预训练模型,在训练时进行加载。 +docker build -t ymir-executor/ymir1.1.0:cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . +``` ## live-code-executor -- 可以通过`git_url`, `git_branch`从网上clone代码到镜像并运行 +- 可以通过`git_url`, `commit id` 或 `tag` 从网上clone代码到镜像并运行, 不推荐使用`branch`, 因为这样拉取的代码可能随时间变化, 实验结果不具备可重复性. 
- 参考 [live-code](https://github.com/IndustryEssentials/ymir-remote-git) +``` +cd live-code-executor + +docker build -t ymir-executor/live-code:torch-tmi -f torch.dockerfile + +docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile +``` + ## det-mmdetection-tmi -- mmdetection 训练、挖掘及推理镜像,目前还没开发完 +- [修改说明](./det-mmdetection-tmi/README_ymir.md) + +``` +cd det-mmdetection-tmi +docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi -f docker/Dockerfile.cuda102 --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 . + +docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi -f docker/Dockerfile.cuda111 --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 . +``` ## 如何制作自己的ymir-executor @@ -50,7 +78,7 @@ # FAQ -- apt 或 pip 安装慢或出错 +## apt 或 pip 安装慢或出错 - 采用国内源,如在docker file 中添加如下命令 @@ -60,7 +88,7 @@ RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple ``` -- docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 +## docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 - 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. @@ -70,7 +98,7 @@ docker build -t ymir-executor/yolov5 . 
-f det-yolov5-tmi/cuda111.dockerfile ``` -- 镜像运行完`/in`与`/out`目录中的文件被清理 +## 镜像运行完`/in`与`/out`目录中的文件被清理 - ymir系统为节省空间,会在任务`成功结束`后删除其中不必要的文件,如果不想删除,可以在部署ymir时,修改文件`ymir/command/mir/tools/command_run_in_out.py`,注释其中的`_cleanup(work_dir=work_dir)`。注意需要重新构建后端镜像 @@ -81,7 +109,9 @@ docker-compose down -v && docker-compose up -d ``` -- 训练镜像如何调试 +## 训练镜像如何调试 + + ![](./debug.png) - 先通过失败任务的tensorboard链接拿到任务id,如`t000000100000175245d1656933456` @@ -95,15 +125,21 @@ - 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 ``` - docker run -it --gpus all -v $PWD/in:/in -v $PWD/out:/out -v : bash + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v : bash # 以/home/ymir/ymir-workplace作为ymir部署目录为例 - docker run -it --gpus all -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash + ``` + + - 进入到docker 容器中后, 执行镜像默认的命令, 如dockerfile中写的 `CMD bash /usr/bin/start.sh` + + ``` + bash /usr/bin/start.sh ``` - 推理与挖掘镜像调试同理,注意对应目录均为`ymir-workplace/sandbox/work_dir/TaskTypeMining` -- 模型精度/速度如何权衡与提升 +## 模型精度/速度如何权衡与提升 - 模型精度与数据集大小、数据集质量、学习率、batch size、 迭代次数、模型结构、数据增强方式、损失函数等相关,在此不做展开,详情参考: diff --git a/debug.png b/debug.png new file mode 100644 index 0000000000000000000000000000000000000000..e439ca6465be75812be03450bcfb52acb23ae0a5 GIT binary patch literal 43913 zcmeFYXIN9){w@kqg0#R=L8Jsk1*M8gC!vVwLO?_*A`n0XrT3bIqM{<8OPW$bQ3L@= zq}PD-j?x05_ma?CAmvQf-pjrJ=Q;O&IA89U`$2r>%p7x!ImU1N%KN_K)g41UF7`9* z3=9lhw{Bj)%fP^_#=yX+#>NWV>5F>(9QcpX`>vig!?$jsCE$YDQOiJ!fdL!KL9t~4 zu8(`%wD4wN;BGkj$JpYY^N4{VPvq8ht$P91Bx-QEm2Sx@#~Q2~SxZgXN~yuESuPCg z|8RA^H=8XQ_E41B3w*pn;u(*&s2KAfZeFKehCbxtVPj@uep$|W?9Y>LuJinmc`03% z!@>LTh0M!inWEv-t`%mf~aR1TgiIsB5wI_RvYp9tt8UZpUW2F8wUMUe*IOjkC zwWRrHR2@q6R2HBpsY8LYB&E^tqVBCpDis&fW?&r<5*u_a)mmD}+TN|y zIHB62km4>SWNlZXg{k!{%vaiGf7QPlvM^RGd#OlR zj0*ZlXI-TX*@O${JvQ)0!$Mx5z}+9Owvl!=GJb~WcB(kV^}vE!wTvrX_I2*;Eb7{i 
zeYFu;8>}*^^dz8Ch~M^yJ>434vogsCAAd%*RF;s$Gz@>YfgI0p_d1#zJalRxli@_Z=jjg+{!1jR^z(}Rn9$mX=O^`!us=T zOV)w#LpQ&Z#Qo6A`ppMkH)PJ7h*t`KTsqNSI6fmn9ur*sUbVWMfSjBiUR_5p{=?aEetK8*hnnbNRwE_m z7aaHwcLu?mnN7&A$Wda#MaXT-@1w4?fsUg|g+jNJ&8mJKr*^b?(<~u%ls*_qghq09 z>+6uK^z`Q6eQS1p2<33AOK#`KE_Np4<-J2ETGWEOFFkA?7p6hqNrbn2xnSsH0#|wYQD2X>_9}t{vz~IEa?vxyk({LscykfqjSGvrioLeC<*Y4bg5Z z!S07PlbA5upl_6*6%Ynr7KP&IXeoTY&m!<-C}c*kR_M*nL-Oo=i^vX6+HJpn522xx zdX^k*DY6QWa_4t>y}q(GvsOE2SnGA)G;Is*@b>=R*h1PtM-}Z_+<^AL#`|NWcbgR= zq-0-MaYuUS|at)yNRgL?Qu}@p(P#p7fM(3(Qs;{-WHP!C8O*S z738h&erNts%X7MSh9xb&Vs*4djY(;T`QomK+7GLp%|9RvTB7H&#n0%?!S#N&y$ZYn zTeT(+?yiFTNsFbUl|_f71g$jp5E^Pfoso%0j7>VFEX3d4zCQ;MVx3YMN}M~|7u^#B zh=86|u>99+c;i{vxTOG=HW#p+W;tqO9nyK?hT%eY>Vh@$z`D$n`jq}>4VSZ-U(vg> ze4UuENLlhicICwdp=G{@N@FtR$VxS0zRsB5p732&I#Hk7yRREPyEQ`T=TiBamBJKz zI=-YQ=;Y7N-G)qNjz}J9^X>;lgK~E3fusg7`T0*rx5MzmT|wOD6Z5)~N_A7SzqeQ- za#vvwzlK_=-C33!jk7me%m$a6hpd;FQS1ctny`{q^i+u|nRdnwy$OPiBI&Ys9KF>N z=a+~i8y3d0Nc_@PMt+U6(Wh$uw1Xrw#c`1x>46cJUiIf`{5L7=QO`>TqT|7aHOmih z`?O__9&(pOc~VcC%xFrMVUDGtbrZ_vq^BLD1iwdk*wSv2@*&8TRp1ch;B}?hPjkX_ z33s~q)Q=6u)b+$C{iMpSWa@r+bt|fr{}4hd`fQ)YJ{B*im<8N8o1aH?Q^`P;oI26esxmV-T4f~c+KhSQ(m2D%>Yff1sqkd_B(}%l~E|4 zH*!p(7_8&^%w(7=k~e?L;YX~QfbTW(R}%I%WgMDeuV`&{~V~@qiRN=vMr6G6qsp7Jw)d_NvOGI0hPDp&HbX3;46O%I+n@s$*M_Su zc*UCH0x7xNGeownmIFCf*XQXmPEw1{>bdJS;tj6ZIWgVStd-+C{1L)z%0Rp= zPQGS$ujTNATYV08wc&iYFofX!5>=`vEgA}Mkrh_mPfWa1`ncN%^_qE-y16k^BkK|a zQT#PFI>jWCa3+EOFOD0t?R1Ywp|Z7Sn>Se0@UX=b*%=ePFhz!Ab}P==MebVhGzSa= zBO7J%^&Y4BvqnRs*>GX;`Dm}9+S1}kA;wqs?kP*IqIl1l;8aGAyoU6~Uvgbb>~U}9 z@wN`4ew&%&aFC(zL-N*nsJzeYKwqY6CbmCBP+vN!4&GuJ>$Xsfv|4x)^99#0@DLM} ztJ!!l%Fn+L5&ZUWsYb0l3p~+5!uG&XmzXPC?R}lO{Nmn;bAA8XvxI59@czy z*_k2DsmkMsM{8nkKQ=d37q9h=z2UeJ-wmd=eKFepqVm}4YK0!=?5VoN0=QlQNw2)O ze!43=Qjk~r>9^r+19}I0Omb=E=-~O%Bq01(lou@$ zR*Vc3)tc?;4g}Us<}sId_ctEu-DakCv4 zR5($aK&ZsT__>r;(SbZ|m%`8C*ZAw{`O<{%&fkKDY}ovS0@t0C{93hE7j>G;OCo!= z!$L1B#~V)Dp<(Ljw5zlI; z9R;h;=EC)E#?zX-RR*hl!z~Y69NcKv`YwUg*qDw_xk@X)3+p?rtl_o(<1#mW->X|( 
zk6f&e49OqT!Y~E$t|#pbH;n$cb|plUaY+(9cNXtCg4ha58LSgwfleIYjQkeVL*f!` zXq4$3xgAzDo|G_10Ni;o(uv#PxSu+`M3|zZX#&_5lcCY%NtQ=qfF(tRpW11E$7-R* ztabrCe8t>$PT4bTvZa$lVki#Am!I05$)wQ zqB1|qG0Id(|FtI-V}C_MLT&}t!l}4Dbptg#XI2SPRdjYbF{=}7m-Vdl%}$T2k=stt zV&B&p2^NB4icKAP#ciu)8~Y@jJHzcAeKhB++%HC?Fn5RKQ^?!R*ris&>n$BakvtA- z!e^uk-Fquy9!Jxl57#xz&>hs2LK17J&ROBYxm@y!9qGI)a?GcA^6MUpB-4i1K#X>k zVpsHhSQ%Z`>seXt!q|VPtG8Xo&cxRrA07Vz*Vm|zQJ7mR+LQMmpKTileX<$Km53cp=?_7h5r``KULvE`(7WjArb3Oe^i^}wVP}90*LYt z=>_1`c#wGj-1d1EcHe;`D!Q;5S66R5Vw%{p?8NU)T7~gPOi-G7#)_LdRQVf{YnNQ@ z#M+w_*T|*28={)d5KZJzQEE#m7T@A&OBoYG4qvy(fVRo(b{wTQF39C98Bcf0Ys*1@ zSH9Il8`zfM#W${RHZVdwQaIiy1SOAod`40cc`@7d?m56Zn%QFq2^7B3Kf-P7uo;@@ zO!r}!q@Z9-6t1|w!xn;g;7j=gC%o9|*p&LViNk+KVJ#Z40SWG|jTA==6LcDKUzD&( zYi^zIDluxunWBNp&s>;P{=}btjgEMWK5*&?Q=kvSLG&<|!f1BC#=*OOUH8`xILf|u z3ahk_g*4vw@5+C&=03kAp~`0XOq9*z2i_B^`F(A?VT{G(f%xf7$m6p)KrwZd;fffx0MhzG+1GI&*zYKNAeBOyMazi2gkMUKns1a8MZ`Jq@q_+^dw=&Ay%_ zo$>H$$iFXwKX2;W?!(mkbjmDkHSZszV;@bwjrjGd|RFLnUM2VPVf%6;GHuSqR`a zX)iK`XYriRy+WJrO6{A)(#?bgIRjV%7qstk%H_^8=n{fX|8c zhAupA-2AJkR$~J62U`UH{>soboEkQnbCk01el$g&dn+(1T$rI&OxbSB1QHyV2?x1H zU;|%q1!*cJaIcHrReyW(i_LoIU8xgYpN+zW^*Z;<>6T^`6OKo_jOiv4h##;|ar9=p zxdIX575T9u!U6j?prQkcK(zWcq8qI0nC&TL&c zYX>t!*6eU>+1tLsDHJC^7`;$W5KV&6`*}OuZOI8=qp1ClBX_;M@RFO5%}iV}*GeP> z!;;*uWqCG0Q=t91?7>w4l)%~x`6(mU8CsJ?0vl1gb2yqlz3`MYYymdsq*HtzlEgM< zS>{)2V|@5F4wb<@Jr0gi&fVRSV=K)iHY3z<+@@VKNNgU%Ds-k_TH;01) zO}P{p)XQd;EljAcqUFNps^{#>G$Qv=K~uPl#&DL1J%LrDdv&aC&bC1o9;%JBtk)$r z_mHQFbqyBAkxZ=q;72EcT;i~8H`Sob*%=-oz!q*kJg+wBH^!%`OWMf&*ANLnFacnNM|=AeyWbHAEAT+$ z2yrcF*V{b}fStli+%TtNwE#LVtuaJO)FZPhCkLGm+!_Xr=5{39*!efE40Q7}WAlm{ zRK@n0#sxBKef4Ynw_s)3itYW@ZE*pP^-g6#8g*3;E(Y%t1WH>sjk=ERWCmYmXBU)mSp#J|-u}nln)@y#KuVdci$zze%l6U&fRQSJoUoE1ABi+g0ku zAv7|dKQr*mN$w<3zOnt}KR2!iXTT%HOtXKR!${P}aA7snw5ZbJs9ZVe z8>*9}ys?;Lc;zLF%S=DwceBKcPm(D2B(o(BFx3uT8xV*FhkS-XwC*xas3lL>cY`f13dO5p|O&5`M~5gq1~Gh4|nDm*1VT zKjDnEbrK_G$$n|=>756L!_7N+RZGtt_*vAv%EF$_ov>UuDJkdTJi(?)p@%1v7H&_EI$$c 
zz$T4YcVXBqCBs>%OE)l#%WtyerV=ZY@YKApygZzVqt~7x%&UrAMiY-_WY#|&nm%TZ z5zT*J;Ld&E$Rud1vVVI*s}uE!v0CpzXJqfUf3Nn-!3@B&`Y8wLD&RLVdfa!sQsz%@ z=l1ON=Ea*&{$N!G)`d4*ltHnJlU3ART3Hjb)ZE5|m$Q8FSu%j<;Ft@87~k!ea{wFn zL@|fw4-UTG+OEjG`^y5Q2xCOp&cwkZ(j>x3d>;3^zx0dsFIYUwNleruSRK zansG?WUQMOd&)KgiH8UmO<$a4+BD&F%fT*Xce1jy*TIhwTV`{))R|bZlxR*rhPZ}O z`#{_D3^sf)>BC_SY_}#p>tU%F{Lm8Z0+~iE>a?T&*$)UKOBOY|uqZ#(VO~OYOE?Q( z6?wst0pap1>KOawB-n*0vNqaS;;>c{%*0;8s^%hkb}&W-&IHZmv>bZ)?-$}9U!*_( z_a|&?#D@#FIePhorr#)v5axW0ck~nWur&%^j{}X1b15A1n849n^;dbMiuf?RF=MZP zh%!X5jE3-!mD(wd?Y}WlQW}>8F)~(kz6{4MyqII~k4Jr3`*|J?QrwY)AqweLJxSGa=|&Z}OP|L9 zy!hcoNk*&L z|4CvUE0nKO^R1X)R3z6Wc~Rc?KUu>;k`b!5@{9+&et(QBXcLr6Y&%}E2%X_2rbBW= zZ(QOz`}?EjjZHA(Wv`4~k1BOH(E3GQPVz=OnXk z%KmQFY@&j%4?U)Ma9QOh3nP!Y=43tVY3*QJkF#<7ED!8zo7$Oy>gShm7~ z0D8vEXj>?MOIjan-z+3%spL!*=~{p4wWT}$9n}_Y8%dt4;=zqT+L!tBcRP-vm6l!K zMCwrL=(E(_Z2*+%xe4~QjF!h^328c`&uqoVdW}RTX-~?inSnq{-v4Sjqz7Ay5DbV$ zxH-6BX7;f&qs2^MPbDGfn3Xz7zH9>8kQI3S+U|&VPH@mX=ic6Py&&Xe{}{o8SGLAu zCwujvv$!?P#ajZJ7Lt9-#Y|P)BFi!6Dg##QavWpldt^5Y`+Cr`e#6c;@7WZ?fK0Oa z&2S5KF;A^>!bM>Ao4Vsrj$zUM>WkDdLM(WBy7JUmM^BrU09{UvYQNfKK&YC~C`~P6 zs=8cr638_>YRE*URUie)+I)=Z+2?hmeSPA#ddg%}1XQ}0sAp3QNh+RH_HO~>*3u|_ zYO7T!YDDm2Sz#r|;KVrJ<`^r47*n-=cduzpgCs(wx{vmu4zsG}UP)ALhWqV7go4MP zudbriS4wFc{?-fc$|gKGx>b83_V(zT=amAdHZ(THxD;j?C&*iBtIn|CdST!B3Jo9c zI>-GO)w)5A9=ft6Q2hZke=o|eL&xflL_e28ocF-c+Kxk_3?#qYwV~fCd^nPr}RMo@=GR42IGN+oog0|wUj3foL8kZm*1Oik<$D1`^d`>jM z@+}+GalibJ*SZk2bpe`&vh%NV03Vg-{&7L*YWo62V>Ue}B>f$SpS=0T7Z$zpHUGsf zLDlbs+>c9#(H`zSXhJ-U(+OaYvp%fQe?~P|-p8qr7x;hinJJ|!4_j>%&5XVjzoEOe zb1jQM*4KH$YW`c`P5@9t#2`6l+4^s*JA?z}#ZNa9B!R4&S~Hg;5_ECi8ahyLxUwli zx;tQ)_e-KaMo@M?C)ijEhn-treYxYA4nUz;qiRuvxgESCM*P$>wunlmH3bkv>e0+Phj zRX!Vrj#+66-8Qw!!821@tHCzoZ6vi%J8RF+)tn+G`7~cI zTYn)yn~rL;6qYu`yey}zR#knr*9v>yl%QneFY(ap`Gn}Cndl;tY=i3E^r9R@*l-n& z6}J_jmT#fm_@4qeD&MSm(hNVAK%c8Z{=6m7DpRTh^(F1BZupOH(}V&?hK*BCJ5G3H z5e~))G=60lcv6}aZv22zdY3dk0q{c3Vo0qUBiAPOU&w*%HO!zDWUL-lE=mx&jS!kLd4u|efZ|Jvc+b%!Pq+PAP-Xm;?8*}@ z203Kg&jPC;l9wg5E40iKG 
z1#Vn6I{hI<%|9)eyc9eJR3}ua9iZ|b?z-{XT)u%-q;eLWyqullonz@S=2JF#u(XOP z;`W7%`QKck6v?S{8mOWE>T%vOQJT$(@#0puyEh13F}{9<0FL@YT1XoMS8?uofO}cL zPy&X{^4~yLkT_Ne?v(Z~wv0o|o90#|?ffyS&B1WTs6CEea5$KU^Dv3g$Vi4x+HKSi zS#Lx+2%T!l^r8Bv5lbaO(h!6@YPh(i)0igiP93p2m{Y5P@}=D5nA*?1iljPl zrPprX9}L;pVrD|?L6uUDhQ9hOF>=Dhx~16j!hu7bOVup%%@wJIoSb#tm$RDv+T>E9 zO~T(%kuGJ_(afJS)lkECz2nvp$h?tp3JU+}Tk%Ow?PvLES8BGz@`P1&=X7E$eGSjR zG6*v25`O)-e5%m>aGv+9u^xrp$ur}sUf&A6{OMo@BNd{Js`=yuKC(6E@d@dUoAu}EKfeUerTk058Aj6x!` ztaWU2O(IypO~PV#PQ|L{SOt-+3x$Vi{3LKV|%A>Of0IoKLuU?1|WIf zJ}!pvD~mnMpMu(kG~31D6MtWKP%^72p9bC>~PDAI~ zohw}?`L+9BshZzJV}lG5QySY_Ud*l#tk)4?hpkIsU$4!#MujyMdC+ax5g&NKL=7*s zJ)`K7Y&|j>eR3QCK}PMsL?ga!v;Qc-(*R zhci0mUyiGkmY*k_Oj<>`A$uu9xCH>mg~MlZNYFtHT(5kC+1$}H;=$TMR6RGd(7e#q zk*+hBzs^u^m%;nZ_on`+kBKFzG{Oktd_0;M`$TSGN{7Cr78iW~cup zK|p1D6T>de#MU2U7F~~!AHM?U{XzO2&u z+!2513A`45f@^7xIxEJ8)0 z%~0USnI$=2R{nhJOFW+KnPX$l6C$(7pf&E*#fCkvVB!@#nmtT*(|qhfgs{Wx&4~PE zZ&hh*R7rc2NR|ucSbe0YM2tUVIQV5UCZt*|sLMxbCj8dtEgjV6(h2I57z=)bRo?uY zEb>88cw^^7URfVZK+)3F4d$qi&WeaW`uFge>;l;`-&Zp8fQm&{wty>~iA_lz>Q1|U8DS{T@!ns*^*kZsE0^mZI3g0nhWvy zp~&n6qj|luvGwdxi}|uO?mrki7ivxp;1N;eSv(QK&A+8m@%#(-B-3hxoRr)QOmWvA zdZ>frts^nqMk}K{%@`PPhH^FgUK5dVMJO+T&83<>D-n2q!o>rR?(HJ?V53U4lIse6 zjnv1*d*3~U{Iyv)zEV~=S1OyIb!1Dpzx9kw;lkMB2d@e~6F=qwq7{H&5m6tB7WR6p zcJTJH8)@naLKMzv=n%KRVv+Jz6F%KHZ=1cF5Q_j9fp2LlUFctnacc|nF+!xd-iF+$ zE8H6WR+sI+9yUt6AWt>|a+WG+gGe|#n=BsAwD|z$@iwMact&|Y`4{pD`@^h5HvvBDt^;MNLdbbs zwZ4Elqo73p_ymZV&%&OqEApkZ);-}&5m5=4Ivt4$QCWDG6*6)j1|3`x#fMe11#}(| zZQZ6m3P96Cec~d$c!|oS+CU9MXE>Mz?M|JLsPYzpws**(ke>aCfHksp*MZeB*S52) zp8ScKAs3P+A;&)c(q}p25p?W}q$r1$NS@`%{2a5GtE~VqcA4CnR?jZg@`{5Q_Sjmv zBAbGR>&C+)OWb|Ew{e=5^Tn);m}A%Pqk86V30SgJ)E6 zr&}`5;DqdWg$SJrM&lIsgHV#!Q}$xW^*`%gDxW+ zsHgcmaYC6M#tAuK9p-Kkj{B_vHoaNs$0*Vy%s*Q1X&Wpaos!F(w17 zbkHG-`=!x~wa`fxg^ZjKJYVdPk8%t4${W%_n+n4&;!CF}$GUC&lVRDh1}A*je4{7} zH%kgN2aHF6KDq8nL`P;n$fw-G!= zyy+^B6<~=rM*(saIE&&Ie!B+=W8>-L`k<0|g$*77R+Q)AxW_X;bt~qTa3QDR_Ac1^!|qvNM7K0coDGw+X;y_~cVF26QYPIz3UkMzk0vo)44ez4C})C8=FLw-F2 
z&GvZmiZw4~zdySh9?KGZj`@m5Fo(PIt2S5B?ALM*S{_VV+|0Hy4>UBs_Qh;J!OWn| zkmZCTb5;|ssG{wI^n(O{booVk~uj*{e(Th_0S;Z z>mogibq&hAqIEj@Vo`G-&T}bVYrIxH(+pE>+{|21s`)}2H>(j3@utT5R%ip*GjGED zqsH(N+3M#}#kKsZ##+)*a7vMHwEK^CLj3=YY@tx#-?9by08g>_t|Ou!nqg@TEfRM` zlRJ7l;ULIKJxpDlHdFF(>Ee7&mq3)xPi`-l7uqUycDbi0c`YbunnyJ(ZYt5uN~&)(hQPaJw>LVGdZl9Fir!?4L>DgO6VVcK7YH#S^OIPbVuBiX>b;89Pia6p%pNXea?a)s@5(5>s|aWawMX z)75o0xNqdKS7VPR=V9pC`G;OfJ(eel4C`F=yvgVTyv`{WwE)34UHUG)OqE!lP|&;S zuusa=a8TR>i>f{V4H17FGD`*Hc1p#M+igM9Eh1WIR@jmxe5(Nl;wjW!CB$a^$B7{{A zJJFd+BVZu_hZ>`8)XC20a|#}(cswJ4C0$L~g@Qn_5@2?p%(*vd8;U_dP1V!msp_vyJeN?eOCu8NCZ3IRPWZd3Q zy7F|!qLyFIm0?2k2bz#ii*ycER9sGy!GS*~x?b$)Ao1Y_%jq(_>gk+w+Wd%*E4c6Y z%iL;0umFZxgIM)x*x{xP{WE=tJpsLi_9_+|`3rpe*=m8M8lf=fNH=-s#b(|^970C! zN-bdw)rEr0LY$6NE{!=#c*gRK*^v%6$N z1*E?8J`Ot9c|PgW)jb}l%wvikmK=Q@;eBGeko)=6%*_TkNTmCS4zy$Uyn?OrJn&4&vD&Zu&Vs$Legk*3R*Z!Az+W4nJf*zaLT}k}UkkilVayi$dD!fN~&(K`Y zv$)h>Ae9AUbU20nrBzTU4mx5+9>m8IFE<*TR{bT6>~->Ar-@T+aLwD-NdOvrUFiP| zS_AaO|HeQ6#ZlryMF=yKh5s3`w>+Ga?~7Vcvllpuwjt;H}b%S8}XN?@z}Us!$|yp&U*dBB=Ok6 zjmaN`(JAsj3IOp@6$z%wB*L%Z{pRTOQjVK@`hUm=F=>EX-=1X28f$lrLSSZ&U>idm z{U)HcpE4eMob^AR{67dC#1HF!0aAsEM0D+t>qFcKjyBiSq8>Z$9hC?xo0Bz40yAYHiC@x^5xyAFkgvR|W40sBl-?6BG6|K_cd_L5b6h*_V%AVdRCs$}AHQ&h$jF zu6~crsd~4Q3YGss`Io-s@2UN9*92n^6#b*mCoC74gahs@Ii^`J_Ssb?WWqtecp5e3 zXMT7a`X2?uB{nfTBklJAR}N4t2o;2#`*qBq0fEa7|s zV3l#$)+B3T%R;ayK&AXcbZO7#0PK~w>c8|{VKLX-QzrpIpwK`xFR(#0B2K-XxekJR zFnv*77g%ec^!{Y}^BK^f5*w^im(K#Uwk~3?ic5fRK%Y0Q^B-da)GSv4 zb0{|bZxK(?|4>`J2Dq%>l$7>W!T+m$@joRSIcj*UE$Pp_`66(+NurEv8!n-QHoU!1 zI(o{$UC}8SSG}JkVTv>~^rDFidyRx6Zx@ywf{~v>p0MW93KMTcqfPe;%X0lg;%bmV zQIB`rErJdYxR*!=LuyFvyELWcgYUZ=Y4%E=jD0UXMUPA3UXDvcc9)lML0gIEuhCwI zy1SH_kDDfXrv>djj~x4|Z|3tp44@Ot1$%YV6t;?}$f7{|N?m}(FjQOra+{n!cp#>+ z4be;_I}zGFx$e06zHaYX`ZSC6!;*p1hcwHl6}2W9;y49a$UY+FQgjw51%#>q^aZ1s z9;NQk?5?q8Ggi;7Y_QO-Ody!ke{7rU#WU`Hz3TO5R5#^4Ke-1PYADIEM55^gI#8aT zEJa2$`#B545{^DIY(n%xp0+qY?@n(%O(x1UvNj1FTt$mM13Pyu=FW{Pg{V&hhr%ba 
zTn?}1QNCZTq68fl8dK5YiqO|17vjWl)X`-x>6oA2fOxM_Y3}Avzich42TkUNGnL-C zAsJtn&1Qg)!1~S00z+fvT4rKsjds#g~rc> ztvPaaeJlcbh3=%pnQZIQZQr)A23>J7r7OJ#K2px9e?Mp8;K|8?UROd(7 z`D9)hSic(~Esc@{XxAm2G{Oog_4W)TxAYZaO|(FJLil_H(LnOq@Rig>mMDr`#gdp?huB&`oB1Z?3JKYfCocs^>7Zdi-%`^#f>pe8VJmCnXL+239amKGJ?n!~PwH|GU9s>y zv0s5!|M=b7MS$gEe)4u$G0M1)VOQDGlmp5(Zka*`Tm4qS{;d zi{2YM0O`FD_AS0)qJN)j>q|RBzH^dD0^|U_DR-aVOP7Zs9?>#as&eRf*;2#XQ9D}Z zsOmEGaf;rDfQinHhGL;k9T!Lz#ay+G%-?6w*30b_dx!5JaRr6{u3&k=S4R-LnQ>wl zbjWcKg06xVpmJ9v?xQ}5ql^z99M%nTkw1teq;BHgk#ltfj;}D7n;oijg76ZO65Fp_ z!>S8p9nXZiDGwfu;8IeaFjVF`vnb>VwtKJ09fM68ImJT5dTrjen|-D{SO&p4Xibk8 zyyA`v7W4PT#v7%B@AR0gDt!z-4Ss8Y*?46$E}Eu7zQ)+>GorME zw~w2noVooP*)xkIpDlUBvE1c$OlnbNA20F>^-J$j4|vXnXD9VtxOt6PD<}zoGIriR zMquK0EOpz;OkQaIf z-M9?aa8|o5X9D#BJc|R{R-J6e_n_SE6x6wRL%77e+P166q6O?O`R4HMtKs_KPg>1a z3EXk~R%qJ~8T51a!Jqg`sajS&O=R-H8*Fm5_ z7QABDVxV?}?`*=oYsfJr)-OEmTG)v{Y7L@>wJbWLHa{2&AQ~N8rzF~6$jle@B8N9k zmMRVy_ZxgPCnQ0bHi|9vX1iR-3ozyuD&Nr1Ei47`P?tMch5livlqHM{-}jB!M~_(p2Ttgf?`nldl>Gy?k69xB`CI)UjlO^ozCeppcW_nBvLBY>Bx z!k`6ll2L)i(7uaVvHP4VlYR5E026#RefB8IvsnLX-ya?L4gbv;t(xHOF{g;z5!>~18==Y z)d6H!NPGfw>2fM+GB~~|5-=&0R4mz%AU1>?G?gq1bULw=Tl#unBD-E>YZLKdbEEyB zaTfFb3{1s~#hmi06^D0(8ZO^%D+MG@+)&ZKuq zW4&~Y2EYpRmMLsrqP6^*viw@3O=sMmWU{vR%Fa;Fp3|fyxnFi6??0S{X(q>$X}3zi zH_0vP1*!mR8DNy|?3*;$O`B1zHxJ|5O&!oV^JtK-FTLd|M|*Y+ey3C69WEQL*B+Ks zB$>p|w7mgMQJ<8@c;Vsa*G0VWnho+_?Xk79XV@A;+Q>auR|NZVESq7#@f2gPPg;Bl zx9*Zoj~M z_Q@0wA=~eIcIBwgAnOZ2&bW5)BOKcPV(i|Bw|%&WqKH97Wc$*)yT@HTDNJ$4 zAv%*sTBFm?`}}w!lI%0Cgg`o&A_F`puly2g34VAE+x(g_uoD&~6ZHhx^N$hsXCzl3 zFXJa9Rep&^7)mnb?!FqB7|9%!?f$VF*4eHj8Hrrh7agFf&|T6pX?kQ#nEIeu$P1a- zBE*|L+tmDT24!TI%FhE-eMZYx4nmK5c|AcpF7ZieVUh%~=pi`=6dJ8C!vxK^od>ep zaYPbKGkT=aUK}p*6}4h=%QhVCcEPihsDX)gQgFMg+U$s20xTgC(Hbn3p(CJ4GpB9{ z97yKLH5cmtJ@*jW%`tIoM zgRH9Mys4r0$ht^u$co8V#L;r%ev+q>t)4Y49~;Zuc=wcvFsK;qI=*}qpWq=Q+d(8H z%kHZTslBKeaPmZUQ?pV2Z4P2q$l~Xa4VBhDQ=#9bB!esC*&ha#q;mRZ_4~VbN3by zOr(AV7!+3mu~~KMblwoMXzn&s?{VtFK9=lyX2MJWhl$KKn!|!vD*CwhAT_H3jIJ`IH;L5jO!JZGoF5M)|aW 
z9xtavuVq51=7;@F$xDKhh}%Q$79yktN|9o|A=}~h4#Wp1KM8UbgB|j&Et%XN$|i&; zj{RDuebyAAoCU)nwHXgXnuA+y2?ybu`H+gucZp=<7s8NYuwXx!^d49xktdx-!S`F- zzX8Xn5z;?O^|}Ec{lD0I�!-hHVr?0i{F*iAV{^C@5eA6b2~)6mcFE1Qb!kP^3vy zN(6)i1O@31UDVK|NDD1U4Mkc2L1`hODIF5&f%LPPXPjr=b>8!x^SwXLI%lo<7f5#Y z&fV|pzRIguZ57Ss7DU8s2jj;5K@>)uZoj4ff!)8-y_OI0~YO3Q}trrSiK3 zL_uq4zSEU;fK)d$`^&A4-I_EuV~0=#GL2rR$ucB7F~kkdePaiWi~MOWW<_)^mL~VBpI*Lp*ljTBy_@pNrpfWn$bA zY!iALrl(a_`(=c%6O{yVV~_$XU0r-RUKwv5X_MG(inv?LC*fEt=IlEmS_@-J@}Uo-x)6D+w^xsG80lccE*K{)fxB9@sN z^CnK_7P2I$`yR|fn1D*m79?K0z;wNY41A%~-gZQ4h1t+L(Dca}V0sd8dO{?;1 z8k^E5xJjR106Iji@OjT8q5Cw&?jvkJt52u)Z^>Qa7>+zqc?kKU1PFHyN;Ric#eA(i zT>JwLANWB@u)3B~3|Vg#Cb!M>W(~1ieeVXTo$sHA^y1n!Q>ef7niM00sOJ~93DAb4 zBF<{oFoR9FPY^hpdZ;%i*9pP@1w2@ndifJU+BI@<^Th7AX7?uM^WD2m^|QaC6t@#R z#>`n(0})S3K6wb0y;SjK8-29%Zy3_woS*a*33#uKjE&d^N)yjuT{O@1rK{fs%%SpGy@Ra9?ELdqKaF9^it;Q_>>i|>f}fX3nA z(I!Vu!S!k*p8bNnVc7j{c@N6g4=G?~XYleEZk>Yzzjb|+2o))c+RD3fzx^*dzJVyf zeGX0ET>YefhQO_<&geuFKgoHXPUViAQNh`uxnHWz@^r$1gT2zwksI>BE(cz@AO9Ip za*_i7L$gIwcJ#9K{hQ*VFZZc=d0kDi z;e-AC7d!@P7sKtu)!LH5tucSazUlapn-TBT{4l$V^~;RPeMbdl_-kb#vs~(F5W2-> zGvqh5z~J6L_FHYovt0fWi1p;J}CSTlmcz&!z@Lh5Ym&`g`hC zoo)VD1)>O*&CzePf7vXKt}o*J?MJva$F7DMC=m_{Naw7JEVO{@zd{SUgY_3&ivn{- zn+z6Aj6L*aMPC9uVmVjTcp?!54vV*pxfce`TM+&wKHI(FSfdqrO&?QXdrT)KY?h$ie3IPfB+$8T=Gk^UO@`NBl zl=j=-swx0|zai#?m>)lnXpM}4=_tT9ohlHvG8RmHB@pq~cmg<%FoVBli3k+v?Fl`k zZW$6BQk5itXhJMyW&~WW-0jD``%&1em>`BPN!p*<5BVROt z))6uO`|%5>o~B3rY%{XQOnL!TKyU}pqO$v{j^_Utp-9Q!&I&FS0t&jnE~Eb`ECp`+ zy5(=8(!^a+sYgmg>_LNbJU@|S=f^Kr%BvZ86nn*^8%5{iME0pPSlCxLyr``>DYfL0 zrqQh|t@b(SLEJ~+Ealv>zpk@Iun3Jm$~m6n3cY_t62pNj*o-1DC+sq_W;phrBVWqrlb0BZq;c$1EplKt_bHLorzPil@za`HPD zm*0N>sj#m}-o*(7Df#FTzxmKBVuVcZM^!rPNQH;!zNAxce4iGtzu<{E%W^ ztpM90vuE3MrouGGArIBl@X_Vzc3lT{17V|)oAMnH8rEo_gPPvfy1Cq(5TuI3T6P7s zy$yDer0FIqdwn5_%99`_Isaql+xB<@Dxf21yA<^Z#G>AM(o83j3>ry*bH`)w{iVj8 zhP?4?94F=}-4o?gt-W^X_QXAQV{*#cKTQ9BnEwAT{r_S5|HJhEhw1+h)Bhi)|36Is zf0+LNF#Z2w`v1f9|A*=S57Ynu$Hs*JF#Z2w`v1f9|A*=S57YnuJ4}CP)H{+_3JHU7 
zxOtKY*(&`5HDBoaU(x={ZOT}GvZZ72h0peBM|6GS%ZNEP`XejBXr-o&wadXTqxN=L zaYqZ$`FGP308cq>(1g|KPoIB9IGNabJ~j~GA@-u{EX+49dd5sPp23MXJt6TG6L7@_ zg^Uq5H!v2OnB3&GR~wT^ z&aEmT>ls`2*tS)L%gmMtgMIQ=#|?NWa*^Wy-gE>N=}1~xxrub_ke82sr1rz0UhF^^xhf?*|;RBx@dzFFc;)|256dt*Lb6 zT@8+8C#ydCT?c%mz!l_csJoEyx$?KChW0{KyM(7-cHDmfxWRHRdF*07g5v3l1Hkce zMcGzdOq@mDIIHw|<1u(ur6;Pk@t8{Eui?5?i&T8`3rkvRdLf}8kLh^B_if?^Oj6<~ z>Q?)K&2J(Tt@4RO3u{)-KTyn%V4dJ`JXh0}C0K(!h?id?mT?igKWcC@_-iOQ8lhON zOl^+cjnmL#vE{~pw7xL%^*%?pYm?zqNmDRN>$)VGATiR*{{6$gz)af^uE{BId+-)i zeUy0aU>(&4bfFAtOfb6Zwr6D-N(AcTONKO0CdyQdS@4AY(3kW2x+RCO{oNk+pf_vY8OvfD12n9&l(FA_&?nzb@ksl{cBp84R$w)m z-P5Ki1WeXIN6T-+AuYYVmbcQQl;&cE$UA2qLKgI2 zJmyQQvUkt820X!`?tCJu_&e%A@>@FcqId!OxpCUst(&kW^^11~%iQ@h;sO&ZVjYMh zzb~QL_fbnnVqz?jw~I84w*&5b;AIL+|QYe0tWrbKITtKQAT2=2HAg+2yy4I-EB_P<(C@R8Kb&TffH z0`tux&d#6Fn{i-ciY$-u{w~#DZ<)%w+DHyQjqjt6Hbd$N6fR+wt0)Ft9;MofSXORf zm$AfMA@yE?Gej_YxmW^kKPoc>Go)BuzAxaggysowHmQ6}Fjiq;+4&+H8GWh3Qh~;_ z7Ts!>PCjeWaIz)i3vO)dV#C?-uRZIoCx+Rq0@NMd%`{&Bq-6;`V_2GFrW74Auy~d8 z@Vy(&LE!`HVcDge{t&Ji7P$6@$N`z)p5%h#l{(56@-RudMng_gbJ2MfN^n%U2H9iS~__6$z#@9r~AggJH( zYsx#_mBUb^@i&kJL<9G-r#w%69<+>F=5Gfuk7NpGuWB29Kt&R(qw6hN8e8YHmQU8_>(?@l}*J)=FpS+ac4Wm9&@B3>4KJzJS5{IUw3#)e*nr=a&>i{Wmh8&JOW zs=J)_b&`73O4K?~nkD}#WTYIgjOnY2jLrS4xmO(YJ?HPR1n}8CV}w6iK8&a<{7K^Ljz9ck*N)bME!Wp>Cgu z$Bl|-M+S7umidqbHa|cH3;;YeGXvj9VpMlrgH8*qAEAi><(|+DcL;2e{%Sj!J0I*$ z5ADPDT&D49Zf5jV#uGl4xeW78WShiKkZD@K6hYQ=OX|uga^C^E#8K@vnJ3@2#pr;< z>n1%d4@|F2Ejcl)g5YZBRhZg)XqkL);c=$4@Y%$ukV`esYcai1O>@duP1$2M?w@St z+QbXfPYexbyi@N?LMLQ%EY(RoF7a)|j$DAUN)3JEkVlj;?BNh*ZVom&%$)L~@M+4s zVZ)PB_PNI*4DKf%as>#Vmqz=8=9-1>_Z?mqAU=SQDv#r21xTfCocV?Bbv`f7cW**2 zm%sif^|@p~zti0#cSms(MSE25C98qfgw@?T(0{1duu!^R&qo zmNXTZJzGwQjzxLv<~8}bR8hNT`?}J2GkC0L$`%c`6!Cn=X5psdpRiYzdWlU>H|6nq zXku;KQtI(JRH~RO?vbQ^imFxkZVCT)-oLVCv1L4GPlB`vNR_q>N%gZ4WEn&8GN_RN z{+r>!gn0!#TB4{yGu1wUyP#TVi95(ie$TQ|QP8jc8M;-K5WJ$*ZDRE_)B%?t)ZMwv z&CV2>5TC_{b@WXw~&1*26{4QDEMVWfO5_ z%;CIpih$ke|(e5-0;NV%N||x68Dtuglznw^zj%6-MSt_sw|jw%q2E_{tFw 
z1MK40=VKRC0VoK*d7p;r`CsdL$W`n*uRykcu{#aG=K;c zsw7fIXv+ra3h;Po1{{0$kjf7uO3c#t?s}e;CZ|!OYh8%)00({P#E^1>JK0Rd)1d^! z%lCIA{Ni*A9F#2W*cHmA>az~*(2z5%=+VAeFa&Tw4w?4>T$TM~PVJG}wdv@!RMGq( zAxBmckYnXb_mLYG z%Mb2@R^L09zpOI(6;N@d_$%F_r&BF;SuaPS4<}XE1A?K1nJN9t*kb&%09hDW2K8m9 zQjf1TQ@2zZo^H61|SRzDP|LJl}=lkP9$+vybGFr z78JODKuM{CShZP&+S*gM6&-hvRV?U`8SB?u71!VxFKj*2{B*8aall0`Y?gqiuB_#p zvSW{~r(H(FIr_{x)$@&BThsH$Iks9=>`0m?DaGX$U49c(`IJ=v9`64A1CO5cjL>WW z{dqtEGdXKL3W)dMnp}V{7!i<_ZHO%0*}*tk+ByV~62Mw{-An;qrq@>Yt8TanBg`u} zFur3w+$*>FjJs;F5=IJ}_x@ijJv`2G($dZs<1%bgpJ(EyypD@V<@oif-htx_;5Wg` zPCdw?pi6H6=V}f}$rHim>8`~oJ&E#s3rJ@y`7i8y+Z`p!(|?(!DI-%2F0TsFP7J?5 zwcB&_Xn&at&^JB#z*~j!-JZ*Fw}&B%z0y6Ep3BBC0|quG@=W{XR(3fCWycP87lN5n z1pG}WQ)DHdrmt9%vN=VP7}keMln&<22j2>Jqaq<<)%3s~oduGv?bGaWJL*L%%&P-_ zN$!(lTjqnh4W`HjQ5t-2pz|#Iy`;lgl%_;IOkFo4 z+$$eGsOv0%d?6GoS8w#RKi56IpEtjXbGMBny4g49Ag~Q5i|Cy$)vKz7f3=wya1WyK>dp`72eR2fMj7Ql z)6Mu-ab9yEchVMk#(`OVp{&uk%7lOwbA=gMhF`8S@n9y2ql>bK3JiYG%h=&tiqnvn z){{Mbn>S+1Z3KKPk>7R5ZEiesWwUww8!y}sQG+~~#PY(*xP{;T&q7p@EHOLq8R(;H zd&;7$&q-IpkmA?`AJ0R`wM}(kRPa0-&(P@5Ti_fgHn0H|!RSggf-SAIgbul}%cN{D z0Sw!ZWQofxbtxzfxaHi5dNaYz3PxOh+A-7nEGTB(e%*0kWOiGOrwZjQn8-Hy@?*>M z38x#0)1Ls3cs9(?^Kd}$knE*NKpgFS3KL(}@sSG*a{yliNK2QrfYH14WyGqpH_9&K zkB@o(7oqom6}0}(-g52+vPw#In673`IB2#Yf#VCB1-uyW`#Ez(SLF9~`~4|Sz2xf^ z<{JjPVq~2rhQA?(7n}`tk1w}UM~zYy1<2Gncvog4_A`aQovTB8>34cz<52{2^KxAt z9gN&|#QiA|=M_^HCpW!Ns(Ly1LVK)!T213SVk6v#S4yEeef-hJ)u-yk2Nd{dXn@EE z_|B)I_xcuomtbW=Iwi2yUmQp0paaNT`V(2}64x8p*geir?44pt$^nCOuM<&MR$|uw zB-8OEU;%b)MQ`tgW~Urd6Q2X3!eg>yW+%GbbzHC&HuD^^nhBWn6-noOK=mtsHK{sZ zM(eJGA6e?MQmmmm^G(6%%Dy^@-2VO4ULZ@zkn^Xd``&zjw(BU7enL9DI^D>lNkKq^ zY3N08tbKk#K@F?Y@j_^EnAhiy7%Md9kG&#qAFmkRL)w;4_Y01Ydq^E{Qr~H<-yFgh zo2W}+Sgo5!H!JdWF<@8ogL8dVkO4w0pydtoZDv@G+oZFulHBSrF)kwh`Q|ryC&Gi8 z&h1IR;xsBTQun}*%l6x|Rhuk6pv<;P|3$!EtI#E_n^PpP z=s(GDGx?_owi>6oR?1|*IktH`ivKL+V*L4BBDcvls*Wc1in)_2kCp|fjL#X;+^*(ST#Kr@i2CiLZKa;ZaO^L`(#dC0 zC9KqT_Nnz4YzvlOqeJ~IBB~w4vK_V6?CfUx36c0{=I-t5?m?~VYiX)nPq9*7)AF0A 
zdQ2ek5SOE+-;OHstXtLP;#d=L z7Ji_!RXr1evGuiY0x!HsZ&ps$$k)nkXsh_>hpFP_Su>BN6e|A;ZupN z!tDpXwydYYhCDNs)001-7S!-g4F*%*N9*|+%}c?nIoI#l^kGF#eLx+tV@cp!*X=0+ zFgM1Ze#n13dycwFfsm+%pQa`(4<9VNM`G-r9Zy({B3b{%Z30N+DxqF~l34ELksA*i zb0Z2_x~V1anQc>#*?iT!=irj5{A@)V-E$y{zCWWOQoX8%`O7WjA-4_;5)(poT0L-q zY_-D^l=%EyI>;+{+hM5}0jP}KZgeDrmpNyP^HKIR<^GV8-m6oETROius2P4x29zZZ zzipMC*e&IU;_yFdnP2*CjNI()G|p;j4wyN}P?oNKUO8~ZrZwywuU`C%-mLKlk2lhY zexv0JQA0ul@DJka(C$i`)p93*2 z-GX#xFFMK?h_2jdBlpAT2a{h(pfR8waXO~;oE5HLmsPfoDN+67Fp|JQFBg}sG6j+gNH0Ukq)z~&Hd z{CsX23A!0uV+VIVsk?A`?M|JUP6Jj_%;Gc4KoG|E`G>e4fqYYNS9`l1bYq zwI~|H?$zt*9{o=0JDR$=0}pSy<0vrS&=L9K{vW*=qZ_W-WKSRoV?z&iY1-4~YyQfU zt(B%r(|Iev?AZ}(9=9Ded>8Ch0~mb_cKj_(xt5QMF5o@?*?ZT=9{tG9tDoF;r=91CGY7*5jt$&h1p>$kj`w^|RZqq@939+S|1?@1oL)9)y84gHk*5loRJ^~o{q zc6Zd$*4y%J%F3_Pmpst|AEk>Xz9WvE*II@v)1E4&3NRJTac{$;pODax2l!}OdTH3` zfRFPu2t$v@e{&o6c*K2zjF1S#q(FAfzIRQ$gXa5Id=_Ou8}l=NqZKbD!q$gN$KS&n z2S`rwau0y`u1TpDz z=A|2X-{V&iCZ0E(-Fxb)b!bvYA#ICTekH5 z{V2I5usNpyzo6NU0RT<3O6 zrf@`$!TgCpbMfU2L&ytL7e{oK5XaQEL)#w6tdtS1bv|Lw$Vv&&pxcR0lzI@_KDM+r zFPS`4)l79%+Z)s(N8r=P_`V6woQ|*4Y(q z@@DY|do3=Y(**`5caD1Qt>3W|xkhfmfDg`oY)XZIk3s!%Mx)}xz%rEflWK`EsyD=7 zDK$5^SrsBzEroRlo*dkOxNxn!65EiiBq_AetvFs3NFj_U1wdc+l}{No(((m$I?38A zj^x65?2#9A7Bc>&ta)N#{^Y_AZFz}rEF@z8yb#L|ru(I@MO9{Yh zDTqXO($E`FC)hlUJan;~Da>olOmbt)6Fuj3!V$bKd&F^*c8!FU(uo~5_eXW|d1?Hk z@l_(1U!SXRX`l&<;450!=(WMOg}+hAdW009i1wm(+;F&fK$j|`c+nZAuF?E%sNi3w z?Bn8qT5_vE6_s*g9^F34w|*AYFb&ef<+IFHhx?L8I@e6wb(eav`v6HaU^!9zFC`dj zfSI+HAeW40yC5UXN8FXPP|V=i=U-o!7#;y9uv3a++r*H{5BnqJg zlAcCP*yoeJrS|PqQcP+}3daZrXD%;52D)eB7H62 zij?r7-$k9a?-1{1K21z_<=kUX&e?rn^nc)E9SxSG0WvnPh3Ac7TT8_&=+$Ea4%FHR z>y6}9x_sHPqN8FG%t)D8J*!UV?RhkK&)4y|TPMr=W8oZ`NWwB3PG(~pWHKP=( z{nu*#CDk3j(MMJE$%xv#0Vc4k7P0U&v5F)-I$MlUw&pcNll@274F!y^&Sqm-a-J@2h$Ifgon=WeA`RX_)-#iih$;XSGbSHZHC>;Twm<3l|;q)&V zjR_mQ8^;*V1A(dDlL1@caSxHQkZ^|-_@cBQ<;eCpj@_I$J=jJJhb$eqAlmi#0uQ>_ z+Uz7@*Z1Zk*{(Y0qqC zq-9n2TBFmVqx+PhbIWxH5yknsdo(Kuu#Y@79|rFn+o0X+yaJc=1)~v17!@61hG|8JiyKZH-JCxoM8~D{M`e{{l 
z-JEQnv78ekVWb6!FZGJUZR9fMpvWJ&%J=7XgLN5nEI2EKZ_LaKNWbtkVSZDt-ipI{ zI~CneNaCsQ=!!i7wkBSmY_aJ80;8tx$ja8NyY2(q{t9%wD49Zst{eV0GZFB^_{Wp1H|pCcVY2K^3G{9={gO z1LhQ!UOxL4TB$Qzc^x(n*)pRR_ViUvx%(1~G|Oy^N`QI{U*U&OM*Y~dm!0G38Y&c@ zgninr%$nuCrS=l&oRRpotFwKkQ7p?l!@>%$IsBxEzP*uEw+>WV|6sh&2jOp=fHC*` zzT%Z|4!=9MB5BAz(iP8})()pkgG8$yUx7!*B?i^Eq}*lT4p4|V=kyR=oHNkDm^YLO z&21JP$4`HPwyQ>Uy^$0kP|o10)2%^b_I!Oy>!G+{-!=C4U+3ZK90uDrp#j9~p`{eV zjgCFgS%yVDYdsE%5AMOJ~PukYMP&MX7sJ0lkM!j7h1S)3XxUvQB%j=gNahGE=60W7|8 zcNGZBDag5l{y<)`zS@0{9b4lWVa5=Tg-zK1-ul4R_cxR~j8}_17BV0eY7yAgcfJ4A z0kN66(h|}G1Ao5vbK4Q9oF3+~gCX0AnSI{_X+rB`R(=0j^i+o20FsUEF_|1TXCqm^ zPeP@3R!UEzI0vIygH=wY={J(=*hL;>`e`jxVx|})Q&N4^`oZo5*Mya#JomnOAklOV zr~nifL-u__x97_(jrAkFlUrsp@E%A@z<@j!)gy+_zo46H@!~FE1y68p-*MeudsPni zE~}b5 zImF6*A#_AsJC`9cTSMsHN$ED>1m5LhZu$!Vvl{fC8{K{ZJTa7wta`>5F|pYxr~5IO zbZOVW`3Dt6D*-705nT7@t8O!kp`GZ2(9v$=4bZ7sTv_m0!sEfTc?6>)=_W?b5qD_y z_(`hW-1DOevnK^|zwqL1kNpwr1y*+p_gd!3U=4;fdA0n;73Mn`*g-6TDs);VzrD07 zEn*?gk>JLcQx`?L`>J4DEeJVQ6^p5MEZKVKJj&8D-VdIG@ zGWpeOlx#N*9>-DzI$rgVEo2sW%>IHrRuDT#7+ACiQuu&~)+I0!XK9lMjC5j8V}QYt zTq|cd0{=nJ($+xbIEMw*8$f9BvGgXCmdX1$*sx9|=xJwbZw{{u?TgT1$a zSx`G#@79r8WSYYq*eOIYs~9^9@m6i(eJqp;myON020R?P6K*yVf7}#0-tu7j} zaNNYSfu4n04$vz&n5F$O<8cUae=xY@t&-QLljlv}A>46N@=^>o{57&!{0b*C#*YHbPyRMK8dyi+^RwkJCpO)}k_%6T;Kc`ZokZVoy1SV(}lpW~E z*A%M#_?d0^9eJY-epz1L7&`KhLBzpcXx3Z6Q842EUiblOs+a=9uvzZ@j#q(K;8mZm zKPdB|5(1w$sx^HkUaaPzEa!y>l21Y^mBNnxI=HcWe?w>FAxh+5nQ#Ad<^O;23ox|e z)!Lc-T!zzq{bx8x9qasIHsr#Ij;A@M#yc8Gh8Ip*Ufw|u-r`^8N5xAsmgWCj>@_6o zKiI3;Y$=YsvlxR|q5RyH^Y{PWEbP{4mhSA)+1jh6>n-P@fx6<<55yFX3TfafZ!f<% zIxE8g0uf6!35PZkGCXbtX2Ujl{9Cb#L?sf_fmnLF)9NNNyI^F}f{W6oURcEnP~`s- z&3@i6$owg@&M1B+HRk4BVGJCRsjup%jTe!&RWx<8$#*qcS3&;Y_yY6K6Al&f6)Zt~ zqBgxHjM4$?r(2ot>Z^WtjYDv;vnS0)E{ezVxli`k7*)_UG#vlLbF;hvp8|KcWIxFX z59u22Y^AYPaF=cRqS#VK58))AgnEvZM8CtGO^`ZZL(cY)(3Y^w(48=s=0Qw}u%g_L z*~%Ru2g7=T%Ey8m^Taq8w+_oX%a+=ypdt@UFZ;ba)|${> z`BHoE)M=iTR$ZH8A=h&%$h$kqjga-5NLLAr^(hdkYpz<=D(~Qe3QF8YaR^pGzkPxX 
zzcGkP99@?QMf(}nZYetcj0@ER7%pySKbr3Gi|ZB9P(*}=`gmLhpsP&51%0&3H9kaZ zSpBkv_1E5$Zn9Ed3u9k$jTfB+*mJ2DjEn+BYQ5}kRvO50O;ck%> z{c36oRjEIwO73M<_=z60D=fDR6&aXaj8SgsExVovnD<;nSt2Zj*HvNnTdDf&o9ShN zR+E^(z+Azmckbb1JNttUYz*+t7#(YO{k0t*%p5+m3uCq*_%?NhP>HR4_*cSO*w~nL z)1N>PpxZt~3Y68ssuUb|y8BSAVwWxh_Hs*|;@Jc!0zS}CVRe(SXalgssa=0AZouX! z)789P==D%_x=P^X8bn3%@t%}D>zM0y5^bWZ`)bJ`v-ZoUp;u<14`GEVJak(K=^Rz% zD@}LUqTf(D7E({V@Z5{gY^i_OQg5oi+jzGH&;5 zL8|pF$d7k`L_Yd(a+0^&bLKZkwQBLJA?tSOm+750-27Yl3G1VFtm;KW*53a6hUvt6 z$lp)n__6kuFoGUK&2OI+Rhb#0i#K@P%ct4IP~CE@%Oir-^XWo~iGbLbi)s50uy7-(lEUr_GP1ikl}&9Q{`PaZZv3we*%BMzm! znLHBTG`HVHFjz(?-LS6f`<&XC-ytippt4%qV;YW$dBMn|*D{pTVZ$di(aL3Bsgu}( zeT%lzSI^*14Ba-&7$dyg8|Y{?{fXaggLd>vRMinuV&fdsmeYLsR|`A66lzO(Fhvm` zMNgSE_XsIIOAy;5P7-zy8>-@;c@DRQgc0Z+Gb%-KwgIM`Hljwd{7A=tFk?QTL8}2+ z?1w+FSh4>ei~aQfDi#a45s7Z0DG;pFukBM$-Nybb*}X2#K=gL#Ga)H{jhmX9%`d_) zj1e9UTop%yZ8dZ=S&z&gP;!@SeeKwfTaOKt`74i&z3^WCZze zI>`M9nTwjL)nD3~RsEkcLSiUZ;BS_?qM0^E0xFA*AmA|U!q1j?umOnfR=%VZ+;9A8 z>6f1l7n|w_5S0hZxD2!o6(;;NTlhi(Jg*Ht?ct|fDl-1FamZfs%m%*o5MVGU^7k*j zKHeNF~)KMDB8Hk23wE5ynrC1Ei%$3SGK-F+C3GkJry&TpV(wAUyPXRmv0un*T3_% zJ>|)@3E^{cogxAR+gO03hp$)REshJV|`<287Z#ZF|Zw zAJJO6I3b4})_$mTvBI`&!DO>}L)1SQH|7db#?8&U-(Z;KTa*5jHt0sH1VQHRtF&1& z=w?8m`Wtddpvy}M^^K_#FS8ju%x?B3hy9}3#7D&B-Bh>s{I~2t##B`_XR9g3zjJtd z0^wAAXW93p@~@xA*IHNV!KTr1*yPWaK(^_KOB z><_LjWs&!xjTzh8oar-=*b5u?wv%G}5@B@4)5o-ZRAX%6FH|IGdY&{^ zd7`@xkHW~YcDE4QDoVKMAi5#VH(ev;G@81??y9h+Yz)FDwn}685s7K=%upI=13bIq z25gjY*yu;>>3em6ac_ygS`VCe=K$J#cBT?xiZKX_$hrEhcPeKlIWeM3uYqFz0C$L;vX9+CRKLx+Vf>*o zO$UB{*zHGB4 zadkYxKYOgIRGA`saq1gE!U?fY%b;eQJR(BwOfq3;G*!Qk63;`mPue=MVamHIO$W^6JOT-8MkK86t#vifDH1!Lq{_cR!Ww8Q#L-?=4^z8F zOGh}TGwzW}GOn>&+09Oc&`)~r%`iOR3BZARlF|}uotPXTA8N@yZa}Ju8Yfo!mLXPk zD_<2pQ6_MO3Z0(Pc_zy5%$%+-_0j5T=HmB>_XKjSpCnkNs{l@z;nD*DcqTRb$v7d{ z^6iJ<-V5sM@{?*K*6fz!s9O8cx8aO!e!D4g(Rh1r6yTcgv1?JI8CQ>^an{PLn}y%` zG`Ylfqv2!?0E?0m6#V3|b*0N|_F{Om=w5BdAnMs_AgF-jb}+}v%21T{;(}~{h^nc{ zyLHS3Lz;yC(r^@zKshUo+E&JW>DV6=1WKD3hHd4HR5kOyL0E2onr5GC(}8M_IJkai 
zw&@-Xp4wxKR01-)JVU9E4fgtHde68xJsmB!La(~~s?06LD|hhGuPoyEA1%-whD=B3|T+nu&im_gzD(@I!vK9*3gtziKE?yGN>WQ56a7#&dBFIpRM{s z&V0RS9se2?C1yWtaT*o*Aqp@THf6s-y=F0(=;Ufi7)ac`(0p;KJCOGz}Ql z8N^Vk&;Y_aqtIEjFH3E6FhZDnV)MC~(EF*d%Sxy#uTV8iO%9ja9n#*dD(q5bJ}B}* z29yy#6BzcU@wx7l4lZHQM2@8HgnLKrKIA$XpSA8yv-g^@=Y%-MO*Xp0zn+=yYYguh z9v$XhDcq|Sxg^ZOu67B;tu8GA|Vrw2`S^Li8FKQj##re6#FmleX@Zz@$x>SS(I*U%D&tr zME?WoJu($N{D~fjHHcdoglG)ZbUfI^obcezx zsRkOHC_LQ1Pv<`P!mmfX@JS;dJ#lM=&Mct&l->HJP?ggT;Ou{(wI(6utu@JsYo*uN ziKzUj@i*t95I-O{X6*R`g?52j1(80ef_>hBQ;Z^1G37Qd0ZUbOz(W&YL#T%H$Zv3X z71TLpE&Kuzi}A(!Ha#r_FA+CS`+`ns>00ulPqmceXN+`OY7Y`ZbO=s&-2!u#jt&U9 z5C5z(OU^vA)v2;UXl8IZ{2=Q&5UH}-(z5TRKqUA?(^Gr_mM+c}#aJj_e z*r7i}ygwu58l#tF!9Y4BFtxb_NBg@?@u(66OyGp6z3!hB05^SP`yXqisAp34DY4$J z$R}BoomL;e_3hJA0B!Ire2J@Q)WM}kH%Hy0by@)jDDc#gHTBhk8hKW!xv+NJJUwS zrzlDL5jpGgWVKWgynY&xcB1E(;R4iu{A+l~+Yy61qcrfRIoOgKZ0eq5dbl})QtF

^0IYpih>v`=^JtXn!R>f9`i_9E| z*ormRp_sQLb=V0vbp_lAUizP5{SwW_vcy+-kV4z@uqaWHV`o(FC4Ow$U1vOaup#A| z4VF1$xkQL+yv7C&CVrDuUpz8eEfW3q+?q7|Ye_FC@?`O8pbzn%(c^tfaQ}rXPu;KZ zzyr+70Z9u!Z0aP@2J;E8x5IT)v0@fjzdi>-?Jt=3X%1R%ESYiY54_253cWfHSL^LQ zc7)snE4c@RaM!XQxA6)@;`tn&Nv&otJTUsed=Gy;Js{J%KW;VTeGL;}H8jB@38-sj z>5QMyaEzrtuzN7RIh8CncA=#_#Txk@HMj4Zosy$4%3XzGn*Lfh`*u$%5g`aG9cf2h)%6P(C#AjTrkWn1gNVg(T6)V(MJlO3M$ zYN&Jg8sF->MQ4d*ms0Qg@JR=vyap^$cIYy>?onW7;7r$Sp7wcUu};qPk=k!Nd{{v% z6d|M`)%}L&A5#p>M%*?4kbsz@(lv7BKI!FnVX~#Ba%|-(h~VZ3#0d-eUAw@D5WTt* znBmjVVE#Q4Wn7U$7W-8WE~Yfc24vR-8UAf^sdCVHWGm0v^AY|R-^fcL8Y=yZx0{8f zrCdkst^T&%yoAafA3{7E+wB z;xEZSDAOO?S0b>-3Ogzzy6@ay7Kxkkz)QQ}zru;^GIn3`0K2U}H@rOi{hs|#jv8akulI{3>=HwnyMpRD8>1eRXSKK1V%Ytl86i@MbA{VRiVN<8jOS=? zfL#7hxb`dZFS-@)0RRt}a!gP6eb#rSIs}9#JB~NTb*+gzjMR$1Ky?D##rPAsR+s*7 z*s(c(Aqgrs{!NROP@2HoXvB>Ln~HhtM9pA&eFS`OIJ()#PH}xL7FsA*qVq|V>UiTu zs{O?+73%!Mq;@WG;wOoF0Y*Ba2{QM-)M6svhL5H60Yq*iiiXj=DZp;TzQ|h1)_PUf zLWAJQ`KnGj<7Q91B)ue>6tZm#IUCb-^6MQc5U5>8)AK)#s}drVB!*KSY*?igoAYm` z!k+)FPyr0qqXe_RBHh}9m*RMMaV&mr=GsN^{!rb4RAKT9u>VMLZBF|2y0l6OQ~JY! 
zRp|gA)5vX3W3miOauiiv8o*wUI#dWAaKE3}hrM*6yV6o?GvzFlawkK=)#$J*=xQo# z1bNM#BTBn$Wot!13J$nbJ2U$s$x_Lo^*wPTn4oyrkfRc?SW|+aPw`-YGzAT$0lI~` z$xVCpV(nOLndsjt2>>kZ(xEx_1N+j(18==j6I!9Xh;t*=ewlwRnQpMd)gaKPIo_QS zdR;~%yT7gdE79(lPm>zRV^#O@M#Q!SWD%j}s?lv2Ml;is#_neY3l&Q6H0q_W^zno% zv-=8|h7_kQFFhF9KD}M{@`cF`;(^-E*0s>vR(UsswT>!Y5$JYe-^4w~y50T2&g@e_ zZygBfvt2z-<}2iBNl-WZTW0`JhY0t>-0BYy*}r{k?(U*#-s1H+NSK;CoUZdHy4pA% zZNQg4qMB|x0|F?0qw(%z6S~UyhM# zr8n~u(ankr7|(B@O;fS2Zn~Y^6Db86B%Pr~jJ}lB{UT0lWfnVTwi?i#s6#_ahqvPs zuLD!-zX2$qCtVDcDvK!{pPHSyb~P!avJk3?Q3D zTGN3PV|spT?{STW#5Mmz2sLpcNMm}~M3Z?Ck4@&gRY#=Xd6m_@-HO9|%ADm+<@~$g7g!gt>@RTsoUGAN)Pm)O_lBL`xrbb<14^6kc^TZexGp&W zcCFbFhlRi?ryr#<>ZWfp+5QyGvr<)v;+}VE!{6(E@B}LG)-f7*$0)O$&7=G4mb1A0 zMyxg6!k#M*JQw*28@GN1FR;B~|6=uwINJi5 zZBYvAMK?5_OY8B@o^P_azj(o!v@7c`KHh%uGxML9z2DmIZ$0z*;g5{#31yp)O$U0G z>zZ}M_k?*{-zV>?b^Rfm_4o0uvvuwdcB`rS*VZT0oqM??LGtSw`QX$o$*tuc z>k`}@X0Z2O?b#)CLw{BN)oxjn)%6~m--_5Wue~UBTUZMZznD)B${bzpd&wi!-#L@ox z<+@te&Bwy6FLb;P=D1j4AfJ4<`RC1j{V&bB7yVTH(s_U351HR~4p!F-r`&$O{lo9R zZ#hw-0eqjXZ?5b4=^3zHYu`orUyXYbZ;RH}-vUie&AcJ&-TbA&)93+s$THhDce3pJ=FTYFd4?|gs!P7JudcMIx%TXigd-G8Zz`$K>0U$>t&2RO0Rvvlgxtlv=+ z*ees2)%3mB)vnnCoLaiV0KBFBfw5IH_uYbn-mxxAKX_SwKBV{N_Q69YuOfo#mUWae z{N4KPW%}N^tg|;Z%5MuiwLE~W+DxAFx60J{8+@JT8hwjY-=g|-TVKxZh`3X2ad&<` zb)CDYoAYVt%g39J95c!c`M-R{_WIx{JMFrE9T(ai<`DRA(I-nci#m<`oo> zWLk2&l5Nukx6QwMectFUhGJ?yt37w(w32+W6(=J3n$= zesd^hDVP6!y6(PGWnSHbKU~sV*bTlb+`ZMHcY0C%hK+|0{!icjljpsmePr#PRhP`hkvZrgclpY0c+6grKy+80k+A81$aEpG0 zzqAjyV()t7kM`~QRL;+v?tPePr6FvUXszhK_c8Mm;0E3DA1BoB{O9((d)VK>P^p#y O2s~Z=T-G@yGywpl#8a{W literal 0 HcmV?d00001 From 9ee903d03c139de48b91fe2efcdaa68f6cba1841 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 14:44:00 +0800 Subject: [PATCH 077/150] fix monitor process for multiple task --- det-yolov5-tmi/mining/mining_cald.py | 19 +++++++++++++++- det-yolov5-tmi/start.py | 34 ++++++++++++++++++++-------- det-yolov5-tmi/train.py | 30 ++++++++++++------------ det-yolov5-tmi/utils/ymir_yolov5.py | 28 
+++++++++++++++++++---- 4 files changed, 81 insertions(+), 30 deletions(-) diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index d93fb43..ba0f825 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -6,6 +6,7 @@ from typing import Dict, List, Tuple import cv2 +from easydict import EasyDict as edict import numpy as np from nptyping import NDArray from scipy.stats import entropy @@ -32,6 +33,21 @@ def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: class MiningCald(YmirYolov5): + def __init__(self, cfg: edict): + super().__init__(cfg) + + if cfg.ymir.run_mining and cfg.ymir.run_infer: + mining_task_idx = 0 + # infer_task_idx = 1 + task_num = 2 + else: + mining_task_idx = 0 + # infer_task_idx = 0 + task_num = 1 + + self.task_idx = mining_task_idx + self.task_num = task_num + def mining(self) -> List: N = dr.items_count(env.DatasetType.CANDIDATE) monitor_gap = max(1, N // 100) @@ -86,7 +102,8 @@ def mining(self) -> List: idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, + task_idx=self.task_idx, task_num=self.task_num) monitor.write_monitor_logger(percent=percent) return mining_result diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 9e82742..61c4dbe 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -11,7 +11,8 @@ from ymir_exc import env, monitor from ymir_exc import result_writer as rw -from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, download_weight_file, get_merged_config, +from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, + download_weight_file, get_merged_config, get_weight_file, get_ymir_process) @@ -23,10 +24,19 @@ def start() -> int: if cfg.ymir.run_training: _run_training(cfg) else: + if cfg.ymir.run_mining and cfg.ymir.run_infer: + mining_task_idx = 0 + 
infer_task_idx = 1 + task_num = 2 + else: + mining_task_idx = 0 + infer_task_idx = 0 + task_num = 1 + if cfg.ymir.run_mining: - _run_mining(cfg) + _run_mining(cfg, mining_task_idx, task_num) if cfg.ymir.run_infer: - _run_infer(cfg) + _run_infer(cfg, infer_task_idx, task_num) return 0 @@ -109,25 +119,28 @@ def _run_training(cfg: edict) -> None: monitor.write_monitor_logger(percent=1.0) -def _run_mining(cfg: edict()) -> None: +def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: # generate data.yaml for mining out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + monitor.write_monitor_logger(percent=get_ymir_process( + stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) command = 'python3 mining/mining_cald.py' logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) - monitor.write_monitor_logger(percent=1.0) + monitor.write_monitor_logger(percent=get_ymir_process( + stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) -def _run_infer(cfg: edict) -> None: +def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: # generate data.yaml for infer out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + monitor.write_monitor_logger(percent=get_ymir_process( + stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) N = dr.items_count(env.DatasetType.CANDIDATE) infer_result = dict() @@ -142,11 +155,12 @@ def _run_infer(cfg: edict) -> None: idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num) 
monitor.write_monitor_logger(percent=percent) rw.write_infer_result(infer_result=infer_result) - monitor.write_monitor_logger(percent=1.0) + monitor.write_monitor_logger(percent=get_ymir_process( + stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) if __name__ == '__main__': diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 449c85d..c42098b 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -40,25 +40,25 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config, write_old_ymir_training_result -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -from utils.plots import plot_evolve, plot_labels -from utils.metrics import fitness -from utils.loss import ComputeLoss -from utils.loggers.wandb.wandb_utils import check_wandb_resume -from utils.loggers import Loggers +import val # for end-of-epoch mAP +from models.experimental import attempt_load +from models.yolo import Model +from utils.autoanchor import check_anchors +from utils.autobatch import check_train_batch_size +from utils.callbacks import Callbacks +from utils.datasets import create_dataloader +from utils.downloads import attempt_download from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer) -from utils.downloads import attempt_download -from utils.datasets import create_dataloader -from utils.callbacks import Callbacks -from utils.autobatch import check_train_batch_size -from utils.autoanchor import check_anchors -from models.yolo import Model -from 
models.experimental import attempt_load -import val # for end-of-epoch mAP +from utils.loggers import Loggers +from utils.loggers.wandb.wandb_utils import check_wandb_resume +from utils.loss import ComputeLoss +from utils.metrics import fitness +from utils.plots import plot_evolve, plot_labels +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config, write_old_ymir_training_result LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index bc3fe7e..aa80a72 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -32,7 +32,13 @@ class YmirStage(IntEnum): CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def get_ymir_process(stage: YmirStage, p: float) -> float: +def get_ymir_process(stage: YmirStage, p: float, task_idx: int=0, task_num: int=1) -> float: + """ + stage: pre-process/task/post-process + p: percent for stage + task_idx: index for multiple tasks like mining (task_idx=0) and infer (task_idx=1) + task_num: the total number of multiple tasks. 
+ """ # const value for ymir process PREPROCESS_PERCENT = 0.1 TASK_PERCENT = 0.8 @@ -41,12 +47,14 @@ def get_ymir_process(stage: YmirStage, p: float) -> float: if p < 0 or p > 1.0: raise Exception(f'p not in [0,1], p={p}') + init = task_idx * 1.0 / task_num + ratio = 1.0 / task_num if stage == YmirStage.PREPROCESS: - return PREPROCESS_PERCENT * p + return init + PREPROCESS_PERCENT * p * ratio elif stage == YmirStage.TASK: - return PREPROCESS_PERCENT + TASK_PERCENT * p + return init + (PREPROCESS_PERCENT + TASK_PERCENT * p) * ratio elif stage == YmirStage.POSTPROCESS: - return PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p + return init + (PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p) * ratio else: raise NotImplementedError(f'unknown stage {stage}') @@ -101,6 +109,18 @@ class YmirYolov5(): def __init__(self, cfg: edict): self.cfg = cfg + if cfg.ymir.run_mining and cfg.ymir.run_infer: + # mining_task_idx = 0 + infer_task_idx = 1 + task_num = 2 + else: + # mining_task_idx = 0 + infer_task_idx = 0 + task_num = 1 + + self.task_idx=infer_task_idx + self.task_num=task_num + device = select_device(cfg.param.get('gpu_id', 'cpu')) self.model = self.init_detector(device) From 691bed7fcedc629e933223a8dae4672208d3e3d9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 14:53:45 +0800 Subject: [PATCH 078/150] update dockerfile --- det-yolov5-tmi/cuda102.dockerfile | 4 ++-- det-yolov5-tmi/cuda111.dockerfile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index 3afe7e4..bd7fd97 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -20,13 +20,13 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U 
"git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi # Copy file from host to docker and install requirements -ADD ./det-yolov5-tmi /app +COPY . /app RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ && pip install -r /app/requirements.txt diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index ca19784..f0ab4cc 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -21,13 +21,13 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ # install ymir-exc sdk RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install --force-reinstall -U "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ + pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ else \ pip install ymir-exc; \ fi # Copy file from host to docker and install requirements -ADD ./det-yolov5-tmi /app +COPY . 
/app RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ && pip install -r /app/requirements.txt From 95fac1ed1769299279325e71f37dc0f68613463f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 15:52:08 +0800 Subject: [PATCH 079/150] add system config into training-template --- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 2 + det-mmdetection-tmi/docker/Dockerfile.cuda111 | 2 + det-mmdetection-tmi/infer-template.yaml | 8 +- det-mmdetection-tmi/mining-template.yaml | 8 +- .../mmdet/core/evaluation/eval_hooks.py | 3 + det-mmdetection-tmi/mmdet/datasets/ymir.py | 19 +---- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 77 ++++++++++++++++--- det-mmdetection-tmi/start.py | 5 -- det-mmdetection-tmi/training-template.yaml | 2 + det-mmdetection-tmi/ymir_infer.py | 19 ++++- det-mmdetection-tmi/ymir_mining.py | 20 ++++- 11 files changed, 115 insertions(+), 50 deletions(-) diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 62ea15e..dd73fb5 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -7,6 +7,7 @@ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel # mmcv>=1.3.17, <=1.5.0 ARG MMCV="1.4.3" ARG SERVER_MODE=prod +ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -14,6 +15,7 @@ ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV LANG=C.UTF-8 ENV FORCE_CUDA="1" ENV PYTHONPATH=. 
+ENV YMIR_VERSION=${YMIR} # Set timezone RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index 08fe8f4..e4320d4 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -7,12 +7,14 @@ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime # mmcv>=1.3.17, <=1.5.0 ARG MMCV="1.4.3" ARG SERVER_MODE=prod +ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" ENV FORCE_CUDA="1" ENV PYTHONPATH=. +ENV YMIR_VERSION=${YMIR} # Set timezone RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && echo 'Asia/Shanghai' >/etc/timezone diff --git a/det-mmdetection-tmi/infer-template.yaml b/det-mmdetection-tmi/infer-template.yaml index cc2f1e7..bf61d79 100644 --- a/det-mmdetection-tmi/infer-template.yaml +++ b/det-mmdetection-tmi/infer-template.yaml @@ -1,8 +1,4 @@ -# samples_per_gpu: 2 -# workers_per_gpu: 2 -# max_epochs: 300 -# config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' -# args_options: '' +shm_size: '32G' +export_format: 'ark:raw' cfg_options: '' conf_threshold: 0.2 -# port: 12345 diff --git a/det-mmdetection-tmi/mining-template.yaml b/det-mmdetection-tmi/mining-template.yaml index 7dd411c..5649a3c 100644 --- a/det-mmdetection-tmi/mining-template.yaml +++ b/det-mmdetection-tmi/mining-template.yaml @@ -1,7 +1,3 @@ -# samples_per_gpu: 2 -# workers_per_gpu: 2 -# max_epochs: 300 -# config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' -# args_options: '' +shm_size: '32G' +export_format: 'ark:raw' cfg_options: '' -# port: 12345 diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index dc40801..6b10dc1 100644 --- 
a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -84,6 +84,8 @@ def _do_evaluate(self, runner): # the best checkpoint if self.save_best and key_score: self._save_ckpt(runner, key_score) + + # TODO obtain best_score from runner # best_score = runner.meta['hook_msgs'].get( # 'best_score', self.init_value_map[self.rule]) # if self.compare_func(key_score, best_score): @@ -176,6 +178,7 @@ def _do_evaluate(self, runner): if self.save_best and key_score: self._save_ckpt(runner, key_score) + # TODO obtain best_score from runner # best_score = runner.meta['hook_msgs'].get( # 'best_score', self.init_value_map[self.rule]) # if self.compare_func(key_score, best_score): diff --git a/det-mmdetection-tmi/mmdet/datasets/ymir.py b/det-mmdetection-tmi/mmdet/datasets/ymir.py index 1276310..9215624 100644 --- a/det-mmdetection-tmi/mmdet/datasets/ymir.py +++ b/det-mmdetection-tmi/mmdet/datasets/ymir.py @@ -2,8 +2,6 @@ # wangjiaxin 2022-04-25 import os.path as osp - -# from PIL import Image import imagesize import json @@ -58,10 +56,6 @@ def load_annotations(self, ann_file): for line in lines: # split any white space img_path, ann_path = line.strip().split() - img_path = osp.join(self.data_root, self.img_prefix, img_path) - ann_path = osp.join(self.data_root, self.ann_prefix, ann_path) - # img = Image.open(img_path) - # width, height = img.size width, height = imagesize.get(img_path) images.append( dict(id=image_counter, @@ -104,8 +98,7 @@ def load_annotations(self, ann_file): self.img_ids = self.coco.get_img_ids() # self.img_ids = list(self.coco.imgs.keys()) assert len(self.img_ids) > 0, 'image number must > 0' - N = len(self.img_ids) - print(f'load {N} image from YMIR dataset') + print(f'load {len(self.img_ids)} image from YMIR dataset') data_infos = [] total_ann_ids = [] @@ -136,10 +129,6 @@ def get_txt_ann_info(self, txt_path): Returns: dict: Annotation info of specified index. 
""" - - # img_id = self.data_infos[idx]['id'] - # txt_path = osp.splitext(img_path)[0]+'.txt' - # txt_path = self.get_ann_path_from_img_path(img_path) anns = [] if osp.exists(txt_path): with open(txt_path, 'r') as fp: @@ -150,13 +139,10 @@ def get_txt_ann_info(self, txt_path): obj = [int(x) for x in line.strip().split(',')[0:5]] # YMIR category id starts from 0, coco from 1 category_id, xmin, ymin, xmax, ymax = obj - bbox = [xmin, ymin, xmax, ymax] h, w = ymax-ymin, xmax-xmin ignore = 0 if self.min_size: assert not self.test_mode - w = bbox[2] - bbox[0] - h = bbox[3] - bbox[1] if w < self.min_size or h < self.min_size: ignore = 1 @@ -185,10 +171,7 @@ def get_cat_ids(self, idx): """ cat_ids = [] - # img_path = self.data_infos[idx]['file_name'] - # txt_path = self.get_ann_path_from_img_path(img_path) txt_path = self.data_infos[idx]['ann_path'] - txt_path = osp.join(self.data_root, self.ann_prefix, txt_path) if osp.exists(txt_path): with open(txt_path, 'r') as fp: lines = fp.readlines() diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 2c232e2..21bbc62 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -7,12 +7,13 @@ import os import os.path as osp from enum import IntEnum -from typing import Any, List +from typing import Any, List, Optional import mmcv from easydict import EasyDict as edict from mmcv import Config from nptyping import NDArray, Shape, UInt8 +from packaging.version import Version from ymir_exc import env from ymir_exc import result_writer as rw @@ -27,7 +28,13 @@ class YmirStage(IntEnum): CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def get_ymir_process(stage: YmirStage, p: float = 0.0) -> float: +def get_ymir_process(stage: YmirStage, p: float, task_idx: int = 0, task_num: int = 1) -> float: + """ + stage: pre-process/task/post-process + p: percent for stage + task_idx: index for multiple tasks like mining (task_idx=0) and infer 
(task_idx=1) + task_num: the total number of multiple tasks. + """ # const value for ymir process PREPROCESS_PERCENT = 0.1 TASK_PERCENT = 0.8 @@ -36,12 +43,14 @@ def get_ymir_process(stage: YmirStage, p: float = 0.0) -> float: if p < 0 or p > 1.0: raise Exception(f'p not in [0,1], p={p}') + init = task_idx * 1.0 / task_num + ratio = 1.0 / task_num if stage == YmirStage.PREPROCESS: - return PREPROCESS_PERCENT * p + return init + PREPROCESS_PERCENT * p * ratio elif stage == YmirStage.TASK: - return PREPROCESS_PERCENT + TASK_PERCENT * p + return init + (PREPROCESS_PERCENT + TASK_PERCENT * p) * ratio elif stage == YmirStage.POSTPROCESS: - return PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p + return init + (PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p) * ratio else: raise NotImplementedError(f'unknown stage {stage}') @@ -158,7 +167,15 @@ def get_weight_file(cfg: edict) -> str: return "" -def write_ymir_training_result(last: bool = False, key_score=None): +def write_ymir_training_result(last: bool = False, key_score: Optional[float] = None): + YMIR_VERSION = os.environ.get('YMIR_VERSION', '1.2.0') + if Version(YMIR_VERSION) >= Version('1.2.0'): + write_latest_ymir_training_result(last, key_score) + else: + write_ancient_ymir_training_result(key_score) + + +def write_latest_ymir_training_result(last: bool = False, key_score: Optional[float] = None): if key_score: logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') @@ -170,14 +187,14 @@ def write_ymir_training_result(last: bool = False, key_score=None): # eval_result may be empty dict {}. 
map = eval_result.get('bbox_mAP_50', 0) - work_dir = os.getenv('YMIR_MODELS_DIR') - if work_dir is None or not osp.isdir(work_dir): + WORK_DIR = os.getenv('YMIR_MODELS_DIR') + if WORK_DIR is None or not osp.isdir(WORK_DIR): raise Exception( - f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {work_dir}') + f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') # assert only one model config file in work_dir result_files = [osp.basename(f) for f in glob.glob( - osp.join(work_dir, '*')) if osp.basename(f) != 'result.yaml'] + osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] if last: # save all output file @@ -186,7 +203,7 @@ def write_ymir_training_result(last: bool = False, key_score=None): stage_name='last') else: # save newest weight file in format epoch_xxx.pth or iter_xxx.pth - weight_files = [osp.join(work_dir, f) for f in result_files if f.startswith( + weight_files = [osp.join(WORK_DIR, f) for f in result_files if f.startswith( ('iter_', 'epoch_')) and f.endswith('.pth')] if len(weight_files) > 0: @@ -194,7 +211,7 @@ def write_ymir_training_result(last: bool = False, key_score=None): max(weight_files, key=os.path.getctime)) stage_name = osp.splitext(newest_weight_file)[0] - training_result_file = osp.join(work_dir, 'result.yaml') + training_result_file = osp.join(WORK_DIR, 'result.yaml') if osp.exists(training_result_file): with open(training_result_file, 'r') as f: training_result = yaml.safe_load(f) @@ -207,3 +224,39 @@ def write_ymir_training_result(last: bool = False, key_score=None): rw.write_model_stage(files=[newest_weight_file] + config_files, mAP=float(map), stage_name=stage_name) + + +def write_ancient_ymir_training_result(key_score: Optional[float] = None): + if key_score: + logging.info(f'key_score is {key_score}') + + COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') + if COCO_EVAL_TMP_FILE is None: + raise Exception( + 'please set valid environment variable 
COCO_EVAL_TMP_FILE to write result into json file') + + eval_result = mmcv.load(COCO_EVAL_TMP_FILE) + # eval_result may be empty dict {}. + map = eval_result.get('bbox_mAP_50', 0) + + WORK_DIR = os.getenv('YMIR_MODELS_DIR') + if WORK_DIR is None or not osp.isdir(WORK_DIR): + raise Exception( + f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') + + # assert only one model config file in work_dir + result_files = [osp.basename(f) for f in glob.glob( + osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + + training_result_file = osp.join(WORK_DIR, 'result.yaml') + if osp.exists(training_result_file): + with open(training_result_file, 'r') as f: + training_result = yaml.safe_load(f) + + training_result['model'] = result_files + training_result['map'] = max(map, training_result['map']) + else: + training_result = dict(model=result_files, map=map) + + with open(training_result_file, 'w') as f: + yaml.safe_dump(training_result, f) diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index e4b1398..12a6f9c 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -39,17 +39,12 @@ def _run_mining() -> None: command = 'python3 ymir_mining.py' logging.info(f'start mining: {command}') subprocess.run(command.split(), check=True) - - monitor.write_monitor_logger(percent=1.0) logging.info("mining finished") - def _run_infer() -> None: command = 'python3 ymir_infer.py' logging.info(f'start infer: {command}') subprocess.run(command.split(), check=True) - - monitor.write_monitor_logger(percent=1.0) logging.info("infer finished") diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index a56133d..37b2da9 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -1,3 +1,5 @@ +shm_size: '32G' +export_format: 'ark:raw' samples_per_gpu: 2 workers_per_gpu: 2 max_epochs: 300 diff --git 
a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index b4716e2..0530bf0 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -81,6 +81,18 @@ class YmirModel: def __init__(self, cfg: edict): self.cfg = cfg + if cfg.ymir.run_mining and cfg.ymir.run_infer: + # mining_task_idx = 0 + infer_task_idx = 1 + task_num = 2 + else: + # mining_task_idx = 0 + infer_task_idx = 0 + task_num = 1 + + self.task_idx=infer_task_idx + self.task_num=task_num + # Specify the path to model config and checkpoint file config_file = get_config_file(cfg) checkpoint_file = get_weight_file(cfg) @@ -120,11 +132,14 @@ def main(): idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + percent = get_ymir_process( + stage=YmirStage.TASK, p=idx / N, task_idx=model.task_idx, task_num=model.task_num) monitor.write_monitor_logger(percent=percent) rw.write_infer_result(infer_result=infer_result) - + percent = get_ymir_process(stage=YmirStage.POSTPROCESS, + p=1, task_idx=model.task_idx, task_num=model.task_num) + monitor.write_monitor_logger(percent=percent) return 0 diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py index f4bea0c..0299edc 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining.py @@ -7,6 +7,7 @@ from typing import Any, Dict, List, Tuple import cv2 +from easydict import EasyDict as edict import numpy as np from nptyping import NDArray from scipy.stats import entropy @@ -238,6 +239,19 @@ def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: class YmirMining(YmirModel): + def __init__(self, cfg: edict): + super().__init__(cfg) + if cfg.ymir.run_mining and cfg.ymir.run_infer: + mining_task_idx = 0 + # infer_task_idx = 1 + task_num = 2 + else: + mining_task_idx = 0 + # infer_task_idx = 0 + task_num = 1 + self.task_idx = mining_task_idx + self.task_num = task_num + def mining(self): N = 
dr.items_count(env.DatasetType.CANDIDATE) monitor_gap = max(1, N // 100) @@ -295,7 +309,8 @@ def mining(self): idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N) + percent = get_ymir_process( + stage=YmirStage.TASK, p=idx / N, task_idx=self.task_idx, task_num=self.task_num) monitor.write_monitor_logger(percent=percent) return mining_result @@ -350,6 +365,9 @@ def main(): mining_result = miner.mining() rw.write_mining_result(mining_result=mining_result) + percent = get_ymir_process(stage=YmirStage.POSTPROCESS, + p=1, task_idx=miner.task_idx, task_num=miner.task_num) + monitor.write_monitor_logger(percent=percent) return 0 From d59e3230c1903f97c5b6b3301f1330874b96cb86 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 16:01:26 +0800 Subject: [PATCH 080/150] update comment --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 21bbc62..dd9b333 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -134,7 +134,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: interval = max(1, mmdet_cfg.runner.max_epochs//30) mmdet_cfg.evaluation.interval = interval mmdet_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') - # Whether to evaluating the AP for each class + # TODO Whether to evaluating the AP for each class # mmdet_cfg.evaluation.classwise = True return mmdet_cfg @@ -142,7 +142,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority - find weight file in cfg.param.model_params_path or cfg.param.model_params_path + find weight file in cfg.param.pretrained_model_params or cfg.param.model_params_path """ if cfg.ymir.run_training: model_params_path: List = 
cfg.param.pretrained_model_params From 685f2c766fe731b8b3ea1282e71b71bfd6d87ab1 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 19 Jul 2022 16:27:24 +0800 Subject: [PATCH 081/150] add default value --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index dd9b333..e8819b3 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -145,9 +145,9 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.pretrained_model_params or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path: List = cfg.param.pretrained_model_params + model_params_path: List = cfg.param.get('pretrained_model_params', []) else: - model_params_path: List = cfg.param.model_params_path + model_params_path: List = cfg.param.get('model_params_path', []) model_dir = cfg.ymir.input.models_dir model_params_path = [ From 0c134eca39c56f95d65a6447a564a18456ad4db9 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 20 Jul 2022 19:33:53 +0800 Subject: [PATCH 082/150] update runtime --- det-mmdetection-tmi/requirements/runtime.txt | 1 + det-mmdetection-tmi/ymir_infer.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/requirements/runtime.txt b/det-mmdetection-tmi/requirements/runtime.txt index 9754131..cf0fac6 100644 --- a/det-mmdetection-tmi/requirements/runtime.txt +++ b/det-mmdetection-tmi/requirements/runtime.txt @@ -2,6 +2,7 @@ matplotlib numpy pycocotools six +scipy terminaltables easydict nptyping diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 0530bf0..ecec19e 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -60,9 +60,9 @@ def mmdet_result_to_ymir(results: List[DETECTION_RESULT], def get_config_file(cfg): if 
cfg.ymir.run_training: - model_params_path: List = cfg.param.pretrained_model_params + model_params_path: List = cfg.param.get('pretrained_model_params',[]) else: - model_params_path: List = cfg.param.model_params_path + model_params_path: List = cfg.param.get('model_params_path',[]) model_dir = cfg.ymir.input.models_dir config_files = [ From 384c75ebe5204834bdf7f7a034c697a67025442a Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 20 Jul 2022 19:35:31 +0800 Subject: [PATCH 083/150] fix asnumpy error --- det-yolov4-mining/write_result.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/det-yolov4-mining/write_result.py b/det-yolov4-mining/write_result.py index 6ee21d9..ea3f19f 100644 --- a/det-yolov4-mining/write_result.py +++ b/det-yolov4-mining/write_result.py @@ -247,16 +247,12 @@ def _write_results(prediction, num_classes, input_dim, confidence=0.5, nms_conf= # """ # yolov3 alexey def _prep_results(load_images, img_batch, output, input_dim): - im_dim_list = nd.array([(x.shape[1], x.shape[0]) for x in load_images]) - im_dim_list = nd.tile(im_dim_list, 2) - im_dim_list = im_dim_list[output[:, 0], :] - scaling_factor = input_dim / im_dim_list - output[:, 3:7] /= scaling_factor + output = output.asnumpy() for i in range(output.shape[0]): - output[i, [3, 5]] = nd.clip(output[i, [3, 5]], a_min=0.0, a_max=im_dim_list[i][0].asscalar()) - output[i, [4, 6]] = nd.clip(output[i, [4, 6]], a_min=0.0, a_max=im_dim_list[i][1].asscalar()) + h, w = load_images[0].shape[0:2] + output[i, [3, 5]] = np.clip(output[i, [3, 5]]*w/input_dim, a_min=0.0, a_max=w) + output[i, [4, 6]] = np.clip(output[i, [4, 6]]*h/input_dim, a_min=0.0, a_max=h) - output = output.asnumpy() boxes = [] for i in range(len(load_images)): bboxs = [] From e903573020c4acdbf7dc4c1afd381e402ae7c76e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 22 Jul 2022 11:35:28 +0800 Subject: [PATCH 084/150] update result writer --- .gitignore | 4 - det-yolov5-tmi/Dockerfile | 64 ------ 
det-yolov5-tmi/README.md | 306 ++------------------------- det-yolov5-tmi/README_yolov5.md | 304 ++++++++++++++++++++++++++ det-yolov5-tmi/cuda102.dockerfile | 2 + det-yolov5-tmi/cuda111.dockerfile | 3 + det-yolov5-tmi/mining/mining_cald.py | 3 +- det-yolov5-tmi/start.py | 62 +++--- det-yolov5-tmi/train.py | 19 +- det-yolov5-tmi/utils/ymir_yolov5.py | 106 +++++----- 10 files changed, 412 insertions(+), 461 deletions(-) delete mode 100644 det-yolov5-tmi/Dockerfile create mode 100644 det-yolov5-tmi/README_yolov5.md diff --git a/.gitignore b/.gitignore index 6dbd818..5563689 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,3 @@ -# dockerfile for China -*.dockerfile.cn -det-mmdetection-tmi/docker/*.cn - *.png *.jpg *.img diff --git a/det-yolov5-tmi/Dockerfile b/det-yolov5-tmi/Dockerfile deleted file mode 100644 index 489dd04..0000000 --- a/det-yolov5-tmi/Dockerfile +++ /dev/null @@ -1,64 +0,0 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license - -# Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch -FROM nvcr.io/nvidia/pytorch:21.10-py3 - -# Install linux packages -RUN apt update && apt install -y zip htop screen libgl1-mesa-glx - -# Install python dependencies -COPY requirements.txt . -RUN python -m pip install --upgrade pip -RUN pip uninstall -y torch torchvision torchtext -RUN pip install --no-cache -r requirements.txt albumentations wandb gsutil notebook \ - torch==1.10.2+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html -# RUN pip install --no-cache -U torch torchvision - -# Create working directory -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -# Copy contents -COPY . 
/usr/src/app - -# Downloads to user config dir -ADD https://ultralytics.com/assets/Arial.ttf /root/.config/Ultralytics/ - -# Set environment variables -# ENV HOME=/usr/src/app - - -# Usage Examples ------------------------------------------------------------------------------------------------------- - -# Build and Push -# t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t - -# Pull and Run -# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t - -# Pull and Run with local directory access -# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t - -# Kill all -# sudo docker kill $(sudo docker ps -q) - -# Kill all image-based -# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest) - -# Bash into running container -# sudo docker exec -it 5a9b5863d93d bash - -# Bash into stopped container -# id=$(sudo docker ps -qa) && sudo docker start $id && sudo docker exec -it $id bash - -# Clean up -# docker system prune -a --volumes - -# Update Ubuntu drivers -# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/ - -# DDP test -# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3 - -# GCP VM from Image -# docker.io/ultralytics/yolov5:latest diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index b03a7c5..fba577d 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -1,304 +1,30 @@ -

-

- - -

-
-
- CI CPU testing - YOLOv5 Citation - Docker Pulls -
- Open In Colab - Open In Kaggle - Join Forum -
+# yolov5-ymir readme +- [yolov5 readme](./README_yolov5.md) -
-

-YOLOv5 🚀 is a family of object detection architectures and models pretrained on the COCO dataset, and represents Ultralytics - open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. -

+## change log - +- add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining - +- add `utils/ymir_yolov5.py` for useful functions -
+ - `get_merged_config()` add ymir path config `cfg.yaml` and hyper-parameter `cfg.param` -##
Documentation
+ - `convert_ymir_to_yolov5()` generate yolov5 dataset config file `data.yaml` -See the [YOLOv5 Docs](https://docs.ultralytics.com) for full documentation on training, testing and deployment. + - `write_ymir_training_result()` save model weight, map and other files. -##
Quick Start Examples
+ - `get_weight_file()` get pretrained weight or init weight file from ymir system -
-Install +- modify `utils/datasets.py` for ymir dataset format -Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a -[**Python>=3.7.0**](https://www.python.org/) environment, including -[**PyTorch>=1.7**](https://pytorch.org/get-started/locally/). +- modify `train.py` for training process monitor -```bash -git clone https://github.com/ultralytics/yolov5 # clone -cd yolov5 -pip install -r requirements.txt # install -``` +- add `mining/data_augment.py` and `mining/mining_cald.py` for mining -
+- add `training/infer/mining-template.yaml` for `/img-man/training/infer/mining-template.yaml` -
-Inference +- add `cuda102/111.dockerfile`, remove origin `Dockerfile` -Inference with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36) -. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest -YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). +- modify `requirements.txt` -```python -import torch - -# Model -model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # or yolov5m, yolov5l, yolov5x, custom - -# Images -img = 'https://ultralytics.com/images/zidane.jpg' # or file, Path, PIL, OpenCV, numpy, list - -# Inference -results = model(img) - -# Results -results.print() # or .show(), .save(), .crop(), .pandas(), etc. -``` - -
- - - -
-Inference with detect.py - -`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from -the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. - -```bash -python detect.py --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream -``` - -
- -
-Training - -The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) -results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) -and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest -YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are -1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://github.com/ultralytics/yolov5/issues/475) times faster). Use the -largest `--batch-size` possible, or pass `--batch-size -1` for -YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown for V100-16GB. - -```bash -python train.py --data coco.yaml --cfg yolov5n.yaml --weights '' --batch-size 128 - yolov5s 64 - yolov5m 40 - yolov5l 24 - yolov5x 16 -``` - - - -
- -
-Tutorials - -* [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED -* [Tips for Best Training Results](https://github.com/ultralytics/yolov5/wiki/Tips-for-Best-Training-Results)  ☘️ - RECOMMENDED -* [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW -* [Roboflow for Datasets, Labeling, and Active Learning](https://github.com/ultralytics/yolov5/issues/4975)  🌟 NEW -* [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) -* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW -* [TFLite, ONNX, CoreML, TensorRT Export](https://github.com/ultralytics/yolov5/issues/251) 🚀 -* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) -* [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) -* [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) -* [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) -* [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW -* [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) - -
- -##
Environments
- -Get started in seconds with our verified environments. Click each icon below for details. - - - -##
Integrations
- - - -|Weights and Biases|Roboflow ⭐ NEW| -|:-:|:-:| -|Automatically track and visualize all your YOLOv5 training runs in the cloud with [Weights & Biases](https://wandb.ai/site?utm_campaign=repo_yolo_readme)|Label and export your custom datasets directly to YOLOv5 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | - - - - -##
Why YOLOv5
- -

-
- YOLOv5-P5 640 Figure (click to expand) - -

-
-
- Figure Notes (click to expand) - -* **COCO AP val** denotes mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536. -* **GPU Speed** measures average inference time per image on [COCO val2017](http://cocodataset.org) dataset using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) V100 instance at batch-size 32. -* **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 8. -* **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt` -
- -### Pretrained Checkpoints - -[assets]: https://github.com/ultralytics/yolov5/releases - -[TTA]: https://github.com/ultralytics/yolov5/issues/303 - -|Model |size
(pixels) |mAPval
0.5:0.95 |mAPval
0.5 |Speed
CPU b1
(ms) |Speed
V100 b1
(ms) |Speed
V100 b32
(ms) |params
(M) |FLOPs
@640 (B) -|--- |--- |--- |--- |--- |--- |--- |--- |--- -|[YOLOv5n][assets] |640 |28.0 |45.7 |**45** |**6.3**|**0.6**|**1.9**|**4.5** -|[YOLOv5s][assets] |640 |37.4 |56.8 |98 |6.4 |0.9 |7.2 |16.5 -|[YOLOv5m][assets] |640 |45.4 |64.1 |224 |8.2 |1.7 |21.2 |49.0 -|[YOLOv5l][assets] |640 |49.0 |67.3 |430 |10.1 |2.7 |46.5 |109.1 -|[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7 -| | | | | | | | | -|[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6 -|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6 -|[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0 -|[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4 -|[YOLOv5x6][assets]
+ [TTA][TTA]|1280
1536 |55.0
**55.8** |72.7
**72.7** |3136
- |26.2
- |19.4
- |140.7
- |209.8
- - -
- Table Notes (click to expand) - -* All checkpoints are trained to 300 epochs with default settings. Nano and Small models use [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) hyps, all others use [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml). -* **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
Reproduce by `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` -* **Speed** averaged over COCO val images using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) instance. NMS times (~1 ms/img) not included.
Reproduce by `python val.py --data coco.yaml --img 640 --task speed --batch 1` -* **TTA** [Test Time Augmentation](https://github.com/ultralytics/yolov5/issues/303) includes reflection and scale augmentations.
Reproduce by `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment` - -
- -##
Contribute
- -We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible. Please see our [Contributing Guide](CONTRIBUTING.md) to get started, and fill out the [YOLOv5 Survey](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experiences. Thank you to all our contributors! - - - -##
Contact
- -For YOLOv5 bugs and feature requests please visit [GitHub Issues](https://github.com/ultralytics/yolov5/issues). For business inquiries or -professional support requests please visit [https://ultralytics.com/contact](https://ultralytics.com/contact). - -
- - +- other modify support onnx export, not important. diff --git a/det-yolov5-tmi/README_yolov5.md b/det-yolov5-tmi/README_yolov5.md new file mode 100644 index 0000000..b03a7c5 --- /dev/null +++ b/det-yolov5-tmi/README_yolov5.md @@ -0,0 +1,304 @@ +
+

+ + +

+
+
+ CI CPU testing + YOLOv5 Citation + Docker Pulls +
+ Open In Colab + Open In Kaggle + Join Forum +
+ +
+

+YOLOv5 🚀 is a family of object detection architectures and models pretrained on the COCO dataset, and represents Ultralytics + open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. +

+ + + + + +
+ +##
Documentation
+ +See the [YOLOv5 Docs](https://docs.ultralytics.com) for full documentation on training, testing and deployment. + +##
Quick Start Examples
+ +
+Install + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a +[**Python>=3.7.0**](https://www.python.org/) environment, including +[**PyTorch>=1.7**](https://pytorch.org/get-started/locally/). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install +``` + +
+ +
+Inference + +Inference with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36) +. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest +YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```python +import torch + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # or yolov5m, yolov5l, yolov5x, custom + +# Images +img = 'https://ultralytics.com/images/zidane.jpg' # or file, Path, PIL, OpenCV, numpy, list + +# Inference +results = model(img) + +# Results +results.print() # or .show(), .save(), .crop(), .pandas(), etc. +``` + +
+ + + +
+Inference with detect.py + +`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from +the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. + +```bash +python detect.py --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + path/*.jpg # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream +``` + +
+ +
+Training + +The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) +results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) +and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest +YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are +1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://github.com/ultralytics/yolov5/issues/475) times faster). Use the +largest `--batch-size` possible, or pass `--batch-size -1` for +YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown for V100-16GB. + +```bash +python train.py --data coco.yaml --cfg yolov5n.yaml --weights '' --batch-size 128 + yolov5s 64 + yolov5m 40 + yolov5l 24 + yolov5x 16 +``` + + + +
+ +
+Tutorials + +* [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED +* [Tips for Best Training Results](https://github.com/ultralytics/yolov5/wiki/Tips-for-Best-Training-Results)  ☘️ + RECOMMENDED +* [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW +* [Roboflow for Datasets, Labeling, and Active Learning](https://github.com/ultralytics/yolov5/issues/4975)  🌟 NEW +* [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) +* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW +* [TFLite, ONNX, CoreML, TensorRT Export](https://github.com/ultralytics/yolov5/issues/251) 🚀 +* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) +* [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) +* [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) +* [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) +* [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW +* [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) + +
+ +##
Environments
+ +Get started in seconds with our verified environments. Click each icon below for details. + + + +##
Integrations
+ + + +|Weights and Biases|Roboflow ⭐ NEW| +|:-:|:-:| +|Automatically track and visualize all your YOLOv5 training runs in the cloud with [Weights & Biases](https://wandb.ai/site?utm_campaign=repo_yolo_readme)|Label and export your custom datasets directly to YOLOv5 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | + + + + +##
Why YOLOv5
+ +

+
+ YOLOv5-P5 640 Figure (click to expand) + +

+
+
+ Figure Notes (click to expand) + +* **COCO AP val** denotes mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536. +* **GPU Speed** measures average inference time per image on [COCO val2017](http://cocodataset.org) dataset using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) V100 instance at batch-size 32. +* **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 8. +* **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt` +
+ +### Pretrained Checkpoints + +[assets]: https://github.com/ultralytics/yolov5/releases + +[TTA]: https://github.com/ultralytics/yolov5/issues/303 + +|Model |size
(pixels) |mAPval
0.5:0.95 |mAPval
0.5 |Speed
CPU b1
(ms) |Speed
V100 b1
(ms) |Speed
V100 b32
(ms) |params
(M) |FLOPs
@640 (B) +|--- |--- |--- |--- |--- |--- |--- |--- |--- +|[YOLOv5n][assets] |640 |28.0 |45.7 |**45** |**6.3**|**0.6**|**1.9**|**4.5** +|[YOLOv5s][assets] |640 |37.4 |56.8 |98 |6.4 |0.9 |7.2 |16.5 +|[YOLOv5m][assets] |640 |45.4 |64.1 |224 |8.2 |1.7 |21.2 |49.0 +|[YOLOv5l][assets] |640 |49.0 |67.3 |430 |10.1 |2.7 |46.5 |109.1 +|[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7 +| | | | | | | | | +|[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6 +|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6 +|[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0 +|[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4 +|[YOLOv5x6][assets]
+ [TTA][TTA]|1280
1536 |55.0
**55.8** |72.7
**72.7** |3136
- |26.2
- |19.4
- |140.7
- |209.8
- + +
+ Table Notes (click to expand) + +* All checkpoints are trained to 300 epochs with default settings. Nano and Small models use [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) hyps, all others use [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml). +* **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
Reproduce by `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` +* **Speed** averaged over COCO val images using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) instance. NMS times (~1 ms/img) not included.
Reproduce by `python val.py --data coco.yaml --img 640 --task speed --batch 1` +* **TTA** [Test Time Augmentation](https://github.com/ultralytics/yolov5/issues/303) includes reflection and scale augmentations.
Reproduce by `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment` + +
+ +##
Contribute
+ +We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible. Please see our [Contributing Guide](CONTRIBUTING.md) to get started, and fill out the [YOLOv5 Survey](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experiences. Thank you to all our contributors! + + + +##
Contact
+ +For YOLOv5 bugs and feature requests please visit [GitHub Issues](https://github.com/ultralytics/yolov5/issues). For business inquiries or +professional support requests please visit [https://ultralytics.com/contact](https://ultralytics.com/contact). + +
+ + diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index bd7fd97..031859d 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -3,7 +3,9 @@ ARG CUDA="10.2" ARG CUDNN="7" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +# support SERVER_MODE=dev or prod ARG SERVER_MODE=prod +# support YMIR=1.0.0, 1.1.0 or 1.2.0 ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index f0ab4cc..c238bd5 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -4,9 +4,12 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +# support SERVER_MODE=dev or prod ARG SERVER_MODE=prod +# support YMIR=1.0.0, 1.1.0 or 1.2.0 ARG YMIR="1.1.0" + ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index ba0f825..0fde401 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -37,12 +37,11 @@ def __init__(self, cfg: edict): super().__init__(cfg) if cfg.ymir.run_mining and cfg.ymir.run_infer: + # multiple task, run mining first, infer later mining_task_idx = 0 - # infer_task_idx = 1 task_num = 2 else: mining_task_idx = 0 - # infer_task_idx = 0 task_num = 1 self.task_idx = mining_task_idx diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 61c4dbe..4f0648f 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -13,7 +13,7 @@ from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, download_weight_file, get_merged_config, - get_weight_file, get_ymir_process) + get_weight_file, get_ymir_process, write_ymir_training_result) def start() -> int: @@ 
-25,6 +25,7 @@ def start() -> int: _run_training(cfg) else: if cfg.ymir.run_mining and cfg.ymir.run_infer: + # multiple task, run mining first, infer later mining_task_idx = 0 infer_task_idx = 1 task_num = 2 @@ -59,12 +60,20 @@ def _run_training(cfg: edict) -> None: batch_size = cfg.param.batch_size model = cfg.param.model img_size = cfg.param.img_size - save_period = cfg.param.save_period + save_period = max(1, min(epochs // 10, int(cfg.param.save_period))) args_options = cfg.param.args_options gpu_id = str(cfg.param.gpu_id) gpu_count = len(gpu_id.split(',')) if gpu_id else 0 port = int(cfg.param.get('port', 29500)) sync_bn = cfg.param.get('sync_bn', False) + if isinstance(sync_bn, str): + if sync_bn.lower() in ['f', 'false']: + sync_bn = False + elif sync_bn.lower() in ['t', 'true']: + sync_bn = True + else: + raise Exception(f'unknown bool str sync_bn = {sync_bn}') + weights = get_weight_file(cfg) if not weights: # download pretrained weight @@ -72,38 +81,35 @@ def _run_training(cfg: edict) -> None: models_dir = cfg.ymir.output.models_dir + commands = ['python3'] if gpu_count == 0: - command = f'python3 train.py --epochs {epochs} ' + \ - f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ - f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ - f'--img-size {img_size} ' + \ - f'--save-period {save_period} ' + \ - f'--device cpu' + device = 'cpu' elif gpu_count == 1: - command = f'python3 train.py --epochs {epochs} ' + \ - f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ - f'--cfg models/{model}.yaml --name models --weights {weights} ' + \ - f'--img-size {img_size} ' + \ - f'--save-period {save_period} ' + \ - f'--device {gpu_id}' + device = gpu_id else: - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} ' + \ - f'--master_port {port} train.py --epochs {epochs} ' + \ - f'--batch-size {batch_size} --data {out_dir}/data.yaml --project /out ' + \ - f'--cfg 
models/{model}.yaml --name models --weights {weights} ' + \ - f'--img-size {img_size} ' + \ - f'--save-period {save_period} ' + \ - f'--device {gpu_id}' - - if sync_bn: - command += " --sync-bn" + device = gpu_id + commands += f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split() + + commands += ['train.py', + '--epochs', str(epochs), + '--batch-size', str(batch_size), + '--data', f'{out_dir}/data.yaml', + '--project', '/out', + '--cfg', f'models/{model}.yaml', + '--name', 'models', '--weights', weights, + '--img-size', str(img_size), + '--save-period', str(save_period), + '--device', device] + + if gpu_count > 1 and sync_bn: + commands.append("--sync-bn") if args_options: - command += f" {args_options}" + commands += args_options.split() - logging.info(f'start training: {command}') + logging.info(f'start training: {commands}') - subprocess.run(command.split(), check=True) + subprocess.run(commands, check=True) monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) # 3. 
convert to onnx and save model weight to design directory @@ -114,7 +120,7 @@ def _run_training(cfg: edict) -> None: # save hyperparameter shutil.copy(f'models/{model}.yaml', f'{models_dir}/{model}.yaml') - + write_ymir_training_result(cfg) # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index c42098b..513c25b 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -24,7 +24,6 @@ from pathlib import Path import numpy as np -from packaging.version import Version import torch import torch.distributed as dist import torch.nn as nn @@ -58,7 +57,7 @@ from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config, write_old_ymir_training_result +from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -77,12 +76,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary opt.ymir_cfg = '' # yaml cannot dump edict, remove it here log_dir = Path(ymir_cfg.ymir.output.tensorboard_dir) - YMIR_VERSION = os.environ.get('YMIR_VERSION', '1.2.0') - if Version(YMIR_VERSION) >= Version('1.2.0'): - latest_ymir = True - else: - latest_ymir = False - # Directories w = save_dir # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir @@ -425,10 +418,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') weight_file = str(w / f'epoch{epoch}.pt') - if latest_ymir: - 
write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, weight_file=weight_file) - else: - write_old_ymir_training_result(ymir_cfg, results, maps, rewrite=True) + write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, weight_file=weight_file) del ckpt callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) @@ -477,10 +467,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear torch.cuda.empty_cache() # save the best and last weight file with other files in models_dir if RANK in [-1, 0]: - if latest_ymir: - write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') - else: - write_old_ymir_training_result(ymir_cfg, (), np.array([0]), rewrite=False) + write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') return results diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index aa80a72..6f16c2c 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -2,16 +2,18 @@ utils function for ymir and yolov5 """ import glob +import os import os.path as osp import shutil from enum import IntEnum from typing import Any, Dict, List, Tuple +from easydict import EasyDict as edict import numpy as np import torch import yaml -from easydict import EasyDict as edict from nptyping import NDArray, Shape, UInt8 +from packaging.version import Version from ymir_exc import env from ymir_exc import result_writer as rw @@ -32,7 +34,7 @@ class YmirStage(IntEnum): CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def get_ymir_process(stage: YmirStage, p: float, task_idx: int=0, task_num: int=1) -> float: +def get_ymir_process(stage: YmirStage, p: float, task_idx: int = 0, task_num: int = 1) -> float: """ stage: pre-process/task/post-process p: percent for stage @@ -47,8 +49,9 @@ def get_ymir_process(stage: YmirStage, p: float, task_idx: int=0, task_num: int= if p < 0 or p > 1.0: raise Exception(f'p not 
in [0,1], p={p}') - init = task_idx * 1.0 / task_num ratio = 1.0 / task_num + init = task_idx / task_num + if stage == YmirStage.PREPROCESS: return init + PREPROCESS_PERCENT * p * ratio elif stage == YmirStage.TASK: @@ -110,16 +113,15 @@ class YmirYolov5(): def __init__(self, cfg: edict): self.cfg = cfg if cfg.ymir.run_mining and cfg.ymir.run_infer: - # mining_task_idx = 0 + # multiple task, run mining first, infer later infer_task_idx = 1 task_num = 2 else: - # mining_task_idx = 0 infer_task_idx = 0 task_num = 1 - self.task_idx=infer_task_idx - self.task_num=task_num + self.task_idx = infer_task_idx + self.task_num = task_num device = select_device(cfg.param.get('gpu_id', 'cpu')) @@ -225,15 +227,30 @@ def convert_ymir_to_yolov5(cfg: edict) -> None: def write_ymir_training_result(cfg: edict, - map50: float, - epoch: int, - weight_file: str) -> int: + map50: float = 0.0, + epoch: int = 0, + weight_file: str = "") -> int: + YMIR_VERSION = os.getenv('YMIR_VERSION', '1.2.0') + if Version(YMIR_VERSION) >= Version('1.2.0'): + write_latest_ymir_training_result(cfg, map50, epoch, weight_file) + else: + write_ancient_ymir_training_result(cfg, map50) + + +def write_latest_ymir_training_result(cfg: edict, + map50: float, + epoch: int, + weight_file: str) -> int: """ for ymir>=1.2.0 cfg: ymir config map50: map50 epoch: stage weight_file: saved weight files, empty weight_file will save all files + + 1. save weight file for each epoch. + 2. save weight file for last.pt, best.pt and other config file + 3. 
save weight file for best.onnx, no valid map50, attach to stage f"{model}_last_and_best" """ model = cfg.param.model # use `rw.write_training_result` to save training result @@ -246,63 +263,38 @@ def write_ymir_training_result(cfg: edict, files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*')) if not f.endswith('.pt')] + ['last.pt', 'best.pt'] + training_result_file = cfg.ymir.output.training_result_file + if osp.exists(training_result_file): + with open(cfg.ymir.output.training_result_file, 'r') as f: + training_result = yaml.safe_load(stream=f) + + map50 = max(training_result.get('map',0.0), map50) rw.write_model_stage(stage_name=f"{model}_last_and_best", files=files, mAP=float(map50)) return 0 -def write_training_result(model: List[str], map: float, class_aps: Dict[str, float], **kwargs: dict) -> None: +def write_ancient_ymir_training_result(cfg: edict, map50: float) -> None: """ for 1.0.0 <= ymir <=1.1.0 """ - training_result = { - 'model': model, - 'map': map, - 'class_aps': class_aps, - } - training_result.update(kwargs) + + files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] + training_result_file = cfg.ymir.output.training_result_file + if osp.exists(training_result_file): + with open(cfg.ymir.output.training_result_file, 'r') as f: + training_result = yaml.safe_load(stream=f) + + training_result['model'] = files + training_result['map'] = max(training_result.get('map', 0), map50) + else: + training_result = { + 'model': files, + 'map': map50, + 'stage_name': f'{cfg.param.model}' + } env_config = env.get_current_env() with open(env_config.output.training_result_file, 'w') as f: yaml.safe_dump(training_result, f) - - -def write_old_ymir_training_result(cfg: edict, results: Tuple, maps: NDArray, rewrite=False) -> int: - """ - for 1.0.0 <= ymir <=1.1.0 - cfg: ymir config - results: (mp, mr, map50, map, loss) - maps: map@0.5:0.95 for all classes - rewrite: set true to ensure write the best 
result - """ - - if not rewrite: - training_result_file = cfg.ymir.output.training_result_file - if osp.exists(training_result_file): - with open(cfg.ymir.output.training_result_file, 'r') as f: - training_result = yaml.safe_load(stream=f) - - files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] - - training_result['model_names'] = files + ['best.onnx'] - write_training_result(**training_result) - - return 0 - - class_names = cfg.param.class_names - mp = results[0] # mean of precision - mr = results[1] # mean of recall - map50 = results[2] # mean of ap@0.5 - map = results[3] # mean of ap@0.5:0.95 - - files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] - # use `rw.write_training_result` to save training result - write_training_result(model=files + ['best.onnx'], - map=float(map), - map50=float(map50), - precision=float(mp), - recall=float(mr), - class_aps={class_name: v - for class_name, v in zip(class_names, maps.tolist())}) - return 0 From 4e36ca567792b147dcef91e0231d3c26ca5d3419 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 22 Jul 2022 11:55:04 +0800 Subject: [PATCH 085/150] remove s --- det-yolov5-tmi/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 513c25b..d28fdb8 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -426,7 +426,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear if RANK == -1 and stopper(epoch=epoch, fitness=fi): break - # Stop DDP TODO: known issues shttps://github.com/ultralytics/yolov5/pull/4576 + # Stop DDP TODO: known issues https://github.com/ultralytics/yolov5/pull/4576 # stop = stopper(epoch=epoch, fitness=fi) # if RANK == 0: # dist.broadcast_object_list([stop], 0) # broadcast 'stop' to all ranks From 7410409658c58744e9282ce8613dbf539806e55d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 22 Jul 2022 12:10:23 +0800 Subject: 
[PATCH 086/150] use _ to help coder --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 11 ++++++----- det-mmdetection-tmi/tools/train.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index e8819b3..aac1df8 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -68,8 +68,9 @@ def get_merged_config() -> edict: return merged_cfg -def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: +def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: """ + useful for training process - modify dataset config - modify model output channel - modify epochs, checkpoint, tensorboard config @@ -170,12 +171,12 @@ def get_weight_file(cfg: edict) -> str: def write_ymir_training_result(last: bool = False, key_score: Optional[float] = None): YMIR_VERSION = os.environ.get('YMIR_VERSION', '1.2.0') if Version(YMIR_VERSION) >= Version('1.2.0'): - write_latest_ymir_training_result(last, key_score) + _write_latest_ymir_training_result(last, key_score) else: - write_ancient_ymir_training_result(key_score) + _write_ancient_ymir_training_result(key_score) -def write_latest_ymir_training_result(last: bool = False, key_score: Optional[float] = None): +def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[float] = None): if key_score: logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') @@ -226,7 +227,7 @@ def write_latest_ymir_training_result(last: bool = False, key_score: Optional[fl stage_name=stage_name) -def write_ancient_ymir_training_result(key_score: Optional[float] = None): +def _write_ancient_ymir_training_result(key_score: Optional[float] = None): if key_score: logging.info(f'key_score is {key_score}') diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index 74121ff..b3b6d65 100644 --- 
a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -17,7 +17,7 @@ from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, get_root_logger, setup_multi_processes -from mmdet.utils.util_ymir import modify_mmdet_config, get_merged_config +from mmdet.utils.util_ymir import _modify_mmdet_config, get_merged_config def parse_args(): @@ -101,7 +101,7 @@ def main(): cfg = Config.fromfile(args.config) print(cfg) # modify mmdet config from file - cfg = modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) + cfg = _modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) From 7c667d5778862de3f33e111f303a85adabd6aaa6 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 22 Jul 2022 12:11:59 +0800 Subject: [PATCH 087/150] use _ to help user --- det-yolov5-tmi/utils/ymir_yolov5.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 6f16c2c..be78660 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -232,12 +232,12 @@ def write_ymir_training_result(cfg: edict, weight_file: str = "") -> int: YMIR_VERSION = os.getenv('YMIR_VERSION', '1.2.0') if Version(YMIR_VERSION) >= Version('1.2.0'): - write_latest_ymir_training_result(cfg, map50, epoch, weight_file) + _write_latest_ymir_training_result(cfg, map50, epoch, weight_file) else: - write_ancient_ymir_training_result(cfg, map50) + _write_ancient_ymir_training_result(cfg, map50) -def write_latest_ymir_training_result(cfg: edict, +def _write_latest_ymir_training_result(cfg: edict, map50: float, epoch: int, weight_file: str) -> int: @@ -275,7 +275,7 @@ def write_latest_ymir_training_result(cfg: edict, return 0 -def write_ancient_ymir_training_result(cfg: edict, map50: float) -> None: +def _write_ancient_ymir_training_result(cfg: edict, 
map50: float) -> None: """ for 1.0.0 <= ymir <=1.1.0 """ From 9a2c5449ce3f93f11ba312442a25111286efee72 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 27 Jul 2022 14:34:04 +0800 Subject: [PATCH 088/150] update master --- README.MD | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.MD b/README.MD index 3618622..72e60b0 100644 --- a/README.MD +++ b/README.MD @@ -1,5 +1,13 @@ # ymir-executor 使用文档 +## ymir-1.0.0 official image + +- yolov4 + +- yolov5 + +- mmdetection + ## det-yolov4-training - yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法直接在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 进行构建。 From 87e18b3d50f63720964f411a3dca58107be99937 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 28 Jul 2022 15:08:57 +0800 Subject: [PATCH 089/150] update --- README.MD | 25 +- det-mmdetection-tmi/README.md | 342 ++-------------------------- det-mmdetection-tmi/README_mmdet.md | 329 ++++++++++++++++++++++++++ det-mmdetection-tmi/README_ymir.md | 25 -- det-yolov5-tmi/README.md | 6 + 5 files changed, 373 insertions(+), 354 deletions(-) create mode 100644 det-mmdetection-tmi/README_mmdet.md delete mode 100644 det-mmdetection-tmi/README_ymir.md diff --git a/README.MD b/README.MD index 72e60b0..3d7f8e4 100644 --- a/README.MD +++ b/README.MD @@ -1,6 +1,8 @@ # ymir-executor 使用文档 -## ymir-1.0.0 official image +- [ymir](https://github.com/IndustryEssentials/ymir) + +## ymir-1.1.0 official image - yolov4 @@ -8,9 +10,17 @@ - mmdetection +- [detectron2](https://github.com/yzbx/ymir-detectron2) + + - ymir1.0.0的镜像与ymir1.1.0兼容 + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.0.0-detectron2-tmi + ``` + ## det-yolov4-training -- yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法直接在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 进行构建。 +- yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 
进行构建。 ``` cd det-yolov4-training @@ -35,7 +45,7 @@ docker build -t ymir-executor/yolov4:cuda112-mi -f cuda112.dockerfile . ## det-yolov5-tmi -- [修改说明](./det-yolov5-tmi/README_yolov5.md) +- [change log](./det-yolov5-tmi/README.md) - yolov5训练、挖掘及推理镜像,镜像构建时会从github上下载权重, 如果访问github不稳定, 建议提前将模型权重下载并在构建时复制到镜像中. @@ -62,7 +72,7 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## det-mmdetection-tmi -- [修改说明](./det-mmdetection-tmi/README_ymir.md) +- [change log](./det-mmdetection-tmi/README.md) ``` cd det-mmdetection-tmi @@ -71,11 +81,12 @@ docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi -f docker/Doc docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi -f docker/Dockerfile.cuda111 --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 . ``` - ## 如何制作自己的ymir-executor - [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) +- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) ymir镜像开发辅助库 + ## 如何导入预训练模型 - [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) @@ -101,9 +112,9 @@ docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi -f docker/Doc - 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. ``` - cd ymir-executor + cd ymir-executor/det-yolov5-tmi - docker build -t ymir-executor/yolov5 . -f det-yolov5-tmi/cuda111.dockerfile + docker build -t ymir-executor/yolov5:cuda111 . -f cuda111.dockerfile --build-arg SERVER_MODE=dev ``` ## 镜像运行完`/in`与`/out`目录中的文件被清理 diff --git a/det-mmdetection-tmi/README.md b/det-mmdetection-tmi/README.md index c1d63cc..b2ed690 100644 --- a/det-mmdetection-tmi/README.md +++ b/det-mmdetection-tmi/README.md @@ -1,329 +1,27 @@ -
- -
 
-
- OpenMMLab website - - - HOT - - -      - OpenMMLab platform - - - TRY IT OUT - - -
-
 
+# det-mmdetection-tmi -[![PyPI](https://img.shields.io/pypi/v/mmdet)](https://pypi.org/project/mmdet) -[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmdetection.readthedocs.io/en/latest/) -[![badge](https://github.com/open-mmlab/mmdetection/workflows/build/badge.svg)](https://github.com/open-mmlab/mmdetection/actions) -[![codecov](https://codecov.io/gh/open-mmlab/mmdetection/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmdetection) -[![license](https://img.shields.io/github/license/open-mmlab/mmdetection.svg)](https://github.com/open-mmlab/mmdetection/blob/master/LICENSE) -[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmdetection.svg)](https://github.com/open-mmlab/mmdetection/issues) +- [mmdetection](./README_mmdet.md) - +`mmdetection` framework for object `det`ection `t`raining/`m`ining/`i`nfer task -[📘Documentation](https://mmdetection.readthedocs.io/en/v2.21.0/) | -[🛠️Installation](https://mmdetection.readthedocs.io/en/v2.21.0/get_started.html) | -[👀Model Zoo](https://mmdetection.readthedocs.io/en/v2.21.0/model_zoo.html) | -[🆕Update News](https://mmdetection.readthedocs.io/en/v2.21.0/changelog.html) | -[🚀Ongoing Projects](https://github.com/open-mmlab/mmdetection/projects) | -[🤔Reporting Issues](https://github.com/open-mmlab/mmdetection/issues/new/choose) - -
- -## Introduction - -English | [简体中文](README_zh-CN.md) - -MMDetection is an open source object detection toolbox based on PyTorch. It is -a part of the [OpenMMLab](https://openmmlab.com/) project. - -The master branch works with **PyTorch 1.5+**. - -
-Major features - -- **Modular Design** - - We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules. - -- **Support of multiple frameworks out of box** - - The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc. - -- **High efficiency** - - All basic bbox and mask operations run on GPUs. The training speed is faster than or comparable to other codebases, including [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet). - -- **State of the art** - - The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward. - -
- -Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox. - -## License - -This project is released under the [Apache 2.0 license](LICENSE). - -## Changelog - -**2.22.0** was released in 24/2/2022: - -- Support [MaskFormer](configs/maskformer), [DyHead](configs/dyhead), [OpenImages Dataset](configs/openimages) and [TIMM backbone](configs/timm_example) -- Support visualization for Panoptic Segmentation -- Release a good recipe of using ResNet in object detectors pre-trained by [ResNet Strikes Back](https://arxiv.org/abs/2110.00476), which consistently brings about 3~4 mAP improvements over RetinaNet, Faster/Mask/Cascade Mask R-CNN - -Please refer to [changelog.md](docs/en/changelog.md) for details and release history. - -For compatibility changes between different versions of MMDetection, please refer to [compatibility.md](docs/en/compatibility.md). - -## Overview of Benchmark and Model Zoo - -Results and models are available in the [model zoo](docs/en/model_zoo.md). - -
- Architectures -
- - - - - - - - - - - - - - - - - -
- Object Detection - - Instance Segmentation - - Panoptic Segmentation - - Other -
- - - - - - - -
  • Contrastive Learning
  • - - -
  • Distillation
  • - - -
    - -
    - Components -
    - - - - - - - - - - - - - - - - - -
    - Backbones - - Necks - - Loss - - Common -
    - - - - - - - -
    - -Some other methods are also supported in [projects using MMDetection](./docs/en/projects.md). - -## Installation - -Please refer to [get_started.md](docs/en/get_started.md) for installation. - -## Getting Started - -Please see [get_started.md](docs/en/get_started.md) for the basic usage of MMDetection. -We provide [colab tutorial](demo/MMDet_Tutorial.ipynb), and full guidance for quick run [with existing dataset](docs/en/1_exist_data_model.md) and [with new dataset](docs/en/2_new_data_model.md) for beginners. -There are also tutorials for [finetuning models](docs/en/tutorials/finetune.md), [adding new dataset](docs/en/tutorials/customize_dataset.md), [designing data pipeline](docs/en/tutorials/data_pipeline.md), [customizing models](docs/en/tutorials/customize_models.md), [customizing runtime settings](docs/en/tutorials/customize_runtime.md) and [useful tools](docs/en/useful_tools.md). - -Please refer to [FAQ](docs/en/faq.md) for frequently asked questions. - -## Contributing - -We appreciate all contributions to improve MMDetection. Ongoing projects can be found in out [GitHub Projects](https://github.com/open-mmlab/mmdetection/projects). Welcome community users to participate in these projects. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline. - -## Acknowledgement - -MMDetection is an open source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks. -We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors. - -## Citation - -If you use this toolbox or benchmark in your research, please cite this project. 
+# build docker image ``` -@article{mmdetection, - title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark}, - author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and - Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and - Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and - Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and - Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong - and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua}, - journal= {arXiv preprint arXiv:1906.07155}, - year={2019} -} -``` +docker build -t ymir-executor/mmdet:cuda102-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda102 . -## Projects in OpenMMLab +docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda111 . +``` -- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. -- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages. -- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. -- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. -- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. -- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark. -- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. -- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox. -- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. 
-- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark. -- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark. -- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark. -- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark. -- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. -- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. -- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. -- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. -- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. -- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework. +# changelog +- modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format +- modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process +- modify `mmdet/datasets/__init__.py, mmdet/datasets/coco.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. +- modify `requirements/runtime.txt` to add new dependent package. +- add `mmdet/utils/util_ymir.py` for ymir training/infer/mining +- add `ymir_infer.py` for infer +- add `ymir_mining.py` for mining +- add `ymir_train.py` modify `tools/train.py` to update the mmcv config for training +- add `start.py`, the entrypoint for docker image +- add `training-template.yaml, infer-template.yaml, mining-template.yaml` for ymir pre-defined hyper-parameters. 
+- add `docker/Dockerfile.cuda102, docker/Dockerfile.cuda111` to build docker image +- remove `docker/Dockerfile` to avoid misuse diff --git a/det-mmdetection-tmi/README_mmdet.md b/det-mmdetection-tmi/README_mmdet.md new file mode 100644 index 0000000..c1d63cc --- /dev/null +++ b/det-mmdetection-tmi/README_mmdet.md @@ -0,0 +1,329 @@ +
    + +
     
    +
    + OpenMMLab website + + + HOT + + +      + OpenMMLab platform + + + TRY IT OUT + + +
    +
     
    + +[![PyPI](https://img.shields.io/pypi/v/mmdet)](https://pypi.org/project/mmdet) +[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmdetection.readthedocs.io/en/latest/) +[![badge](https://github.com/open-mmlab/mmdetection/workflows/build/badge.svg)](https://github.com/open-mmlab/mmdetection/actions) +[![codecov](https://codecov.io/gh/open-mmlab/mmdetection/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmdetection) +[![license](https://img.shields.io/github/license/open-mmlab/mmdetection.svg)](https://github.com/open-mmlab/mmdetection/blob/master/LICENSE) +[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmdetection.svg)](https://github.com/open-mmlab/mmdetection/issues) + + + +[📘Documentation](https://mmdetection.readthedocs.io/en/v2.21.0/) | +[🛠️Installation](https://mmdetection.readthedocs.io/en/v2.21.0/get_started.html) | +[👀Model Zoo](https://mmdetection.readthedocs.io/en/v2.21.0/model_zoo.html) | +[🆕Update News](https://mmdetection.readthedocs.io/en/v2.21.0/changelog.html) | +[🚀Ongoing Projects](https://github.com/open-mmlab/mmdetection/projects) | +[🤔Reporting Issues](https://github.com/open-mmlab/mmdetection/issues/new/choose) + +
    + +## Introduction + +English | [简体中文](README_zh-CN.md) + +MMDetection is an open source object detection toolbox based on PyTorch. It is +a part of the [OpenMMLab](https://openmmlab.com/) project. + +The master branch works with **PyTorch 1.5+**. + +
    +Major features + +- **Modular Design** + + We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules. + +- **Support of multiple frameworks out of box** + + The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc. + +- **High efficiency** + + All basic bbox and mask operations run on GPUs. The training speed is faster than or comparable to other codebases, including [Detectron2](https://github.com/facebookresearch/detectron2), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet). + +- **State of the art** + + The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward. + +
    + +Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox. + +## License + +This project is released under the [Apache 2.0 license](LICENSE). + +## Changelog + +**2.22.0** was released in 24/2/2022: + +- Support [MaskFormer](configs/maskformer), [DyHead](configs/dyhead), [OpenImages Dataset](configs/openimages) and [TIMM backbone](configs/timm_example) +- Support visualization for Panoptic Segmentation +- Release a good recipe of using ResNet in object detectors pre-trained by [ResNet Strikes Back](https://arxiv.org/abs/2110.00476), which consistently brings about 3~4 mAP improvements over RetinaNet, Faster/Mask/Cascade Mask R-CNN + +Please refer to [changelog.md](docs/en/changelog.md) for details and release history. + +For compatibility changes between different versions of MMDetection, please refer to [compatibility.md](docs/en/compatibility.md). + +## Overview of Benchmark and Model Zoo + +Results and models are available in the [model zoo](docs/en/model_zoo.md). + +
    + Architectures +
    + + + + + + + + + + + + + + + + + +
    + Object Detection + + Instance Segmentation + + Panoptic Segmentation + + Other +
    + + + + + + + +
  • Contrastive Learning
  • + + +
  • Distillation
  • + + +
    + +
    + Components +
    + + + + + + + + + + + + + + + + + +
    + Backbones + + Necks + + Loss + + Common +
    + + + + + + + +
    + +Some other methods are also supported in [projects using MMDetection](./docs/en/projects.md). + +## Installation + +Please refer to [get_started.md](docs/en/get_started.md) for installation. + +## Getting Started + +Please see [get_started.md](docs/en/get_started.md) for the basic usage of MMDetection. +We provide [colab tutorial](demo/MMDet_Tutorial.ipynb), and full guidance for quick run [with existing dataset](docs/en/1_exist_data_model.md) and [with new dataset](docs/en/2_new_data_model.md) for beginners. +There are also tutorials for [finetuning models](docs/en/tutorials/finetune.md), [adding new dataset](docs/en/tutorials/customize_dataset.md), [designing data pipeline](docs/en/tutorials/data_pipeline.md), [customizing models](docs/en/tutorials/customize_models.md), [customizing runtime settings](docs/en/tutorials/customize_runtime.md) and [useful tools](docs/en/useful_tools.md). + +Please refer to [FAQ](docs/en/faq.md) for frequently asked questions. + +## Contributing + +We appreciate all contributions to improve MMDetection. Ongoing projects can be found in out [GitHub Projects](https://github.com/open-mmlab/mmdetection/projects). Welcome community users to participate in these projects. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline. + +## Acknowledgement + +MMDetection is an open source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks. +We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors. + +## Citation + +If you use this toolbox or benchmark in your research, please cite this project. 
+ +``` +@article{mmdetection, + title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark}, + author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and + Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and + Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and + Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and + Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong + and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua}, + journal= {arXiv preprint arXiv:1906.07155}, + year={2019} +} +``` + +## Projects in OpenMMLab + +- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. +- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages. +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. +- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark. +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. +- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox. +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. +- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark. +- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark. +- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark. 
+- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark. +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. +- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. +- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework. diff --git a/det-mmdetection-tmi/README_ymir.md b/det-mmdetection-tmi/README_ymir.md deleted file mode 100644 index 1281e7f..0000000 --- a/det-mmdetection-tmi/README_ymir.md +++ /dev/null @@ -1,25 +0,0 @@ -# det-mmdetection-tmi - -`mmdetection` framework for object `det`ection `t`raining/`m`ining/`i`nfer task - -# build docker image - -``` -docker build -t ymir-executor/mmdet:cuda102-tmi -build-arg SERVER_MODE=dev -f docker/Dockerfile.cuda102 . - -docker build -t ymir-executor/mmdet:cuda111-tmi -build-arg SERVER_MODE=dev -f docker/Dockerfile.cuda111 . -``` - -# changelog -- modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format -- modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process -- modify `mmdet/datasets/__init__.py, mmdet/datasets/coco.py` and add `mmdet/datasets/ymir.py`, add class `YmirDataset` to load YMIR dataset. -- modify `requirements/runtime.txt` to add new dependent package. 
-- add `mmdet/utils/util_ymir.py` for ymir training/infer/mining -- add `ymir_infer.py` for infer -- add `ymir_mining.py` for mining -- add `ymir_train.py` modify `tools/train.py` to update the mmcv config for training -- add `start.py`, the entrypoint for docker image -- add `training-template.yaml, infer-template.yaml, mining-template.yaml` for ymir pre-defined hyper-parameters. -- add `docker/Dockerfile.cuda102, docker/Dockerfile.cuda111` to build docker image -- remove `docker/Dockerfile` to avoid misuse diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index fba577d..520d78c 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -1,6 +1,12 @@ # yolov5-ymir readme - [yolov5 readme](./README_yolov5.md) +``` +docker build -t ymir/ymir-executor:ymir1.1.0-cuda102-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda102.dockerfile . + +docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . 
+``` + ## change log - add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining From b13264e64caec1a098ae804f844e1445709925d6 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 2 Aug 2022 17:55:11 +0800 Subject: [PATCH 090/150] update readme --- README.MD | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/README.MD b/README.MD index 3d7f8e4..5b8bc04 100644 --- a/README.MD +++ b/README.MD @@ -2,22 +2,31 @@ - [ymir](https://github.com/IndustryEssentials/ymir) -## ymir-1.1.0 official image +## ymir-1.1.0 official image -- yolov4 +- yolov4 -- yolov5 +- yolov5 -- mmdetection +- mmdetection - [detectron2](https://github.com/yzbx/ymir-detectron2) - + + - [change log](https://github.com/yzbx/ymir-detectron2/blob/master/README.md) + - ymir1.0.0的镜像与ymir1.1.0兼容 ``` docker pull youdaoyzbx/ymir-executor:ymir1.0.0-detectron2-tmi ``` +- [yolov7](https://github.com/yzbx/ymir-yolov7) + + - [change log](https://github.com/yzbx/ymir-yolov7/blob/main/ymir/README.md) + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi + ``` + ## det-yolov4-training - yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 进行构建。 @@ -114,7 +123,7 @@ docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi -f docker/Doc ``` cd ymir-executor/det-yolov5-tmi - docker build -t ymir-executor/yolov5:cuda111 . -f cuda111.dockerfile --build-arg SERVER_MODE=dev + docker build -t ymir-executor/yolov5:cuda111 . 
-f cuda111.dockerfile --build-arg SERVER_MODE=dev ``` ## 镜像运行完`/in`与`/out`目录中的文件被清理 From aa6e44503d7914bda35dea5aef13ebe81dea079c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 2 Aug 2022 19:27:05 +0800 Subject: [PATCH 091/150] merge yolov4 training and mining --- README.MD | 18 ++++++++++--- det-yolov4-mining/Dockerfile | 20 --------------- det-yolov4-mining/cuda112.dockerfile | 15 ----------- .../.circleci/config.yml | 0 .../.travis.yml | 0 .../3rdparty/pthreads/bin/pthreadGC2.dll | Bin .../3rdparty/pthreads/bin/pthreadVC2.dll | Bin .../3rdparty/pthreads/include/pthread.h | 0 .../3rdparty/pthreads/include/sched.h | 0 .../3rdparty/pthreads/include/semaphore.h | 0 .../3rdparty/pthreads/lib/libpthreadGC2.a | Bin .../3rdparty/pthreads/lib/pthreadVC2.lib | Bin .../3rdparty/stb/include/stb_image.h | 0 .../3rdparty/stb/include/stb_image_write.h | 0 .../CMakeLists.txt | 0 .../DarknetConfig.cmake.in | 0 .../LICENSE | 0 .../Makefile | 0 .../README.md | 0 .../build.ps1 | 0 .../calc_map.sh | 0 .../cfg/9k.labels | 0 .../cfg/9k.names | 0 .../cfg/9k.tree | 0 .../cfg/Gaussian_yolov3_BDD.cfg | 0 .../cfg/alexnet.cfg | 0 .../cfg/cd53paspp-gamma.cfg | 0 .../cfg/cifar.cfg | 0 .../cfg/cifar.test.cfg | 0 .../cfg/coco.data | 0 .../cfg/coco.names | 0 .../cfg/coco9k.map | 0 .../cfg/combine9k.data | 0 .../cfg/crnn.train.cfg | 0 .../cfg/csdarknet53-omega.cfg | 0 .../cfg/cspx-p7-mish-omega.cfg | 0 .../cfg/cspx-p7-mish.cfg | 0 .../cfg/cspx-p7-mish_hp.cfg | 0 ...csresnext50-panet-spp-original-optimal.cfg | 0 .../cfg/csresnext50-panet-spp.cfg | 0 .../cfg/darknet.cfg | 0 .../cfg/darknet19.cfg | 0 .../cfg/darknet19_448.cfg | 0 .../cfg/darknet53.cfg | 0 .../cfg/darknet53_448_xnor.cfg | 0 .../cfg/densenet201.cfg | 0 .../cfg/efficientnet-lite3.cfg | 0 .../cfg/efficientnet_b0.cfg | 0 .../cfg/enet-coco.cfg | 0 .../cfg/extraction.cfg | 0 .../cfg/extraction.conv.cfg | 0 .../cfg/extraction22k.cfg | 0 .../cfg/go.test.cfg | 0 .../cfg/gru.cfg | 0 .../cfg/imagenet.labels.list | 0 
.../cfg/imagenet.shortnames.list | 0 .../cfg/imagenet1k.data | 0 .../cfg/imagenet22k.dataset | 0 .../cfg/imagenet9k.hierarchy.dataset | 0 .../cfg/inet9k.map | 0 .../cfg/jnet-conv.cfg | 0 .../cfg/lstm.train.cfg | 0 .../cfg/openimages.data | 0 .../cfg/resnet101.cfg | 0 .../cfg/resnet152.cfg | 0 .../cfg/resnet152_trident.cfg | 0 .../cfg/resnet50.cfg | 0 .../cfg/resnext152-32x4d.cfg | 0 .../cfg/rnn.cfg | 0 .../cfg/rnn.train.cfg | 0 .../cfg/strided.cfg | 0 .../cfg/t1.test.cfg | 0 .../cfg/tiny-yolo-voc.cfg | 0 .../cfg/tiny-yolo.cfg | 0 .../cfg/tiny-yolo_xnor.cfg | 0 .../cfg/tiny.cfg | 0 .../cfg/vgg-16.cfg | 0 .../cfg/vgg-conv.cfg | 0 .../cfg/voc.data | 0 .../cfg/writing.cfg | 0 .../cfg/yolo-voc.2.0.cfg | 0 .../cfg/yolo-voc.cfg | 0 .../cfg/yolo.2.0.cfg | 0 .../cfg/yolo.cfg | 0 .../cfg/yolo9000.cfg | 0 .../cfg/yolov1/tiny-coco.cfg | 0 .../cfg/yolov1/tiny-yolo.cfg | 0 .../cfg/yolov1/xyolo.test.cfg | 0 .../cfg/yolov1/yolo-coco.cfg | 0 .../cfg/yolov1/yolo-small.cfg | 0 .../cfg/yolov1/yolo.cfg | 0 .../cfg/yolov1/yolo.train.cfg | 0 .../cfg/yolov1/yolo2.cfg | 0 .../cfg/yolov2-tiny-voc.cfg | 0 .../cfg/yolov2-tiny.cfg | 0 .../cfg/yolov2-voc.cfg | 0 .../cfg/yolov2.cfg | 0 .../cfg/yolov3-openimages.cfg | 0 .../cfg/yolov3-spp.cfg | 0 .../cfg/yolov3-tiny-prn.cfg | 0 .../cfg/yolov3-tiny.cfg | 0 .../cfg/yolov3-tiny_3l.cfg | 0 .../cfg/yolov3-tiny_obj.cfg | 0 .../cfg/yolov3-tiny_occlusion_track.cfg | 0 .../cfg/yolov3-tiny_xnor.cfg | 0 .../cfg/yolov3-voc.cfg | 0 .../cfg/yolov3-voc.yolov3-giou-40.cfg | 0 .../cfg/yolov3.cfg | 0 .../cfg/yolov3.coco-giou-12.cfg | 0 .../cfg/yolov3_5l.cfg | 0 .../cfg/yolov4-csp-swish.cfg | 0 .../cfg/yolov4-csp-x-swish-frozen.cfg | 0 .../cfg/yolov4-csp-x-swish.cfg | 0 .../cfg/yolov4-csp.cfg | 0 .../cfg/yolov4-custom.cfg | 0 .../cfg/yolov4-p5-frozen.cfg | 0 .../cfg/yolov4-p5.cfg | 0 .../cfg/yolov4-p6.cfg | 0 .../cfg/yolov4-sam-mish-csp-reorg-bfm.cfg | 0 .../cfg/yolov4-tiny-3l.cfg | 0 .../cfg/yolov4-tiny-custom.cfg | 0 .../cfg/yolov4-tiny.cfg | 0 
.../cfg/yolov4-tiny_contrastive.cfg | 0 .../cfg/yolov4.cfg | 0 .../cfg/yolov4_iter1000.cfg | 0 .../cfg/yolov4x-mish.cfg | 0 .../cmake/Modules/FindCUDNN.cmake | 0 .../cmake/Modules/FindPThreads4W.cmake | 0 .../cmake/Modules/FindStb.cmake | 0 .../config_and_train.py | 0 .../convert_label_ark2txt.py | 3 ++- .../convert_model_darknet2mxnet_yolov4.py | 0 .../counters_per_class.txt | 0 .../cuda101.dockerfile | 10 +++++--- .../cuda112.dockerfile | 10 +++++--- .../darknet.py | 0 .../darknet_images.py | 0 .../darknet_video.py | 0 .../data/9k.tree | 0 .../data/coco.names | 0 .../data/coco9k.map | 0 .../data/goal.txt | 0 .../data/imagenet.labels.list | 0 .../data/imagenet.shortnames.list | 0 .../data/labels/make_labels.py | 0 .../data/openimages.names | 0 .../data/voc.names | 0 .../image_yolov3.sh | 0 .../image_yolov4.sh | 0 .../img.txt | 0 .../include/darknet.h | 0 .../include/yolo_v2_class.hpp | 0 .../json_mjpeg_streams.sh | 0 .../make_train_test_darknet.sh | 0 .../mining}/.dockerignore | 0 .../mining}/README.md | 0 .../mining}/active_learning/__init__.py | 0 .../mining}/active_learning/apis/__init__.py | 0 .../mining}/active_learning/apis/al_api.py | 0 .../active_learning/apis/docker_api.py | 0 .../active_learning/dataset/__init__.py | 0 .../active_learning/dataset/datareader.py | 0 .../dataset/labeled_dataset.py | 0 .../dataset/unlabeled_dataset.py | 0 .../model_inference/__init__.py | 0 .../model_inference/centernet.py | 0 .../model_inference/yolo_models.py | 0 .../active_learning/strategy/__init__.py | 0 .../mining}/active_learning/strategy/aldd.py | 0 .../active_learning/strategy/aldd_yolo.py | 0 .../mining}/active_learning/strategy/cald.py | 0 .../active_learning/strategy/data_augment.py | 0 .../strategy/random_strategy.py | 0 .../mining}/active_learning/utils/__init__.py | 0 .../mining}/active_learning/utils/al_log.py | 0 .../mining}/active_learning/utils/operator.py | 0 .../mining}/al_main.py | 0 .../mining}/combined_class.txt | 0 .../mining}/docker_main.py | 15 
++++++----- .../mining}/docker_readme.md | 0 .../mining}/infer-template.yaml | 0 .../mining}/mining-template.yaml | 4 +-- .../mining}/monitor_process.py | 0 .../mining}/start.sh | 0 .../mining}/test_api.py | 0 .../mining}/test_centernet.py | 0 .../mining}/tools/al_strategsy_union.py | 0 .../mining}/tools/imagenet_hard_negative.py | 0 .../mining}/tools/plot_dataset_class_hist.py | 0 .../mining}/tools/visualize_aldd.py | 0 .../mining}/tools/visualize_cald.py | 0 .../mining}/write_result.py | 0 .../net_cam_v3.sh | 0 .../net_cam_v4.sh | 0 .../src/.editorconfig | 0 .../src/activation_kernels.cu | 0 .../src/activation_layer.c | 0 .../src/activation_layer.h | 0 .../src/activations.c | 0 .../src/activations.h | 0 .../src/art.c | 0 .../src/avgpool_layer.c | 0 .../src/avgpool_layer.h | 0 .../src/avgpool_layer_kernels.cu | 0 .../src/batchnorm_layer.c | 0 .../src/batchnorm_layer.h | 0 .../src/blas.c | 0 .../src/blas.h | 0 .../src/blas_kernels.cu | 0 .../src/box.c | 0 .../src/box.h | 0 .../src/captcha.c | 0 .../src/cifar.c | 0 .../src/classifier.c | 0 .../src/classifier.h | 0 .../src/coco.c | 0 .../src/col2im.c | 0 .../src/col2im.h | 0 .../src/col2im_kernels.cu | 0 .../src/compare.c | 0 .../src/connected_layer.c | 0 .../src/connected_layer.h | 0 .../src/conv_lstm_layer.c | 0 .../src/conv_lstm_layer.h | 0 .../src/convolutional_kernels.cu | 0 .../src/convolutional_layer.c | 0 .../src/convolutional_layer.h | 0 .../src/cost_layer.c | 0 .../src/cost_layer.h | 0 .../src/cpu_gemm.c | 0 .../src/crnn_layer.c | 0 .../src/crnn_layer.h | 0 .../src/crop_layer.c | 0 .../src/crop_layer.h | 0 .../src/crop_layer_kernels.cu | 0 .../src/csharp/CMakeLists.txt | 0 .../src/csharp/YoloCSharpWrapper.cs | 0 .../src/dark_cuda.c | 0 .../src/dark_cuda.h | 0 .../src/darknet.c | 0 .../src/darkunistd.h | 0 .../src/data.c | 0 .../src/data.h | 0 .../src/deconvolutional_kernels.cu | 0 .../src/deconvolutional_layer.c | 0 .../src/deconvolutional_layer.h | 0 .../src/demo.c | 0 .../src/demo.h | 0 
.../src/detection_layer.c | 0 .../src/detection_layer.h | 0 .../src/detector.c | 0 .../src/dice.c | 0 .../src/dropout_layer.c | 0 .../src/dropout_layer.h | 0 .../src/dropout_layer_kernels.cu | 0 .../src/gaussian_yolo_layer.c | 0 .../src/gaussian_yolo_layer.h | 0 .../src/gemm.c | 0 .../src/gemm.h | 0 .../src/getopt.c | 0 .../src/getopt.h | 0 .../src/gettimeofday.c | 0 .../src/gettimeofday.h | 0 .../src/go.c | 0 .../src/gru_layer.c | 0 .../src/gru_layer.h | 0 .../src/http_stream.cpp | 0 .../src/http_stream.h | 0 .../src/httplib.h | 0 .../src/im2col.c | 0 .../src/im2col.h | 0 .../src/im2col_kernels.cu | 0 .../src/image.c | 0 .../src/image.h | 0 .../src/image_opencv.cpp | 0 .../src/image_opencv.h | 0 .../src/layer.c | 0 .../src/layer.h | 0 .../src/list.c | 0 .../src/list.h | 0 .../src/local_layer.c | 0 .../src/local_layer.h | 0 .../src/lstm_layer.c | 0 .../src/lstm_layer.h | 0 .../src/matrix.c | 0 .../src/matrix.h | 0 .../src/maxpool_layer.c | 0 .../src/maxpool_layer.h | 0 .../src/maxpool_layer_kernels.cu | 0 .../src/network.c | 0 .../src/network.h | 0 .../src/network_kernels.cu | 0 .../src/nightmare.c | 0 .../src/normalization_layer.c | 0 .../src/normalization_layer.h | 0 .../src/option_list.c | 0 .../src/option_list.h | 0 .../src/parser.c | 0 .../src/parser.h | 0 .../src/region_layer.c | 0 .../src/region_layer.h | 0 .../src/reorg_layer.c | 0 .../src/reorg_layer.h | 0 .../src/reorg_old_layer.c | 0 .../src/reorg_old_layer.h | 0 .../src/representation_layer.c | 0 .../src/representation_layer.h | 0 .../src/rnn.c | 0 .../src/rnn_layer.c | 0 .../src/rnn_layer.h | 0 .../src/rnn_vid.c | 0 .../src/route_layer.c | 0 .../src/route_layer.h | 0 .../src/sam_layer.c | 0 .../src/sam_layer.h | 0 .../src/scale_channels_layer.c | 0 .../src/scale_channels_layer.h | 0 .../src/shortcut_layer.c | 0 .../src/shortcut_layer.h | 0 .../src/softmax_layer.c | 0 .../src/softmax_layer.h | 0 .../src/super.c | 0 .../src/swag.c | 0 .../src/tag.c | 0 .../src/tree.c | 0 .../src/tree.h | 0 
.../src/upsample_layer.c | 0 .../src/upsample_layer.h | 0 .../src/utils.c | 0 .../src/utils.h | 0 .../src/version.h | 0 .../src/version.h.in | 0 .../src/voxel.c | 0 .../src/writing.c | 0 .../src/yolo.c | 0 .../src/yolo_console_dll.cpp | 0 .../src/yolo_layer.c | 0 .../src/yolo_layer.h | 0 .../src/yolo_v2_class.cpp | 0 det-yolov4-tmi/start.py | 24 ++++++++++++++++++ .../train.sh | 0 .../train_watcher.py | 0 .../train_yolov3.sh | 0 .../training-template.yaml | 2 +- .../video_yolov3.sh | 0 .../video_yolov4.sh | 0 .../warm_up_training.py | 0 347 files changed, 66 insertions(+), 55 deletions(-) delete mode 100644 det-yolov4-mining/Dockerfile delete mode 100644 det-yolov4-mining/cuda112.dockerfile rename {det-yolov4-training => det-yolov4-tmi}/.circleci/config.yml (100%) rename {det-yolov4-training => det-yolov4-tmi}/.travis.yml (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/bin/pthreadGC2.dll (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/bin/pthreadVC2.dll (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/include/pthread.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/include/sched.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/include/semaphore.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/lib/libpthreadGC2.a (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/pthreads/lib/pthreadVC2.lib (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/stb/include/stb_image.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/3rdparty/stb/include/stb_image_write.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/CMakeLists.txt (100%) rename {det-yolov4-training => det-yolov4-tmi}/DarknetConfig.cmake.in (100%) rename {det-yolov4-training => det-yolov4-tmi}/LICENSE (100%) rename {det-yolov4-training => det-yolov4-tmi}/Makefile (100%) rename {det-yolov4-training => det-yolov4-tmi}/README.md (100%) rename 
{det-yolov4-training => det-yolov4-tmi}/build.ps1 (100%) rename {det-yolov4-training => det-yolov4-tmi}/calc_map.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/9k.labels (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/9k.names (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/9k.tree (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/Gaussian_yolov3_BDD.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/alexnet.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cd53paspp-gamma.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cifar.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cifar.test.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/coco.data (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/coco.names (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/coco9k.map (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/combine9k.data (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/crnn.train.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/csdarknet53-omega.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cspx-p7-mish-omega.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cspx-p7-mish.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/cspx-p7-mish_hp.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/csresnext50-panet-spp-original-optimal.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/csresnext50-panet-spp.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/darknet.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/darknet19.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/darknet19_448.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/darknet53.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/darknet53_448_xnor.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/densenet201.cfg (100%) rename 
{det-yolov4-training => det-yolov4-tmi}/cfg/efficientnet-lite3.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/efficientnet_b0.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/enet-coco.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/extraction.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/extraction.conv.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/extraction22k.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/go.test.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/gru.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/imagenet.labels.list (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/imagenet.shortnames.list (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/imagenet1k.data (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/imagenet22k.dataset (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/imagenet9k.hierarchy.dataset (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/inet9k.map (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/jnet-conv.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/lstm.train.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/openimages.data (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/resnet101.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/resnet152.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/resnet152_trident.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/resnet50.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/resnext152-32x4d.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/rnn.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/rnn.train.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/strided.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/t1.test.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/tiny-yolo-voc.cfg 
(100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/tiny-yolo.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/tiny-yolo_xnor.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/tiny.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/vgg-16.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/vgg-conv.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/voc.data (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/writing.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolo-voc.2.0.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolo-voc.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolo.2.0.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolo.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolo9000.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/tiny-coco.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/tiny-yolo.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/xyolo.test.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/yolo-coco.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/yolo-small.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/yolo.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/yolo.train.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov1/yolo2.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov2-tiny-voc.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov2-tiny.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov2-voc.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov2.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-openimages.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-spp.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny-prn.cfg 
(100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny_3l.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny_obj.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny_occlusion_track.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-tiny_xnor.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-voc.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3-voc.yolov3-giou-40.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3.coco-giou-12.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov3_5l.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-csp-swish.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-csp-x-swish-frozen.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-csp-x-swish.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-csp.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-custom.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-p5-frozen.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-p5.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-p6.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-tiny-3l.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-tiny-custom.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-tiny.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4-tiny_contrastive.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cfg/yolov4_iter1000.cfg (100%) rename {det-yolov4-training => 
det-yolov4-tmi}/cfg/yolov4x-mish.cfg (100%) rename {det-yolov4-training => det-yolov4-tmi}/cmake/Modules/FindCUDNN.cmake (100%) rename {det-yolov4-training => det-yolov4-tmi}/cmake/Modules/FindPThreads4W.cmake (100%) rename {det-yolov4-training => det-yolov4-tmi}/cmake/Modules/FindStb.cmake (100%) rename {det-yolov4-training => det-yolov4-tmi}/config_and_train.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/convert_label_ark2txt.py (97%) rename {det-yolov4-training => det-yolov4-tmi}/convert_model_darknet2mxnet_yolov4.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/counters_per_class.txt (100%) rename det-yolov4-training/Dockerfile => det-yolov4-tmi/cuda101.dockerfile (82%) rename {det-yolov4-training => det-yolov4-tmi}/cuda112.dockerfile (82%) rename {det-yolov4-training => det-yolov4-tmi}/darknet.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/darknet_images.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/darknet_video.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/9k.tree (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/coco.names (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/coco9k.map (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/goal.txt (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/imagenet.labels.list (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/imagenet.shortnames.list (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/labels/make_labels.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/openimages.names (100%) rename {det-yolov4-training => det-yolov4-tmi}/data/voc.names (100%) rename {det-yolov4-training => det-yolov4-tmi}/image_yolov3.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/image_yolov4.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/img.txt (100%) rename {det-yolov4-training => det-yolov4-tmi}/include/darknet.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/include/yolo_v2_class.hpp (100%) 
rename {det-yolov4-training => det-yolov4-tmi}/json_mjpeg_streams.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/make_train_test_darknet.sh (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/.dockerignore (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/README.md (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/apis/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/apis/al_api.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/apis/docker_api.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/dataset/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/dataset/datareader.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/dataset/labeled_dataset.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/dataset/unlabeled_dataset.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/model_inference/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/model_inference/centernet.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/model_inference/yolo_models.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/aldd.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/aldd_yolo.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/cald.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/data_augment.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/strategy/random_strategy.py (100%) rename {det-yolov4-mining => 
det-yolov4-tmi/mining}/active_learning/utils/__init__.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/utils/al_log.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/active_learning/utils/operator.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/al_main.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/combined_class.txt (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/docker_main.py (88%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/docker_readme.md (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/infer-template.yaml (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/mining-template.yaml (95%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/monitor_process.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/start.sh (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/test_api.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/test_centernet.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/tools/al_strategsy_union.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/tools/imagenet_hard_negative.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/tools/plot_dataset_class_hist.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/tools/visualize_aldd.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/tools/visualize_cald.py (100%) rename {det-yolov4-mining => det-yolov4-tmi/mining}/write_result.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/net_cam_v3.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/net_cam_v4.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/.editorconfig (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/activation_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/activation_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/activation_layer.h (100%) rename {det-yolov4-training => 
det-yolov4-tmi}/src/activations.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/activations.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/art.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/avgpool_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/avgpool_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/avgpool_layer_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/batchnorm_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/batchnorm_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/blas.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/blas.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/blas_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/box.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/box.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/captcha.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/cifar.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/classifier.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/classifier.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/coco.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/col2im.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/col2im.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/col2im_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/compare.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/connected_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/connected_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/conv_lstm_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/conv_lstm_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/convolutional_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/convolutional_layer.c (100%) rename 
{det-yolov4-training => det-yolov4-tmi}/src/convolutional_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/cost_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/cost_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/cpu_gemm.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/crnn_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/crnn_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/crop_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/crop_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/crop_layer_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/csharp/CMakeLists.txt (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/csharp/YoloCSharpWrapper.cs (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/dark_cuda.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/dark_cuda.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/darknet.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/darkunistd.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/data.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/data.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/deconvolutional_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/deconvolutional_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/deconvolutional_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/demo.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/demo.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/detection_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/detection_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/detector.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/dice.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/dropout_layer.c (100%) rename {det-yolov4-training => 
det-yolov4-tmi}/src/dropout_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/dropout_layer_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gaussian_yolo_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gaussian_yolo_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gemm.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gemm.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/getopt.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/getopt.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gettimeofday.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gettimeofday.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/go.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gru_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/gru_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/http_stream.cpp (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/http_stream.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/httplib.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/im2col.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/im2col.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/im2col_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/image.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/image.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/image_opencv.cpp (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/image_opencv.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/list.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/list.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/local_layer.c (100%) rename {det-yolov4-training => 
det-yolov4-tmi}/src/local_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/lstm_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/lstm_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/matrix.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/matrix.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/maxpool_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/maxpool_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/maxpool_layer_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/network.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/network.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/network_kernels.cu (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/nightmare.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/normalization_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/normalization_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/option_list.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/option_list.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/parser.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/parser.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/region_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/region_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/reorg_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/reorg_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/reorg_old_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/reorg_old_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/representation_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/representation_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/rnn.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/rnn_layer.c 
(100%) rename {det-yolov4-training => det-yolov4-tmi}/src/rnn_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/rnn_vid.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/route_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/route_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/sam_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/sam_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/scale_channels_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/scale_channels_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/shortcut_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/shortcut_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/softmax_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/softmax_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/super.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/swag.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/tag.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/tree.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/tree.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/upsample_layer.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/upsample_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/utils.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/utils.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/version.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/version.h.in (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/voxel.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/writing.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/yolo.c (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/yolo_console_dll.cpp (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/yolo_layer.c (100%) rename 
{det-yolov4-training => det-yolov4-tmi}/src/yolo_layer.h (100%) rename {det-yolov4-training => det-yolov4-tmi}/src/yolo_v2_class.cpp (100%) create mode 100644 det-yolov4-tmi/start.py rename {det-yolov4-training => det-yolov4-tmi}/train.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/train_watcher.py (100%) rename {det-yolov4-training => det-yolov4-tmi}/train_yolov3.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/training-template.yaml (96%) rename {det-yolov4-training => det-yolov4-tmi}/video_yolov3.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/video_yolov4.sh (100%) rename {det-yolov4-training => det-yolov4-tmi}/warm_up_training.py (100%) diff --git a/README.MD b/README.MD index 5b8bc04..b03b375 100644 --- a/README.MD +++ b/README.MD @@ -4,11 +4,22 @@ ## ymir-1.1.0 official image -- yolov4 +- [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) -- yolov5 + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu111-tmi + ``` + +- [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) -- mmdetection + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi + ``` + +- [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi + ``` - [detectron2](https://github.com/yzbx/ymir-detectron2) @@ -23,6 +34,7 @@ - [yolov7](https://github.com/yzbx/ymir-yolov7) - [change log](https://github.com/yzbx/ymir-yolov7/blob/main/ymir/README.md) + ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` diff --git a/det-yolov4-mining/Dockerfile b/det-yolov4-mining/Dockerfile deleted file mode 100644 index 4305760..0000000 --- a/det-yolov4-mining/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM industryessentials/mxnet_python:1.5.0_gpu_cu101mkl_py3_ub18 - -RUN sed -i '/developer\.download\.nvidia\.com\/compute\/cuda\/repos/d' /etc/apt/sources.list.d/* \ - && sed -i 
'/developer\.download\.nvidia\.com\/compute\/machine-learning\/repos/d' /etc/apt/sources.list.d/* \ - && apt-key del 7fa2af80 \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \ - && dpkg -i cuda-keyring_1.0-1_all.deb -RUN apt-get update && apt-get install -y --no-install-recommends libsm6 libxext6 libfontconfig1 libxrender1 libgl1-mesa-glx \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -RUN pip3 install --upgrade pip setuptools wheel && pip3 install opencv-python pyyaml scipy tqdm && rm -rf /root/.cache/pip3 - -COPY . /app -WORKDIR /app -RUN cp ./start.sh /usr/bin/start.sh && \ - mkdir -p /img-man && \ - cp ./mining-template.yaml /img-man/mining-template.yaml && \ - cp ./infer-template.yaml /img-man/infer-template.yaml && \ - cp ./README.md /img-man/readme.md -CMD sh /usr/bin/start.sh diff --git a/det-yolov4-mining/cuda112.dockerfile b/det-yolov4-mining/cuda112.dockerfile deleted file mode 100644 index 871b00f..0000000 --- a/det-yolov4-mining/cuda112.dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM industryessentials/ymir-executor:cuda112-yolov4-training - -RUN apt-get update && apt-get install -y --no-install-recommends libsm6 libxext6 libfontconfig1 libxrender1 libgl1-mesa-glx \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -RUN pip3 install --upgrade pip setuptools wheel && pip3 install opencv-python pyyaml scipy tqdm && rm -rf /root/.cache/pip3 - -COPY . 
/app -WORKDIR /app -RUN cp ./start.sh /usr/bin/start.sh && \ - mkdir -p /img-man && \ - cp ./mining-template.yaml /img-man/mining-template.yaml && \ - cp ./infer-template.yaml /img-man/infer-template.yaml && \ - cp ./README.md /img-man/readme.md -CMD sh /usr/bin/start.sh diff --git a/det-yolov4-training/.circleci/config.yml b/det-yolov4-tmi/.circleci/config.yml similarity index 100% rename from det-yolov4-training/.circleci/config.yml rename to det-yolov4-tmi/.circleci/config.yml diff --git a/det-yolov4-training/.travis.yml b/det-yolov4-tmi/.travis.yml similarity index 100% rename from det-yolov4-training/.travis.yml rename to det-yolov4-tmi/.travis.yml diff --git a/det-yolov4-training/3rdparty/pthreads/bin/pthreadGC2.dll b/det-yolov4-tmi/3rdparty/pthreads/bin/pthreadGC2.dll similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/bin/pthreadGC2.dll rename to det-yolov4-tmi/3rdparty/pthreads/bin/pthreadGC2.dll diff --git a/det-yolov4-training/3rdparty/pthreads/bin/pthreadVC2.dll b/det-yolov4-tmi/3rdparty/pthreads/bin/pthreadVC2.dll similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/bin/pthreadVC2.dll rename to det-yolov4-tmi/3rdparty/pthreads/bin/pthreadVC2.dll diff --git a/det-yolov4-training/3rdparty/pthreads/include/pthread.h b/det-yolov4-tmi/3rdparty/pthreads/include/pthread.h similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/include/pthread.h rename to det-yolov4-tmi/3rdparty/pthreads/include/pthread.h diff --git a/det-yolov4-training/3rdparty/pthreads/include/sched.h b/det-yolov4-tmi/3rdparty/pthreads/include/sched.h similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/include/sched.h rename to det-yolov4-tmi/3rdparty/pthreads/include/sched.h diff --git a/det-yolov4-training/3rdparty/pthreads/include/semaphore.h b/det-yolov4-tmi/3rdparty/pthreads/include/semaphore.h similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/include/semaphore.h rename to 
det-yolov4-tmi/3rdparty/pthreads/include/semaphore.h diff --git a/det-yolov4-training/3rdparty/pthreads/lib/libpthreadGC2.a b/det-yolov4-tmi/3rdparty/pthreads/lib/libpthreadGC2.a similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/lib/libpthreadGC2.a rename to det-yolov4-tmi/3rdparty/pthreads/lib/libpthreadGC2.a diff --git a/det-yolov4-training/3rdparty/pthreads/lib/pthreadVC2.lib b/det-yolov4-tmi/3rdparty/pthreads/lib/pthreadVC2.lib similarity index 100% rename from det-yolov4-training/3rdparty/pthreads/lib/pthreadVC2.lib rename to det-yolov4-tmi/3rdparty/pthreads/lib/pthreadVC2.lib diff --git a/det-yolov4-training/3rdparty/stb/include/stb_image.h b/det-yolov4-tmi/3rdparty/stb/include/stb_image.h similarity index 100% rename from det-yolov4-training/3rdparty/stb/include/stb_image.h rename to det-yolov4-tmi/3rdparty/stb/include/stb_image.h diff --git a/det-yolov4-training/3rdparty/stb/include/stb_image_write.h b/det-yolov4-tmi/3rdparty/stb/include/stb_image_write.h similarity index 100% rename from det-yolov4-training/3rdparty/stb/include/stb_image_write.h rename to det-yolov4-tmi/3rdparty/stb/include/stb_image_write.h diff --git a/det-yolov4-training/CMakeLists.txt b/det-yolov4-tmi/CMakeLists.txt similarity index 100% rename from det-yolov4-training/CMakeLists.txt rename to det-yolov4-tmi/CMakeLists.txt diff --git a/det-yolov4-training/DarknetConfig.cmake.in b/det-yolov4-tmi/DarknetConfig.cmake.in similarity index 100% rename from det-yolov4-training/DarknetConfig.cmake.in rename to det-yolov4-tmi/DarknetConfig.cmake.in diff --git a/det-yolov4-training/LICENSE b/det-yolov4-tmi/LICENSE similarity index 100% rename from det-yolov4-training/LICENSE rename to det-yolov4-tmi/LICENSE diff --git a/det-yolov4-training/Makefile b/det-yolov4-tmi/Makefile similarity index 100% rename from det-yolov4-training/Makefile rename to det-yolov4-tmi/Makefile diff --git a/det-yolov4-training/README.md b/det-yolov4-tmi/README.md similarity index 100% rename from 
det-yolov4-training/README.md rename to det-yolov4-tmi/README.md diff --git a/det-yolov4-training/build.ps1 b/det-yolov4-tmi/build.ps1 similarity index 100% rename from det-yolov4-training/build.ps1 rename to det-yolov4-tmi/build.ps1 diff --git a/det-yolov4-training/calc_map.sh b/det-yolov4-tmi/calc_map.sh similarity index 100% rename from det-yolov4-training/calc_map.sh rename to det-yolov4-tmi/calc_map.sh diff --git a/det-yolov4-training/cfg/9k.labels b/det-yolov4-tmi/cfg/9k.labels similarity index 100% rename from det-yolov4-training/cfg/9k.labels rename to det-yolov4-tmi/cfg/9k.labels diff --git a/det-yolov4-training/cfg/9k.names b/det-yolov4-tmi/cfg/9k.names similarity index 100% rename from det-yolov4-training/cfg/9k.names rename to det-yolov4-tmi/cfg/9k.names diff --git a/det-yolov4-training/cfg/9k.tree b/det-yolov4-tmi/cfg/9k.tree similarity index 100% rename from det-yolov4-training/cfg/9k.tree rename to det-yolov4-tmi/cfg/9k.tree diff --git a/det-yolov4-training/cfg/Gaussian_yolov3_BDD.cfg b/det-yolov4-tmi/cfg/Gaussian_yolov3_BDD.cfg similarity index 100% rename from det-yolov4-training/cfg/Gaussian_yolov3_BDD.cfg rename to det-yolov4-tmi/cfg/Gaussian_yolov3_BDD.cfg diff --git a/det-yolov4-training/cfg/alexnet.cfg b/det-yolov4-tmi/cfg/alexnet.cfg similarity index 100% rename from det-yolov4-training/cfg/alexnet.cfg rename to det-yolov4-tmi/cfg/alexnet.cfg diff --git a/det-yolov4-training/cfg/cd53paspp-gamma.cfg b/det-yolov4-tmi/cfg/cd53paspp-gamma.cfg similarity index 100% rename from det-yolov4-training/cfg/cd53paspp-gamma.cfg rename to det-yolov4-tmi/cfg/cd53paspp-gamma.cfg diff --git a/det-yolov4-training/cfg/cifar.cfg b/det-yolov4-tmi/cfg/cifar.cfg similarity index 100% rename from det-yolov4-training/cfg/cifar.cfg rename to det-yolov4-tmi/cfg/cifar.cfg diff --git a/det-yolov4-training/cfg/cifar.test.cfg b/det-yolov4-tmi/cfg/cifar.test.cfg similarity index 100% rename from det-yolov4-training/cfg/cifar.test.cfg rename to 
det-yolov4-tmi/cfg/cifar.test.cfg diff --git a/det-yolov4-training/cfg/coco.data b/det-yolov4-tmi/cfg/coco.data similarity index 100% rename from det-yolov4-training/cfg/coco.data rename to det-yolov4-tmi/cfg/coco.data diff --git a/det-yolov4-training/cfg/coco.names b/det-yolov4-tmi/cfg/coco.names similarity index 100% rename from det-yolov4-training/cfg/coco.names rename to det-yolov4-tmi/cfg/coco.names diff --git a/det-yolov4-training/cfg/coco9k.map b/det-yolov4-tmi/cfg/coco9k.map similarity index 100% rename from det-yolov4-training/cfg/coco9k.map rename to det-yolov4-tmi/cfg/coco9k.map diff --git a/det-yolov4-training/cfg/combine9k.data b/det-yolov4-tmi/cfg/combine9k.data similarity index 100% rename from det-yolov4-training/cfg/combine9k.data rename to det-yolov4-tmi/cfg/combine9k.data diff --git a/det-yolov4-training/cfg/crnn.train.cfg b/det-yolov4-tmi/cfg/crnn.train.cfg similarity index 100% rename from det-yolov4-training/cfg/crnn.train.cfg rename to det-yolov4-tmi/cfg/crnn.train.cfg diff --git a/det-yolov4-training/cfg/csdarknet53-omega.cfg b/det-yolov4-tmi/cfg/csdarknet53-omega.cfg similarity index 100% rename from det-yolov4-training/cfg/csdarknet53-omega.cfg rename to det-yolov4-tmi/cfg/csdarknet53-omega.cfg diff --git a/det-yolov4-training/cfg/cspx-p7-mish-omega.cfg b/det-yolov4-tmi/cfg/cspx-p7-mish-omega.cfg similarity index 100% rename from det-yolov4-training/cfg/cspx-p7-mish-omega.cfg rename to det-yolov4-tmi/cfg/cspx-p7-mish-omega.cfg diff --git a/det-yolov4-training/cfg/cspx-p7-mish.cfg b/det-yolov4-tmi/cfg/cspx-p7-mish.cfg similarity index 100% rename from det-yolov4-training/cfg/cspx-p7-mish.cfg rename to det-yolov4-tmi/cfg/cspx-p7-mish.cfg diff --git a/det-yolov4-training/cfg/cspx-p7-mish_hp.cfg b/det-yolov4-tmi/cfg/cspx-p7-mish_hp.cfg similarity index 100% rename from det-yolov4-training/cfg/cspx-p7-mish_hp.cfg rename to det-yolov4-tmi/cfg/cspx-p7-mish_hp.cfg diff --git a/det-yolov4-training/cfg/csresnext50-panet-spp-original-optimal.cfg 
b/det-yolov4-tmi/cfg/csresnext50-panet-spp-original-optimal.cfg similarity index 100% rename from det-yolov4-training/cfg/csresnext50-panet-spp-original-optimal.cfg rename to det-yolov4-tmi/cfg/csresnext50-panet-spp-original-optimal.cfg diff --git a/det-yolov4-training/cfg/csresnext50-panet-spp.cfg b/det-yolov4-tmi/cfg/csresnext50-panet-spp.cfg similarity index 100% rename from det-yolov4-training/cfg/csresnext50-panet-spp.cfg rename to det-yolov4-tmi/cfg/csresnext50-panet-spp.cfg diff --git a/det-yolov4-training/cfg/darknet.cfg b/det-yolov4-tmi/cfg/darknet.cfg similarity index 100% rename from det-yolov4-training/cfg/darknet.cfg rename to det-yolov4-tmi/cfg/darknet.cfg diff --git a/det-yolov4-training/cfg/darknet19.cfg b/det-yolov4-tmi/cfg/darknet19.cfg similarity index 100% rename from det-yolov4-training/cfg/darknet19.cfg rename to det-yolov4-tmi/cfg/darknet19.cfg diff --git a/det-yolov4-training/cfg/darknet19_448.cfg b/det-yolov4-tmi/cfg/darknet19_448.cfg similarity index 100% rename from det-yolov4-training/cfg/darknet19_448.cfg rename to det-yolov4-tmi/cfg/darknet19_448.cfg diff --git a/det-yolov4-training/cfg/darknet53.cfg b/det-yolov4-tmi/cfg/darknet53.cfg similarity index 100% rename from det-yolov4-training/cfg/darknet53.cfg rename to det-yolov4-tmi/cfg/darknet53.cfg diff --git a/det-yolov4-training/cfg/darknet53_448_xnor.cfg b/det-yolov4-tmi/cfg/darknet53_448_xnor.cfg similarity index 100% rename from det-yolov4-training/cfg/darknet53_448_xnor.cfg rename to det-yolov4-tmi/cfg/darknet53_448_xnor.cfg diff --git a/det-yolov4-training/cfg/densenet201.cfg b/det-yolov4-tmi/cfg/densenet201.cfg similarity index 100% rename from det-yolov4-training/cfg/densenet201.cfg rename to det-yolov4-tmi/cfg/densenet201.cfg diff --git a/det-yolov4-training/cfg/efficientnet-lite3.cfg b/det-yolov4-tmi/cfg/efficientnet-lite3.cfg similarity index 100% rename from det-yolov4-training/cfg/efficientnet-lite3.cfg rename to det-yolov4-tmi/cfg/efficientnet-lite3.cfg diff --git 
a/det-yolov4-training/cfg/efficientnet_b0.cfg b/det-yolov4-tmi/cfg/efficientnet_b0.cfg similarity index 100% rename from det-yolov4-training/cfg/efficientnet_b0.cfg rename to det-yolov4-tmi/cfg/efficientnet_b0.cfg diff --git a/det-yolov4-training/cfg/enet-coco.cfg b/det-yolov4-tmi/cfg/enet-coco.cfg similarity index 100% rename from det-yolov4-training/cfg/enet-coco.cfg rename to det-yolov4-tmi/cfg/enet-coco.cfg diff --git a/det-yolov4-training/cfg/extraction.cfg b/det-yolov4-tmi/cfg/extraction.cfg similarity index 100% rename from det-yolov4-training/cfg/extraction.cfg rename to det-yolov4-tmi/cfg/extraction.cfg diff --git a/det-yolov4-training/cfg/extraction.conv.cfg b/det-yolov4-tmi/cfg/extraction.conv.cfg similarity index 100% rename from det-yolov4-training/cfg/extraction.conv.cfg rename to det-yolov4-tmi/cfg/extraction.conv.cfg diff --git a/det-yolov4-training/cfg/extraction22k.cfg b/det-yolov4-tmi/cfg/extraction22k.cfg similarity index 100% rename from det-yolov4-training/cfg/extraction22k.cfg rename to det-yolov4-tmi/cfg/extraction22k.cfg diff --git a/det-yolov4-training/cfg/go.test.cfg b/det-yolov4-tmi/cfg/go.test.cfg similarity index 100% rename from det-yolov4-training/cfg/go.test.cfg rename to det-yolov4-tmi/cfg/go.test.cfg diff --git a/det-yolov4-training/cfg/gru.cfg b/det-yolov4-tmi/cfg/gru.cfg similarity index 100% rename from det-yolov4-training/cfg/gru.cfg rename to det-yolov4-tmi/cfg/gru.cfg diff --git a/det-yolov4-training/cfg/imagenet.labels.list b/det-yolov4-tmi/cfg/imagenet.labels.list similarity index 100% rename from det-yolov4-training/cfg/imagenet.labels.list rename to det-yolov4-tmi/cfg/imagenet.labels.list diff --git a/det-yolov4-training/cfg/imagenet.shortnames.list b/det-yolov4-tmi/cfg/imagenet.shortnames.list similarity index 100% rename from det-yolov4-training/cfg/imagenet.shortnames.list rename to det-yolov4-tmi/cfg/imagenet.shortnames.list diff --git a/det-yolov4-training/cfg/imagenet1k.data b/det-yolov4-tmi/cfg/imagenet1k.data 
similarity index 100% rename from det-yolov4-training/cfg/imagenet1k.data rename to det-yolov4-tmi/cfg/imagenet1k.data diff --git a/det-yolov4-training/cfg/imagenet22k.dataset b/det-yolov4-tmi/cfg/imagenet22k.dataset similarity index 100% rename from det-yolov4-training/cfg/imagenet22k.dataset rename to det-yolov4-tmi/cfg/imagenet22k.dataset diff --git a/det-yolov4-training/cfg/imagenet9k.hierarchy.dataset b/det-yolov4-tmi/cfg/imagenet9k.hierarchy.dataset similarity index 100% rename from det-yolov4-training/cfg/imagenet9k.hierarchy.dataset rename to det-yolov4-tmi/cfg/imagenet9k.hierarchy.dataset diff --git a/det-yolov4-training/cfg/inet9k.map b/det-yolov4-tmi/cfg/inet9k.map similarity index 100% rename from det-yolov4-training/cfg/inet9k.map rename to det-yolov4-tmi/cfg/inet9k.map diff --git a/det-yolov4-training/cfg/jnet-conv.cfg b/det-yolov4-tmi/cfg/jnet-conv.cfg similarity index 100% rename from det-yolov4-training/cfg/jnet-conv.cfg rename to det-yolov4-tmi/cfg/jnet-conv.cfg diff --git a/det-yolov4-training/cfg/lstm.train.cfg b/det-yolov4-tmi/cfg/lstm.train.cfg similarity index 100% rename from det-yolov4-training/cfg/lstm.train.cfg rename to det-yolov4-tmi/cfg/lstm.train.cfg diff --git a/det-yolov4-training/cfg/openimages.data b/det-yolov4-tmi/cfg/openimages.data similarity index 100% rename from det-yolov4-training/cfg/openimages.data rename to det-yolov4-tmi/cfg/openimages.data diff --git a/det-yolov4-training/cfg/resnet101.cfg b/det-yolov4-tmi/cfg/resnet101.cfg similarity index 100% rename from det-yolov4-training/cfg/resnet101.cfg rename to det-yolov4-tmi/cfg/resnet101.cfg diff --git a/det-yolov4-training/cfg/resnet152.cfg b/det-yolov4-tmi/cfg/resnet152.cfg similarity index 100% rename from det-yolov4-training/cfg/resnet152.cfg rename to det-yolov4-tmi/cfg/resnet152.cfg diff --git a/det-yolov4-training/cfg/resnet152_trident.cfg b/det-yolov4-tmi/cfg/resnet152_trident.cfg similarity index 100% rename from det-yolov4-training/cfg/resnet152_trident.cfg rename 
to det-yolov4-tmi/cfg/resnet152_trident.cfg diff --git a/det-yolov4-training/cfg/resnet50.cfg b/det-yolov4-tmi/cfg/resnet50.cfg similarity index 100% rename from det-yolov4-training/cfg/resnet50.cfg rename to det-yolov4-tmi/cfg/resnet50.cfg diff --git a/det-yolov4-training/cfg/resnext152-32x4d.cfg b/det-yolov4-tmi/cfg/resnext152-32x4d.cfg similarity index 100% rename from det-yolov4-training/cfg/resnext152-32x4d.cfg rename to det-yolov4-tmi/cfg/resnext152-32x4d.cfg diff --git a/det-yolov4-training/cfg/rnn.cfg b/det-yolov4-tmi/cfg/rnn.cfg similarity index 100% rename from det-yolov4-training/cfg/rnn.cfg rename to det-yolov4-tmi/cfg/rnn.cfg diff --git a/det-yolov4-training/cfg/rnn.train.cfg b/det-yolov4-tmi/cfg/rnn.train.cfg similarity index 100% rename from det-yolov4-training/cfg/rnn.train.cfg rename to det-yolov4-tmi/cfg/rnn.train.cfg diff --git a/det-yolov4-training/cfg/strided.cfg b/det-yolov4-tmi/cfg/strided.cfg similarity index 100% rename from det-yolov4-training/cfg/strided.cfg rename to det-yolov4-tmi/cfg/strided.cfg diff --git a/det-yolov4-training/cfg/t1.test.cfg b/det-yolov4-tmi/cfg/t1.test.cfg similarity index 100% rename from det-yolov4-training/cfg/t1.test.cfg rename to det-yolov4-tmi/cfg/t1.test.cfg diff --git a/det-yolov4-training/cfg/tiny-yolo-voc.cfg b/det-yolov4-tmi/cfg/tiny-yolo-voc.cfg similarity index 100% rename from det-yolov4-training/cfg/tiny-yolo-voc.cfg rename to det-yolov4-tmi/cfg/tiny-yolo-voc.cfg diff --git a/det-yolov4-training/cfg/tiny-yolo.cfg b/det-yolov4-tmi/cfg/tiny-yolo.cfg similarity index 100% rename from det-yolov4-training/cfg/tiny-yolo.cfg rename to det-yolov4-tmi/cfg/tiny-yolo.cfg diff --git a/det-yolov4-training/cfg/tiny-yolo_xnor.cfg b/det-yolov4-tmi/cfg/tiny-yolo_xnor.cfg similarity index 100% rename from det-yolov4-training/cfg/tiny-yolo_xnor.cfg rename to det-yolov4-tmi/cfg/tiny-yolo_xnor.cfg diff --git a/det-yolov4-training/cfg/tiny.cfg b/det-yolov4-tmi/cfg/tiny.cfg similarity index 100% rename from 
det-yolov4-training/cfg/tiny.cfg rename to det-yolov4-tmi/cfg/tiny.cfg diff --git a/det-yolov4-training/cfg/vgg-16.cfg b/det-yolov4-tmi/cfg/vgg-16.cfg similarity index 100% rename from det-yolov4-training/cfg/vgg-16.cfg rename to det-yolov4-tmi/cfg/vgg-16.cfg diff --git a/det-yolov4-training/cfg/vgg-conv.cfg b/det-yolov4-tmi/cfg/vgg-conv.cfg similarity index 100% rename from det-yolov4-training/cfg/vgg-conv.cfg rename to det-yolov4-tmi/cfg/vgg-conv.cfg diff --git a/det-yolov4-training/cfg/voc.data b/det-yolov4-tmi/cfg/voc.data similarity index 100% rename from det-yolov4-training/cfg/voc.data rename to det-yolov4-tmi/cfg/voc.data diff --git a/det-yolov4-training/cfg/writing.cfg b/det-yolov4-tmi/cfg/writing.cfg similarity index 100% rename from det-yolov4-training/cfg/writing.cfg rename to det-yolov4-tmi/cfg/writing.cfg diff --git a/det-yolov4-training/cfg/yolo-voc.2.0.cfg b/det-yolov4-tmi/cfg/yolo-voc.2.0.cfg similarity index 100% rename from det-yolov4-training/cfg/yolo-voc.2.0.cfg rename to det-yolov4-tmi/cfg/yolo-voc.2.0.cfg diff --git a/det-yolov4-training/cfg/yolo-voc.cfg b/det-yolov4-tmi/cfg/yolo-voc.cfg similarity index 100% rename from det-yolov4-training/cfg/yolo-voc.cfg rename to det-yolov4-tmi/cfg/yolo-voc.cfg diff --git a/det-yolov4-training/cfg/yolo.2.0.cfg b/det-yolov4-tmi/cfg/yolo.2.0.cfg similarity index 100% rename from det-yolov4-training/cfg/yolo.2.0.cfg rename to det-yolov4-tmi/cfg/yolo.2.0.cfg diff --git a/det-yolov4-training/cfg/yolo.cfg b/det-yolov4-tmi/cfg/yolo.cfg similarity index 100% rename from det-yolov4-training/cfg/yolo.cfg rename to det-yolov4-tmi/cfg/yolo.cfg diff --git a/det-yolov4-training/cfg/yolo9000.cfg b/det-yolov4-tmi/cfg/yolo9000.cfg similarity index 100% rename from det-yolov4-training/cfg/yolo9000.cfg rename to det-yolov4-tmi/cfg/yolo9000.cfg diff --git a/det-yolov4-training/cfg/yolov1/tiny-coco.cfg b/det-yolov4-tmi/cfg/yolov1/tiny-coco.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/tiny-coco.cfg 
rename to det-yolov4-tmi/cfg/yolov1/tiny-coco.cfg diff --git a/det-yolov4-training/cfg/yolov1/tiny-yolo.cfg b/det-yolov4-tmi/cfg/yolov1/tiny-yolo.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/tiny-yolo.cfg rename to det-yolov4-tmi/cfg/yolov1/tiny-yolo.cfg diff --git a/det-yolov4-training/cfg/yolov1/xyolo.test.cfg b/det-yolov4-tmi/cfg/yolov1/xyolo.test.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/xyolo.test.cfg rename to det-yolov4-tmi/cfg/yolov1/xyolo.test.cfg diff --git a/det-yolov4-training/cfg/yolov1/yolo-coco.cfg b/det-yolov4-tmi/cfg/yolov1/yolo-coco.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/yolo-coco.cfg rename to det-yolov4-tmi/cfg/yolov1/yolo-coco.cfg diff --git a/det-yolov4-training/cfg/yolov1/yolo-small.cfg b/det-yolov4-tmi/cfg/yolov1/yolo-small.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/yolo-small.cfg rename to det-yolov4-tmi/cfg/yolov1/yolo-small.cfg diff --git a/det-yolov4-training/cfg/yolov1/yolo.cfg b/det-yolov4-tmi/cfg/yolov1/yolo.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/yolo.cfg rename to det-yolov4-tmi/cfg/yolov1/yolo.cfg diff --git a/det-yolov4-training/cfg/yolov1/yolo.train.cfg b/det-yolov4-tmi/cfg/yolov1/yolo.train.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/yolo.train.cfg rename to det-yolov4-tmi/cfg/yolov1/yolo.train.cfg diff --git a/det-yolov4-training/cfg/yolov1/yolo2.cfg b/det-yolov4-tmi/cfg/yolov1/yolo2.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov1/yolo2.cfg rename to det-yolov4-tmi/cfg/yolov1/yolo2.cfg diff --git a/det-yolov4-training/cfg/yolov2-tiny-voc.cfg b/det-yolov4-tmi/cfg/yolov2-tiny-voc.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov2-tiny-voc.cfg rename to det-yolov4-tmi/cfg/yolov2-tiny-voc.cfg diff --git a/det-yolov4-training/cfg/yolov2-tiny.cfg b/det-yolov4-tmi/cfg/yolov2-tiny.cfg similarity index 100% rename from 
det-yolov4-training/cfg/yolov2-tiny.cfg rename to det-yolov4-tmi/cfg/yolov2-tiny.cfg diff --git a/det-yolov4-training/cfg/yolov2-voc.cfg b/det-yolov4-tmi/cfg/yolov2-voc.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov2-voc.cfg rename to det-yolov4-tmi/cfg/yolov2-voc.cfg diff --git a/det-yolov4-training/cfg/yolov2.cfg b/det-yolov4-tmi/cfg/yolov2.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov2.cfg rename to det-yolov4-tmi/cfg/yolov2.cfg diff --git a/det-yolov4-training/cfg/yolov3-openimages.cfg b/det-yolov4-tmi/cfg/yolov3-openimages.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-openimages.cfg rename to det-yolov4-tmi/cfg/yolov3-openimages.cfg diff --git a/det-yolov4-training/cfg/yolov3-spp.cfg b/det-yolov4-tmi/cfg/yolov3-spp.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-spp.cfg rename to det-yolov4-tmi/cfg/yolov3-spp.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny-prn.cfg b/det-yolov4-tmi/cfg/yolov3-tiny-prn.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-tiny-prn.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny-prn.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny.cfg b/det-yolov4-tmi/cfg/yolov3-tiny.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-tiny.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny_3l.cfg b/det-yolov4-tmi/cfg/yolov3-tiny_3l.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-tiny_3l.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny_3l.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny_obj.cfg b/det-yolov4-tmi/cfg/yolov3-tiny_obj.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-tiny_obj.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny_obj.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny_occlusion_track.cfg b/det-yolov4-tmi/cfg/yolov3-tiny_occlusion_track.cfg similarity index 100% rename from 
det-yolov4-training/cfg/yolov3-tiny_occlusion_track.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny_occlusion_track.cfg diff --git a/det-yolov4-training/cfg/yolov3-tiny_xnor.cfg b/det-yolov4-tmi/cfg/yolov3-tiny_xnor.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-tiny_xnor.cfg rename to det-yolov4-tmi/cfg/yolov3-tiny_xnor.cfg diff --git a/det-yolov4-training/cfg/yolov3-voc.cfg b/det-yolov4-tmi/cfg/yolov3-voc.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-voc.cfg rename to det-yolov4-tmi/cfg/yolov3-voc.cfg diff --git a/det-yolov4-training/cfg/yolov3-voc.yolov3-giou-40.cfg b/det-yolov4-tmi/cfg/yolov3-voc.yolov3-giou-40.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3-voc.yolov3-giou-40.cfg rename to det-yolov4-tmi/cfg/yolov3-voc.yolov3-giou-40.cfg diff --git a/det-yolov4-training/cfg/yolov3.cfg b/det-yolov4-tmi/cfg/yolov3.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3.cfg rename to det-yolov4-tmi/cfg/yolov3.cfg diff --git a/det-yolov4-training/cfg/yolov3.coco-giou-12.cfg b/det-yolov4-tmi/cfg/yolov3.coco-giou-12.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3.coco-giou-12.cfg rename to det-yolov4-tmi/cfg/yolov3.coco-giou-12.cfg diff --git a/det-yolov4-training/cfg/yolov3_5l.cfg b/det-yolov4-tmi/cfg/yolov3_5l.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov3_5l.cfg rename to det-yolov4-tmi/cfg/yolov3_5l.cfg diff --git a/det-yolov4-training/cfg/yolov4-csp-swish.cfg b/det-yolov4-tmi/cfg/yolov4-csp-swish.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-csp-swish.cfg rename to det-yolov4-tmi/cfg/yolov4-csp-swish.cfg diff --git a/det-yolov4-training/cfg/yolov4-csp-x-swish-frozen.cfg b/det-yolov4-tmi/cfg/yolov4-csp-x-swish-frozen.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-csp-x-swish-frozen.cfg rename to det-yolov4-tmi/cfg/yolov4-csp-x-swish-frozen.cfg diff --git 
a/det-yolov4-training/cfg/yolov4-csp-x-swish.cfg b/det-yolov4-tmi/cfg/yolov4-csp-x-swish.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-csp-x-swish.cfg rename to det-yolov4-tmi/cfg/yolov4-csp-x-swish.cfg diff --git a/det-yolov4-training/cfg/yolov4-csp.cfg b/det-yolov4-tmi/cfg/yolov4-csp.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-csp.cfg rename to det-yolov4-tmi/cfg/yolov4-csp.cfg diff --git a/det-yolov4-training/cfg/yolov4-custom.cfg b/det-yolov4-tmi/cfg/yolov4-custom.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-custom.cfg rename to det-yolov4-tmi/cfg/yolov4-custom.cfg diff --git a/det-yolov4-training/cfg/yolov4-p5-frozen.cfg b/det-yolov4-tmi/cfg/yolov4-p5-frozen.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-p5-frozen.cfg rename to det-yolov4-tmi/cfg/yolov4-p5-frozen.cfg diff --git a/det-yolov4-training/cfg/yolov4-p5.cfg b/det-yolov4-tmi/cfg/yolov4-p5.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-p5.cfg rename to det-yolov4-tmi/cfg/yolov4-p5.cfg diff --git a/det-yolov4-training/cfg/yolov4-p6.cfg b/det-yolov4-tmi/cfg/yolov4-p6.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-p6.cfg rename to det-yolov4-tmi/cfg/yolov4-p6.cfg diff --git a/det-yolov4-training/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg b/det-yolov4-tmi/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg rename to det-yolov4-tmi/cfg/yolov4-sam-mish-csp-reorg-bfm.cfg diff --git a/det-yolov4-training/cfg/yolov4-tiny-3l.cfg b/det-yolov4-tmi/cfg/yolov4-tiny-3l.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-tiny-3l.cfg rename to det-yolov4-tmi/cfg/yolov4-tiny-3l.cfg diff --git a/det-yolov4-training/cfg/yolov4-tiny-custom.cfg b/det-yolov4-tmi/cfg/yolov4-tiny-custom.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-tiny-custom.cfg rename to 
det-yolov4-tmi/cfg/yolov4-tiny-custom.cfg diff --git a/det-yolov4-training/cfg/yolov4-tiny.cfg b/det-yolov4-tmi/cfg/yolov4-tiny.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-tiny.cfg rename to det-yolov4-tmi/cfg/yolov4-tiny.cfg diff --git a/det-yolov4-training/cfg/yolov4-tiny_contrastive.cfg b/det-yolov4-tmi/cfg/yolov4-tiny_contrastive.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4-tiny_contrastive.cfg rename to det-yolov4-tmi/cfg/yolov4-tiny_contrastive.cfg diff --git a/det-yolov4-training/cfg/yolov4.cfg b/det-yolov4-tmi/cfg/yolov4.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4.cfg rename to det-yolov4-tmi/cfg/yolov4.cfg diff --git a/det-yolov4-training/cfg/yolov4_iter1000.cfg b/det-yolov4-tmi/cfg/yolov4_iter1000.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4_iter1000.cfg rename to det-yolov4-tmi/cfg/yolov4_iter1000.cfg diff --git a/det-yolov4-training/cfg/yolov4x-mish.cfg b/det-yolov4-tmi/cfg/yolov4x-mish.cfg similarity index 100% rename from det-yolov4-training/cfg/yolov4x-mish.cfg rename to det-yolov4-tmi/cfg/yolov4x-mish.cfg diff --git a/det-yolov4-training/cmake/Modules/FindCUDNN.cmake b/det-yolov4-tmi/cmake/Modules/FindCUDNN.cmake similarity index 100% rename from det-yolov4-training/cmake/Modules/FindCUDNN.cmake rename to det-yolov4-tmi/cmake/Modules/FindCUDNN.cmake diff --git a/det-yolov4-training/cmake/Modules/FindPThreads4W.cmake b/det-yolov4-tmi/cmake/Modules/FindPThreads4W.cmake similarity index 100% rename from det-yolov4-training/cmake/Modules/FindPThreads4W.cmake rename to det-yolov4-tmi/cmake/Modules/FindPThreads4W.cmake diff --git a/det-yolov4-training/cmake/Modules/FindStb.cmake b/det-yolov4-tmi/cmake/Modules/FindStb.cmake similarity index 100% rename from det-yolov4-training/cmake/Modules/FindStb.cmake rename to det-yolov4-tmi/cmake/Modules/FindStb.cmake diff --git a/det-yolov4-training/config_and_train.py b/det-yolov4-tmi/config_and_train.py similarity 
index 100% rename from det-yolov4-training/config_and_train.py rename to det-yolov4-tmi/config_and_train.py diff --git a/det-yolov4-training/convert_label_ark2txt.py b/det-yolov4-tmi/convert_label_ark2txt.py similarity index 97% rename from det-yolov4-training/convert_label_ark2txt.py rename to det-yolov4-tmi/convert_label_ark2txt.py index 1043b53..ae54b63 100755 --- a/det-yolov4-training/convert_label_ark2txt.py +++ b/det-yolov4-tmi/convert_label_ark2txt.py @@ -21,9 +21,10 @@ def _convert_annotations(index_file_path: str, dst_annotations_dir: str) -> None files = f.readlines() files = [each.strip() for each in files] + N = len(files) for i, each_img_anno_path in enumerate(files): if i % 1000 == 0: - print(f"converted {i} image annotations") + print(f"converted {i}/{N} image annotations") # each_imgpath: asset path # each_txtfile: annotation path diff --git a/det-yolov4-training/convert_model_darknet2mxnet_yolov4.py b/det-yolov4-tmi/convert_model_darknet2mxnet_yolov4.py similarity index 100% rename from det-yolov4-training/convert_model_darknet2mxnet_yolov4.py rename to det-yolov4-tmi/convert_model_darknet2mxnet_yolov4.py diff --git a/det-yolov4-training/counters_per_class.txt b/det-yolov4-tmi/counters_per_class.txt similarity index 100% rename from det-yolov4-training/counters_per_class.txt rename to det-yolov4-tmi/counters_per_class.txt diff --git a/det-yolov4-training/Dockerfile b/det-yolov4-tmi/cuda101.dockerfile similarity index 82% rename from det-yolov4-training/Dockerfile rename to det-yolov4-tmi/cuda101.dockerfile index 61ce1f6..5a5a2b5 100644 --- a/det-yolov4-training/Dockerfile +++ b/det-yolov4-tmi/cuda101.dockerfile @@ -1,5 +1,8 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple + +ENV PYTHONPATH=. 
+ WORKDIR /darknet RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update @@ -12,11 +15,12 @@ RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_o RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py -RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev COPY . /darknet -RUN cp /darknet/make_train_test_darknet.sh /usr/bin/start.sh -RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml RUN make -j + +RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml && cp /darknet/mining/*-template.yaml /img-man +RUN echo "python3 /darknet/start.py" > /usr/bin/start.sh CMD bash /usr/bin/start.sh diff --git a/det-yolov4-training/cuda112.dockerfile b/det-yolov4-tmi/cuda112.dockerfile similarity index 82% rename from det-yolov4-training/cuda112.dockerfile rename to det-yolov4-tmi/cuda112.dockerfile index 3e6884b..aac49de 100644 --- a/det-yolov4-training/cuda112.dockerfile +++ b/det-yolov4-tmi/cuda112.dockerfile @@ -1,5 +1,8 @@ FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 ARG PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple + +ENV PYTHONPATH=. 
+ WORKDIR /darknet RUN sed -i 's#http://archive.ubuntu.com#https://mirrors.ustc.edu.cn#g' /etc/apt/sources.list RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC && apt-get update @@ -12,12 +15,13 @@ RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_o RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py -RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev COPY . /darknet -RUN cp /darknet/make_train_test_darknet.sh /usr/bin/start.sh -RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml RUN make -j + +RUN mkdir /img-man && cp /darknet/training-template.yaml /img-man/training-template.yaml && cp /darknet/mining/*-template.yaml /img-man +RUN echo "python3 /darknet/start.py" > /usr/bin/start.sh CMD bash /usr/bin/start.sh diff --git a/det-yolov4-training/darknet.py b/det-yolov4-tmi/darknet.py similarity index 100% rename from det-yolov4-training/darknet.py rename to det-yolov4-tmi/darknet.py diff --git a/det-yolov4-training/darknet_images.py b/det-yolov4-tmi/darknet_images.py similarity index 100% rename from det-yolov4-training/darknet_images.py rename to det-yolov4-tmi/darknet_images.py diff --git a/det-yolov4-training/darknet_video.py b/det-yolov4-tmi/darknet_video.py similarity index 100% rename from det-yolov4-training/darknet_video.py rename to det-yolov4-tmi/darknet_video.py diff --git a/det-yolov4-training/data/9k.tree b/det-yolov4-tmi/data/9k.tree similarity index 100% rename from det-yolov4-training/data/9k.tree rename to det-yolov4-tmi/data/9k.tree diff --git a/det-yolov4-training/data/coco.names b/det-yolov4-tmi/data/coco.names similarity index 100% rename from 
det-yolov4-training/data/coco.names rename to det-yolov4-tmi/data/coco.names diff --git a/det-yolov4-training/data/coco9k.map b/det-yolov4-tmi/data/coco9k.map similarity index 100% rename from det-yolov4-training/data/coco9k.map rename to det-yolov4-tmi/data/coco9k.map diff --git a/det-yolov4-training/data/goal.txt b/det-yolov4-tmi/data/goal.txt similarity index 100% rename from det-yolov4-training/data/goal.txt rename to det-yolov4-tmi/data/goal.txt diff --git a/det-yolov4-training/data/imagenet.labels.list b/det-yolov4-tmi/data/imagenet.labels.list similarity index 100% rename from det-yolov4-training/data/imagenet.labels.list rename to det-yolov4-tmi/data/imagenet.labels.list diff --git a/det-yolov4-training/data/imagenet.shortnames.list b/det-yolov4-tmi/data/imagenet.shortnames.list similarity index 100% rename from det-yolov4-training/data/imagenet.shortnames.list rename to det-yolov4-tmi/data/imagenet.shortnames.list diff --git a/det-yolov4-training/data/labels/make_labels.py b/det-yolov4-tmi/data/labels/make_labels.py similarity index 100% rename from det-yolov4-training/data/labels/make_labels.py rename to det-yolov4-tmi/data/labels/make_labels.py diff --git a/det-yolov4-training/data/openimages.names b/det-yolov4-tmi/data/openimages.names similarity index 100% rename from det-yolov4-training/data/openimages.names rename to det-yolov4-tmi/data/openimages.names diff --git a/det-yolov4-training/data/voc.names b/det-yolov4-tmi/data/voc.names similarity index 100% rename from det-yolov4-training/data/voc.names rename to det-yolov4-tmi/data/voc.names diff --git a/det-yolov4-training/image_yolov3.sh b/det-yolov4-tmi/image_yolov3.sh similarity index 100% rename from det-yolov4-training/image_yolov3.sh rename to det-yolov4-tmi/image_yolov3.sh diff --git a/det-yolov4-training/image_yolov4.sh b/det-yolov4-tmi/image_yolov4.sh similarity index 100% rename from det-yolov4-training/image_yolov4.sh rename to det-yolov4-tmi/image_yolov4.sh diff --git 
a/det-yolov4-training/img.txt b/det-yolov4-tmi/img.txt similarity index 100% rename from det-yolov4-training/img.txt rename to det-yolov4-tmi/img.txt diff --git a/det-yolov4-training/include/darknet.h b/det-yolov4-tmi/include/darknet.h similarity index 100% rename from det-yolov4-training/include/darknet.h rename to det-yolov4-tmi/include/darknet.h diff --git a/det-yolov4-training/include/yolo_v2_class.hpp b/det-yolov4-tmi/include/yolo_v2_class.hpp similarity index 100% rename from det-yolov4-training/include/yolo_v2_class.hpp rename to det-yolov4-tmi/include/yolo_v2_class.hpp diff --git a/det-yolov4-training/json_mjpeg_streams.sh b/det-yolov4-tmi/json_mjpeg_streams.sh similarity index 100% rename from det-yolov4-training/json_mjpeg_streams.sh rename to det-yolov4-tmi/json_mjpeg_streams.sh diff --git a/det-yolov4-training/make_train_test_darknet.sh b/det-yolov4-tmi/make_train_test_darknet.sh similarity index 100% rename from det-yolov4-training/make_train_test_darknet.sh rename to det-yolov4-tmi/make_train_test_darknet.sh diff --git a/det-yolov4-mining/.dockerignore b/det-yolov4-tmi/mining/.dockerignore similarity index 100% rename from det-yolov4-mining/.dockerignore rename to det-yolov4-tmi/mining/.dockerignore diff --git a/det-yolov4-mining/README.md b/det-yolov4-tmi/mining/README.md similarity index 100% rename from det-yolov4-mining/README.md rename to det-yolov4-tmi/mining/README.md diff --git a/det-yolov4-mining/active_learning/__init__.py b/det-yolov4-tmi/mining/active_learning/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/__init__.py rename to det-yolov4-tmi/mining/active_learning/__init__.py diff --git a/det-yolov4-mining/active_learning/apis/__init__.py b/det-yolov4-tmi/mining/active_learning/apis/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/apis/__init__.py rename to det-yolov4-tmi/mining/active_learning/apis/__init__.py diff --git a/det-yolov4-mining/active_learning/apis/al_api.py 
b/det-yolov4-tmi/mining/active_learning/apis/al_api.py similarity index 100% rename from det-yolov4-mining/active_learning/apis/al_api.py rename to det-yolov4-tmi/mining/active_learning/apis/al_api.py diff --git a/det-yolov4-mining/active_learning/apis/docker_api.py b/det-yolov4-tmi/mining/active_learning/apis/docker_api.py similarity index 100% rename from det-yolov4-mining/active_learning/apis/docker_api.py rename to det-yolov4-tmi/mining/active_learning/apis/docker_api.py diff --git a/det-yolov4-mining/active_learning/dataset/__init__.py b/det-yolov4-tmi/mining/active_learning/dataset/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/dataset/__init__.py rename to det-yolov4-tmi/mining/active_learning/dataset/__init__.py diff --git a/det-yolov4-mining/active_learning/dataset/datareader.py b/det-yolov4-tmi/mining/active_learning/dataset/datareader.py similarity index 100% rename from det-yolov4-mining/active_learning/dataset/datareader.py rename to det-yolov4-tmi/mining/active_learning/dataset/datareader.py diff --git a/det-yolov4-mining/active_learning/dataset/labeled_dataset.py b/det-yolov4-tmi/mining/active_learning/dataset/labeled_dataset.py similarity index 100% rename from det-yolov4-mining/active_learning/dataset/labeled_dataset.py rename to det-yolov4-tmi/mining/active_learning/dataset/labeled_dataset.py diff --git a/det-yolov4-mining/active_learning/dataset/unlabeled_dataset.py b/det-yolov4-tmi/mining/active_learning/dataset/unlabeled_dataset.py similarity index 100% rename from det-yolov4-mining/active_learning/dataset/unlabeled_dataset.py rename to det-yolov4-tmi/mining/active_learning/dataset/unlabeled_dataset.py diff --git a/det-yolov4-mining/active_learning/model_inference/__init__.py b/det-yolov4-tmi/mining/active_learning/model_inference/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/model_inference/__init__.py rename to det-yolov4-tmi/mining/active_learning/model_inference/__init__.py 
diff --git a/det-yolov4-mining/active_learning/model_inference/centernet.py b/det-yolov4-tmi/mining/active_learning/model_inference/centernet.py similarity index 100% rename from det-yolov4-mining/active_learning/model_inference/centernet.py rename to det-yolov4-tmi/mining/active_learning/model_inference/centernet.py diff --git a/det-yolov4-mining/active_learning/model_inference/yolo_models.py b/det-yolov4-tmi/mining/active_learning/model_inference/yolo_models.py similarity index 100% rename from det-yolov4-mining/active_learning/model_inference/yolo_models.py rename to det-yolov4-tmi/mining/active_learning/model_inference/yolo_models.py diff --git a/det-yolov4-mining/active_learning/strategy/__init__.py b/det-yolov4-tmi/mining/active_learning/strategy/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/__init__.py rename to det-yolov4-tmi/mining/active_learning/strategy/__init__.py diff --git a/det-yolov4-mining/active_learning/strategy/aldd.py b/det-yolov4-tmi/mining/active_learning/strategy/aldd.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/aldd.py rename to det-yolov4-tmi/mining/active_learning/strategy/aldd.py diff --git a/det-yolov4-mining/active_learning/strategy/aldd_yolo.py b/det-yolov4-tmi/mining/active_learning/strategy/aldd_yolo.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/aldd_yolo.py rename to det-yolov4-tmi/mining/active_learning/strategy/aldd_yolo.py diff --git a/det-yolov4-mining/active_learning/strategy/cald.py b/det-yolov4-tmi/mining/active_learning/strategy/cald.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/cald.py rename to det-yolov4-tmi/mining/active_learning/strategy/cald.py diff --git a/det-yolov4-mining/active_learning/strategy/data_augment.py b/det-yolov4-tmi/mining/active_learning/strategy/data_augment.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/data_augment.py 
rename to det-yolov4-tmi/mining/active_learning/strategy/data_augment.py diff --git a/det-yolov4-mining/active_learning/strategy/random_strategy.py b/det-yolov4-tmi/mining/active_learning/strategy/random_strategy.py similarity index 100% rename from det-yolov4-mining/active_learning/strategy/random_strategy.py rename to det-yolov4-tmi/mining/active_learning/strategy/random_strategy.py diff --git a/det-yolov4-mining/active_learning/utils/__init__.py b/det-yolov4-tmi/mining/active_learning/utils/__init__.py similarity index 100% rename from det-yolov4-mining/active_learning/utils/__init__.py rename to det-yolov4-tmi/mining/active_learning/utils/__init__.py diff --git a/det-yolov4-mining/active_learning/utils/al_log.py b/det-yolov4-tmi/mining/active_learning/utils/al_log.py similarity index 100% rename from det-yolov4-mining/active_learning/utils/al_log.py rename to det-yolov4-tmi/mining/active_learning/utils/al_log.py diff --git a/det-yolov4-mining/active_learning/utils/operator.py b/det-yolov4-tmi/mining/active_learning/utils/operator.py similarity index 100% rename from det-yolov4-mining/active_learning/utils/operator.py rename to det-yolov4-tmi/mining/active_learning/utils/operator.py diff --git a/det-yolov4-mining/al_main.py b/det-yolov4-tmi/mining/al_main.py similarity index 100% rename from det-yolov4-mining/al_main.py rename to det-yolov4-tmi/mining/al_main.py diff --git a/det-yolov4-mining/combined_class.txt b/det-yolov4-tmi/mining/combined_class.txt similarity index 100% rename from det-yolov4-mining/combined_class.txt rename to det-yolov4-tmi/mining/combined_class.txt diff --git a/det-yolov4-mining/docker_main.py b/det-yolov4-tmi/mining/docker_main.py similarity index 88% rename from det-yolov4-mining/docker_main.py rename to det-yolov4-tmi/mining/docker_main.py index 3eb4641..5f65377 100644 --- a/det-yolov4-mining/docker_main.py +++ b/det-yolov4-tmi/mining/docker_main.py @@ -9,8 +9,8 @@ import write_result -def _load_config() -> dict: - with 
open("/in/config.yaml", "r", encoding='utf8') as f: +def _load_config(config_file) -> dict: + with open(config_file, "r", encoding='utf8') as f: config = yaml.safe_load(f) # set default task id @@ -34,16 +34,17 @@ def _load_config() -> dict: if __name__ == '__main__': - config = _load_config() + config = _load_config("/in/config.yaml") - run_infer = int(config['run_infer']) - run_mining = int(config['run_mining']) + env_config = _load_config("/in/env.yaml") + run_infer = env_config['run_infer']=='true' + run_mining = env_config['run_mining']=='true' if not run_infer and not run_mining: raise ValueError('both run_infer and run_mining set to 0, abort') - monitor_process.run_mining = run_mining - monitor_process.run_infer = run_infer + monitor_process.run_mining = int(run_mining) + monitor_process.run_infer = int(run_infer) log_writer = LogWriter(monitor_path="/out/monitor.txt", monitor_pure_path="/out/monitor-log.txt", diff --git a/det-yolov4-mining/docker_readme.md b/det-yolov4-tmi/mining/docker_readme.md similarity index 100% rename from det-yolov4-mining/docker_readme.md rename to det-yolov4-tmi/mining/docker_readme.md diff --git a/det-yolov4-mining/infer-template.yaml b/det-yolov4-tmi/mining/infer-template.yaml similarity index 100% rename from det-yolov4-mining/infer-template.yaml rename to det-yolov4-tmi/mining/infer-template.yaml diff --git a/det-yolov4-mining/mining-template.yaml b/det-yolov4-tmi/mining/mining-template.yaml similarity index 95% rename from det-yolov4-mining/mining-template.yaml rename to det-yolov4-tmi/mining/mining-template.yaml index e02770f..aeee009 100644 --- a/det-yolov4-mining/mining-template.yaml +++ b/det-yolov4-tmi/mining/mining-template.yaml @@ -13,7 +13,7 @@ model_type: detection strategy: aldd_yolo image_height: 608 image_width: 608 -batch_size: 16 +batch_size: 4 anchors: '12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401' confidence_thresh: 0.1 nms_thresh: 0.45 @@ -23,4 +23,4 @@ max_boxes: 50 # 
model_params_path: [] # task_id: cycle-node-mined-0 # class_names: -# - expose_rubbish \ No newline at end of file +# - expose_rubbish diff --git a/det-yolov4-mining/monitor_process.py b/det-yolov4-tmi/mining/monitor_process.py similarity index 100% rename from det-yolov4-mining/monitor_process.py rename to det-yolov4-tmi/mining/monitor_process.py diff --git a/det-yolov4-mining/start.sh b/det-yolov4-tmi/mining/start.sh similarity index 100% rename from det-yolov4-mining/start.sh rename to det-yolov4-tmi/mining/start.sh diff --git a/det-yolov4-mining/test_api.py b/det-yolov4-tmi/mining/test_api.py similarity index 100% rename from det-yolov4-mining/test_api.py rename to det-yolov4-tmi/mining/test_api.py diff --git a/det-yolov4-mining/test_centernet.py b/det-yolov4-tmi/mining/test_centernet.py similarity index 100% rename from det-yolov4-mining/test_centernet.py rename to det-yolov4-tmi/mining/test_centernet.py diff --git a/det-yolov4-mining/tools/al_strategsy_union.py b/det-yolov4-tmi/mining/tools/al_strategsy_union.py similarity index 100% rename from det-yolov4-mining/tools/al_strategsy_union.py rename to det-yolov4-tmi/mining/tools/al_strategsy_union.py diff --git a/det-yolov4-mining/tools/imagenet_hard_negative.py b/det-yolov4-tmi/mining/tools/imagenet_hard_negative.py similarity index 100% rename from det-yolov4-mining/tools/imagenet_hard_negative.py rename to det-yolov4-tmi/mining/tools/imagenet_hard_negative.py diff --git a/det-yolov4-mining/tools/plot_dataset_class_hist.py b/det-yolov4-tmi/mining/tools/plot_dataset_class_hist.py similarity index 100% rename from det-yolov4-mining/tools/plot_dataset_class_hist.py rename to det-yolov4-tmi/mining/tools/plot_dataset_class_hist.py diff --git a/det-yolov4-mining/tools/visualize_aldd.py b/det-yolov4-tmi/mining/tools/visualize_aldd.py similarity index 100% rename from det-yolov4-mining/tools/visualize_aldd.py rename to det-yolov4-tmi/mining/tools/visualize_aldd.py diff --git 
a/det-yolov4-mining/tools/visualize_cald.py b/det-yolov4-tmi/mining/tools/visualize_cald.py similarity index 100% rename from det-yolov4-mining/tools/visualize_cald.py rename to det-yolov4-tmi/mining/tools/visualize_cald.py diff --git a/det-yolov4-mining/write_result.py b/det-yolov4-tmi/mining/write_result.py similarity index 100% rename from det-yolov4-mining/write_result.py rename to det-yolov4-tmi/mining/write_result.py diff --git a/det-yolov4-training/net_cam_v3.sh b/det-yolov4-tmi/net_cam_v3.sh similarity index 100% rename from det-yolov4-training/net_cam_v3.sh rename to det-yolov4-tmi/net_cam_v3.sh diff --git a/det-yolov4-training/net_cam_v4.sh b/det-yolov4-tmi/net_cam_v4.sh similarity index 100% rename from det-yolov4-training/net_cam_v4.sh rename to det-yolov4-tmi/net_cam_v4.sh diff --git a/det-yolov4-training/src/.editorconfig b/det-yolov4-tmi/src/.editorconfig similarity index 100% rename from det-yolov4-training/src/.editorconfig rename to det-yolov4-tmi/src/.editorconfig diff --git a/det-yolov4-training/src/activation_kernels.cu b/det-yolov4-tmi/src/activation_kernels.cu similarity index 100% rename from det-yolov4-training/src/activation_kernels.cu rename to det-yolov4-tmi/src/activation_kernels.cu diff --git a/det-yolov4-training/src/activation_layer.c b/det-yolov4-tmi/src/activation_layer.c similarity index 100% rename from det-yolov4-training/src/activation_layer.c rename to det-yolov4-tmi/src/activation_layer.c diff --git a/det-yolov4-training/src/activation_layer.h b/det-yolov4-tmi/src/activation_layer.h similarity index 100% rename from det-yolov4-training/src/activation_layer.h rename to det-yolov4-tmi/src/activation_layer.h diff --git a/det-yolov4-training/src/activations.c b/det-yolov4-tmi/src/activations.c similarity index 100% rename from det-yolov4-training/src/activations.c rename to det-yolov4-tmi/src/activations.c diff --git a/det-yolov4-training/src/activations.h b/det-yolov4-tmi/src/activations.h similarity index 100% rename from 
det-yolov4-training/src/activations.h rename to det-yolov4-tmi/src/activations.h diff --git a/det-yolov4-training/src/art.c b/det-yolov4-tmi/src/art.c similarity index 100% rename from det-yolov4-training/src/art.c rename to det-yolov4-tmi/src/art.c diff --git a/det-yolov4-training/src/avgpool_layer.c b/det-yolov4-tmi/src/avgpool_layer.c similarity index 100% rename from det-yolov4-training/src/avgpool_layer.c rename to det-yolov4-tmi/src/avgpool_layer.c diff --git a/det-yolov4-training/src/avgpool_layer.h b/det-yolov4-tmi/src/avgpool_layer.h similarity index 100% rename from det-yolov4-training/src/avgpool_layer.h rename to det-yolov4-tmi/src/avgpool_layer.h diff --git a/det-yolov4-training/src/avgpool_layer_kernels.cu b/det-yolov4-tmi/src/avgpool_layer_kernels.cu similarity index 100% rename from det-yolov4-training/src/avgpool_layer_kernels.cu rename to det-yolov4-tmi/src/avgpool_layer_kernels.cu diff --git a/det-yolov4-training/src/batchnorm_layer.c b/det-yolov4-tmi/src/batchnorm_layer.c similarity index 100% rename from det-yolov4-training/src/batchnorm_layer.c rename to det-yolov4-tmi/src/batchnorm_layer.c diff --git a/det-yolov4-training/src/batchnorm_layer.h b/det-yolov4-tmi/src/batchnorm_layer.h similarity index 100% rename from det-yolov4-training/src/batchnorm_layer.h rename to det-yolov4-tmi/src/batchnorm_layer.h diff --git a/det-yolov4-training/src/blas.c b/det-yolov4-tmi/src/blas.c similarity index 100% rename from det-yolov4-training/src/blas.c rename to det-yolov4-tmi/src/blas.c diff --git a/det-yolov4-training/src/blas.h b/det-yolov4-tmi/src/blas.h similarity index 100% rename from det-yolov4-training/src/blas.h rename to det-yolov4-tmi/src/blas.h diff --git a/det-yolov4-training/src/blas_kernels.cu b/det-yolov4-tmi/src/blas_kernels.cu similarity index 100% rename from det-yolov4-training/src/blas_kernels.cu rename to det-yolov4-tmi/src/blas_kernels.cu diff --git a/det-yolov4-training/src/box.c b/det-yolov4-tmi/src/box.c similarity index 100% 
rename from det-yolov4-training/src/box.c rename to det-yolov4-tmi/src/box.c diff --git a/det-yolov4-training/src/box.h b/det-yolov4-tmi/src/box.h similarity index 100% rename from det-yolov4-training/src/box.h rename to det-yolov4-tmi/src/box.h diff --git a/det-yolov4-training/src/captcha.c b/det-yolov4-tmi/src/captcha.c similarity index 100% rename from det-yolov4-training/src/captcha.c rename to det-yolov4-tmi/src/captcha.c diff --git a/det-yolov4-training/src/cifar.c b/det-yolov4-tmi/src/cifar.c similarity index 100% rename from det-yolov4-training/src/cifar.c rename to det-yolov4-tmi/src/cifar.c diff --git a/det-yolov4-training/src/classifier.c b/det-yolov4-tmi/src/classifier.c similarity index 100% rename from det-yolov4-training/src/classifier.c rename to det-yolov4-tmi/src/classifier.c diff --git a/det-yolov4-training/src/classifier.h b/det-yolov4-tmi/src/classifier.h similarity index 100% rename from det-yolov4-training/src/classifier.h rename to det-yolov4-tmi/src/classifier.h diff --git a/det-yolov4-training/src/coco.c b/det-yolov4-tmi/src/coco.c similarity index 100% rename from det-yolov4-training/src/coco.c rename to det-yolov4-tmi/src/coco.c diff --git a/det-yolov4-training/src/col2im.c b/det-yolov4-tmi/src/col2im.c similarity index 100% rename from det-yolov4-training/src/col2im.c rename to det-yolov4-tmi/src/col2im.c diff --git a/det-yolov4-training/src/col2im.h b/det-yolov4-tmi/src/col2im.h similarity index 100% rename from det-yolov4-training/src/col2im.h rename to det-yolov4-tmi/src/col2im.h diff --git a/det-yolov4-training/src/col2im_kernels.cu b/det-yolov4-tmi/src/col2im_kernels.cu similarity index 100% rename from det-yolov4-training/src/col2im_kernels.cu rename to det-yolov4-tmi/src/col2im_kernels.cu diff --git a/det-yolov4-training/src/compare.c b/det-yolov4-tmi/src/compare.c similarity index 100% rename from det-yolov4-training/src/compare.c rename to det-yolov4-tmi/src/compare.c diff --git a/det-yolov4-training/src/connected_layer.c 
b/det-yolov4-tmi/src/connected_layer.c similarity index 100% rename from det-yolov4-training/src/connected_layer.c rename to det-yolov4-tmi/src/connected_layer.c diff --git a/det-yolov4-training/src/connected_layer.h b/det-yolov4-tmi/src/connected_layer.h similarity index 100% rename from det-yolov4-training/src/connected_layer.h rename to det-yolov4-tmi/src/connected_layer.h diff --git a/det-yolov4-training/src/conv_lstm_layer.c b/det-yolov4-tmi/src/conv_lstm_layer.c similarity index 100% rename from det-yolov4-training/src/conv_lstm_layer.c rename to det-yolov4-tmi/src/conv_lstm_layer.c diff --git a/det-yolov4-training/src/conv_lstm_layer.h b/det-yolov4-tmi/src/conv_lstm_layer.h similarity index 100% rename from det-yolov4-training/src/conv_lstm_layer.h rename to det-yolov4-tmi/src/conv_lstm_layer.h diff --git a/det-yolov4-training/src/convolutional_kernels.cu b/det-yolov4-tmi/src/convolutional_kernels.cu similarity index 100% rename from det-yolov4-training/src/convolutional_kernels.cu rename to det-yolov4-tmi/src/convolutional_kernels.cu diff --git a/det-yolov4-training/src/convolutional_layer.c b/det-yolov4-tmi/src/convolutional_layer.c similarity index 100% rename from det-yolov4-training/src/convolutional_layer.c rename to det-yolov4-tmi/src/convolutional_layer.c diff --git a/det-yolov4-training/src/convolutional_layer.h b/det-yolov4-tmi/src/convolutional_layer.h similarity index 100% rename from det-yolov4-training/src/convolutional_layer.h rename to det-yolov4-tmi/src/convolutional_layer.h diff --git a/det-yolov4-training/src/cost_layer.c b/det-yolov4-tmi/src/cost_layer.c similarity index 100% rename from det-yolov4-training/src/cost_layer.c rename to det-yolov4-tmi/src/cost_layer.c diff --git a/det-yolov4-training/src/cost_layer.h b/det-yolov4-tmi/src/cost_layer.h similarity index 100% rename from det-yolov4-training/src/cost_layer.h rename to det-yolov4-tmi/src/cost_layer.h diff --git a/det-yolov4-training/src/cpu_gemm.c b/det-yolov4-tmi/src/cpu_gemm.c 
similarity index 100% rename from det-yolov4-training/src/cpu_gemm.c rename to det-yolov4-tmi/src/cpu_gemm.c diff --git a/det-yolov4-training/src/crnn_layer.c b/det-yolov4-tmi/src/crnn_layer.c similarity index 100% rename from det-yolov4-training/src/crnn_layer.c rename to det-yolov4-tmi/src/crnn_layer.c diff --git a/det-yolov4-training/src/crnn_layer.h b/det-yolov4-tmi/src/crnn_layer.h similarity index 100% rename from det-yolov4-training/src/crnn_layer.h rename to det-yolov4-tmi/src/crnn_layer.h diff --git a/det-yolov4-training/src/crop_layer.c b/det-yolov4-tmi/src/crop_layer.c similarity index 100% rename from det-yolov4-training/src/crop_layer.c rename to det-yolov4-tmi/src/crop_layer.c diff --git a/det-yolov4-training/src/crop_layer.h b/det-yolov4-tmi/src/crop_layer.h similarity index 100% rename from det-yolov4-training/src/crop_layer.h rename to det-yolov4-tmi/src/crop_layer.h diff --git a/det-yolov4-training/src/crop_layer_kernels.cu b/det-yolov4-tmi/src/crop_layer_kernels.cu similarity index 100% rename from det-yolov4-training/src/crop_layer_kernels.cu rename to det-yolov4-tmi/src/crop_layer_kernels.cu diff --git a/det-yolov4-training/src/csharp/CMakeLists.txt b/det-yolov4-tmi/src/csharp/CMakeLists.txt similarity index 100% rename from det-yolov4-training/src/csharp/CMakeLists.txt rename to det-yolov4-tmi/src/csharp/CMakeLists.txt diff --git a/det-yolov4-training/src/csharp/YoloCSharpWrapper.cs b/det-yolov4-tmi/src/csharp/YoloCSharpWrapper.cs similarity index 100% rename from det-yolov4-training/src/csharp/YoloCSharpWrapper.cs rename to det-yolov4-tmi/src/csharp/YoloCSharpWrapper.cs diff --git a/det-yolov4-training/src/dark_cuda.c b/det-yolov4-tmi/src/dark_cuda.c similarity index 100% rename from det-yolov4-training/src/dark_cuda.c rename to det-yolov4-tmi/src/dark_cuda.c diff --git a/det-yolov4-training/src/dark_cuda.h b/det-yolov4-tmi/src/dark_cuda.h similarity index 100% rename from det-yolov4-training/src/dark_cuda.h rename to 
det-yolov4-tmi/src/dark_cuda.h diff --git a/det-yolov4-training/src/darknet.c b/det-yolov4-tmi/src/darknet.c similarity index 100% rename from det-yolov4-training/src/darknet.c rename to det-yolov4-tmi/src/darknet.c diff --git a/det-yolov4-training/src/darkunistd.h b/det-yolov4-tmi/src/darkunistd.h similarity index 100% rename from det-yolov4-training/src/darkunistd.h rename to det-yolov4-tmi/src/darkunistd.h diff --git a/det-yolov4-training/src/data.c b/det-yolov4-tmi/src/data.c similarity index 100% rename from det-yolov4-training/src/data.c rename to det-yolov4-tmi/src/data.c diff --git a/det-yolov4-training/src/data.h b/det-yolov4-tmi/src/data.h similarity index 100% rename from det-yolov4-training/src/data.h rename to det-yolov4-tmi/src/data.h diff --git a/det-yolov4-training/src/deconvolutional_kernels.cu b/det-yolov4-tmi/src/deconvolutional_kernels.cu similarity index 100% rename from det-yolov4-training/src/deconvolutional_kernels.cu rename to det-yolov4-tmi/src/deconvolutional_kernels.cu diff --git a/det-yolov4-training/src/deconvolutional_layer.c b/det-yolov4-tmi/src/deconvolutional_layer.c similarity index 100% rename from det-yolov4-training/src/deconvolutional_layer.c rename to det-yolov4-tmi/src/deconvolutional_layer.c diff --git a/det-yolov4-training/src/deconvolutional_layer.h b/det-yolov4-tmi/src/deconvolutional_layer.h similarity index 100% rename from det-yolov4-training/src/deconvolutional_layer.h rename to det-yolov4-tmi/src/deconvolutional_layer.h diff --git a/det-yolov4-training/src/demo.c b/det-yolov4-tmi/src/demo.c similarity index 100% rename from det-yolov4-training/src/demo.c rename to det-yolov4-tmi/src/demo.c diff --git a/det-yolov4-training/src/demo.h b/det-yolov4-tmi/src/demo.h similarity index 100% rename from det-yolov4-training/src/demo.h rename to det-yolov4-tmi/src/demo.h diff --git a/det-yolov4-training/src/detection_layer.c b/det-yolov4-tmi/src/detection_layer.c similarity index 100% rename from 
det-yolov4-training/src/detection_layer.c rename to det-yolov4-tmi/src/detection_layer.c diff --git a/det-yolov4-training/src/detection_layer.h b/det-yolov4-tmi/src/detection_layer.h similarity index 100% rename from det-yolov4-training/src/detection_layer.h rename to det-yolov4-tmi/src/detection_layer.h diff --git a/det-yolov4-training/src/detector.c b/det-yolov4-tmi/src/detector.c similarity index 100% rename from det-yolov4-training/src/detector.c rename to det-yolov4-tmi/src/detector.c diff --git a/det-yolov4-training/src/dice.c b/det-yolov4-tmi/src/dice.c similarity index 100% rename from det-yolov4-training/src/dice.c rename to det-yolov4-tmi/src/dice.c diff --git a/det-yolov4-training/src/dropout_layer.c b/det-yolov4-tmi/src/dropout_layer.c similarity index 100% rename from det-yolov4-training/src/dropout_layer.c rename to det-yolov4-tmi/src/dropout_layer.c diff --git a/det-yolov4-training/src/dropout_layer.h b/det-yolov4-tmi/src/dropout_layer.h similarity index 100% rename from det-yolov4-training/src/dropout_layer.h rename to det-yolov4-tmi/src/dropout_layer.h diff --git a/det-yolov4-training/src/dropout_layer_kernels.cu b/det-yolov4-tmi/src/dropout_layer_kernels.cu similarity index 100% rename from det-yolov4-training/src/dropout_layer_kernels.cu rename to det-yolov4-tmi/src/dropout_layer_kernels.cu diff --git a/det-yolov4-training/src/gaussian_yolo_layer.c b/det-yolov4-tmi/src/gaussian_yolo_layer.c similarity index 100% rename from det-yolov4-training/src/gaussian_yolo_layer.c rename to det-yolov4-tmi/src/gaussian_yolo_layer.c diff --git a/det-yolov4-training/src/gaussian_yolo_layer.h b/det-yolov4-tmi/src/gaussian_yolo_layer.h similarity index 100% rename from det-yolov4-training/src/gaussian_yolo_layer.h rename to det-yolov4-tmi/src/gaussian_yolo_layer.h diff --git a/det-yolov4-training/src/gemm.c b/det-yolov4-tmi/src/gemm.c similarity index 100% rename from det-yolov4-training/src/gemm.c rename to det-yolov4-tmi/src/gemm.c diff --git 
a/det-yolov4-training/src/gemm.h b/det-yolov4-tmi/src/gemm.h similarity index 100% rename from det-yolov4-training/src/gemm.h rename to det-yolov4-tmi/src/gemm.h diff --git a/det-yolov4-training/src/getopt.c b/det-yolov4-tmi/src/getopt.c similarity index 100% rename from det-yolov4-training/src/getopt.c rename to det-yolov4-tmi/src/getopt.c diff --git a/det-yolov4-training/src/getopt.h b/det-yolov4-tmi/src/getopt.h similarity index 100% rename from det-yolov4-training/src/getopt.h rename to det-yolov4-tmi/src/getopt.h diff --git a/det-yolov4-training/src/gettimeofday.c b/det-yolov4-tmi/src/gettimeofday.c similarity index 100% rename from det-yolov4-training/src/gettimeofday.c rename to det-yolov4-tmi/src/gettimeofday.c diff --git a/det-yolov4-training/src/gettimeofday.h b/det-yolov4-tmi/src/gettimeofday.h similarity index 100% rename from det-yolov4-training/src/gettimeofday.h rename to det-yolov4-tmi/src/gettimeofday.h diff --git a/det-yolov4-training/src/go.c b/det-yolov4-tmi/src/go.c similarity index 100% rename from det-yolov4-training/src/go.c rename to det-yolov4-tmi/src/go.c diff --git a/det-yolov4-training/src/gru_layer.c b/det-yolov4-tmi/src/gru_layer.c similarity index 100% rename from det-yolov4-training/src/gru_layer.c rename to det-yolov4-tmi/src/gru_layer.c diff --git a/det-yolov4-training/src/gru_layer.h b/det-yolov4-tmi/src/gru_layer.h similarity index 100% rename from det-yolov4-training/src/gru_layer.h rename to det-yolov4-tmi/src/gru_layer.h diff --git a/det-yolov4-training/src/http_stream.cpp b/det-yolov4-tmi/src/http_stream.cpp similarity index 100% rename from det-yolov4-training/src/http_stream.cpp rename to det-yolov4-tmi/src/http_stream.cpp diff --git a/det-yolov4-training/src/http_stream.h b/det-yolov4-tmi/src/http_stream.h similarity index 100% rename from det-yolov4-training/src/http_stream.h rename to det-yolov4-tmi/src/http_stream.h diff --git a/det-yolov4-training/src/httplib.h b/det-yolov4-tmi/src/httplib.h similarity index 100% 
rename from det-yolov4-training/src/httplib.h rename to det-yolov4-tmi/src/httplib.h diff --git a/det-yolov4-training/src/im2col.c b/det-yolov4-tmi/src/im2col.c similarity index 100% rename from det-yolov4-training/src/im2col.c rename to det-yolov4-tmi/src/im2col.c diff --git a/det-yolov4-training/src/im2col.h b/det-yolov4-tmi/src/im2col.h similarity index 100% rename from det-yolov4-training/src/im2col.h rename to det-yolov4-tmi/src/im2col.h diff --git a/det-yolov4-training/src/im2col_kernels.cu b/det-yolov4-tmi/src/im2col_kernels.cu similarity index 100% rename from det-yolov4-training/src/im2col_kernels.cu rename to det-yolov4-tmi/src/im2col_kernels.cu diff --git a/det-yolov4-training/src/image.c b/det-yolov4-tmi/src/image.c similarity index 100% rename from det-yolov4-training/src/image.c rename to det-yolov4-tmi/src/image.c diff --git a/det-yolov4-training/src/image.h b/det-yolov4-tmi/src/image.h similarity index 100% rename from det-yolov4-training/src/image.h rename to det-yolov4-tmi/src/image.h diff --git a/det-yolov4-training/src/image_opencv.cpp b/det-yolov4-tmi/src/image_opencv.cpp similarity index 100% rename from det-yolov4-training/src/image_opencv.cpp rename to det-yolov4-tmi/src/image_opencv.cpp diff --git a/det-yolov4-training/src/image_opencv.h b/det-yolov4-tmi/src/image_opencv.h similarity index 100% rename from det-yolov4-training/src/image_opencv.h rename to det-yolov4-tmi/src/image_opencv.h diff --git a/det-yolov4-training/src/layer.c b/det-yolov4-tmi/src/layer.c similarity index 100% rename from det-yolov4-training/src/layer.c rename to det-yolov4-tmi/src/layer.c diff --git a/det-yolov4-training/src/layer.h b/det-yolov4-tmi/src/layer.h similarity index 100% rename from det-yolov4-training/src/layer.h rename to det-yolov4-tmi/src/layer.h diff --git a/det-yolov4-training/src/list.c b/det-yolov4-tmi/src/list.c similarity index 100% rename from det-yolov4-training/src/list.c rename to det-yolov4-tmi/src/list.c diff --git 
a/det-yolov4-training/src/list.h b/det-yolov4-tmi/src/list.h similarity index 100% rename from det-yolov4-training/src/list.h rename to det-yolov4-tmi/src/list.h diff --git a/det-yolov4-training/src/local_layer.c b/det-yolov4-tmi/src/local_layer.c similarity index 100% rename from det-yolov4-training/src/local_layer.c rename to det-yolov4-tmi/src/local_layer.c diff --git a/det-yolov4-training/src/local_layer.h b/det-yolov4-tmi/src/local_layer.h similarity index 100% rename from det-yolov4-training/src/local_layer.h rename to det-yolov4-tmi/src/local_layer.h diff --git a/det-yolov4-training/src/lstm_layer.c b/det-yolov4-tmi/src/lstm_layer.c similarity index 100% rename from det-yolov4-training/src/lstm_layer.c rename to det-yolov4-tmi/src/lstm_layer.c diff --git a/det-yolov4-training/src/lstm_layer.h b/det-yolov4-tmi/src/lstm_layer.h similarity index 100% rename from det-yolov4-training/src/lstm_layer.h rename to det-yolov4-tmi/src/lstm_layer.h diff --git a/det-yolov4-training/src/matrix.c b/det-yolov4-tmi/src/matrix.c similarity index 100% rename from det-yolov4-training/src/matrix.c rename to det-yolov4-tmi/src/matrix.c diff --git a/det-yolov4-training/src/matrix.h b/det-yolov4-tmi/src/matrix.h similarity index 100% rename from det-yolov4-training/src/matrix.h rename to det-yolov4-tmi/src/matrix.h diff --git a/det-yolov4-training/src/maxpool_layer.c b/det-yolov4-tmi/src/maxpool_layer.c similarity index 100% rename from det-yolov4-training/src/maxpool_layer.c rename to det-yolov4-tmi/src/maxpool_layer.c diff --git a/det-yolov4-training/src/maxpool_layer.h b/det-yolov4-tmi/src/maxpool_layer.h similarity index 100% rename from det-yolov4-training/src/maxpool_layer.h rename to det-yolov4-tmi/src/maxpool_layer.h diff --git a/det-yolov4-training/src/maxpool_layer_kernels.cu b/det-yolov4-tmi/src/maxpool_layer_kernels.cu similarity index 100% rename from det-yolov4-training/src/maxpool_layer_kernels.cu rename to det-yolov4-tmi/src/maxpool_layer_kernels.cu diff --git 
a/det-yolov4-training/src/network.c b/det-yolov4-tmi/src/network.c similarity index 100% rename from det-yolov4-training/src/network.c rename to det-yolov4-tmi/src/network.c diff --git a/det-yolov4-training/src/network.h b/det-yolov4-tmi/src/network.h similarity index 100% rename from det-yolov4-training/src/network.h rename to det-yolov4-tmi/src/network.h diff --git a/det-yolov4-training/src/network_kernels.cu b/det-yolov4-tmi/src/network_kernels.cu similarity index 100% rename from det-yolov4-training/src/network_kernels.cu rename to det-yolov4-tmi/src/network_kernels.cu diff --git a/det-yolov4-training/src/nightmare.c b/det-yolov4-tmi/src/nightmare.c similarity index 100% rename from det-yolov4-training/src/nightmare.c rename to det-yolov4-tmi/src/nightmare.c diff --git a/det-yolov4-training/src/normalization_layer.c b/det-yolov4-tmi/src/normalization_layer.c similarity index 100% rename from det-yolov4-training/src/normalization_layer.c rename to det-yolov4-tmi/src/normalization_layer.c diff --git a/det-yolov4-training/src/normalization_layer.h b/det-yolov4-tmi/src/normalization_layer.h similarity index 100% rename from det-yolov4-training/src/normalization_layer.h rename to det-yolov4-tmi/src/normalization_layer.h diff --git a/det-yolov4-training/src/option_list.c b/det-yolov4-tmi/src/option_list.c similarity index 100% rename from det-yolov4-training/src/option_list.c rename to det-yolov4-tmi/src/option_list.c diff --git a/det-yolov4-training/src/option_list.h b/det-yolov4-tmi/src/option_list.h similarity index 100% rename from det-yolov4-training/src/option_list.h rename to det-yolov4-tmi/src/option_list.h diff --git a/det-yolov4-training/src/parser.c b/det-yolov4-tmi/src/parser.c similarity index 100% rename from det-yolov4-training/src/parser.c rename to det-yolov4-tmi/src/parser.c diff --git a/det-yolov4-training/src/parser.h b/det-yolov4-tmi/src/parser.h similarity index 100% rename from det-yolov4-training/src/parser.h rename to 
det-yolov4-tmi/src/parser.h diff --git a/det-yolov4-training/src/region_layer.c b/det-yolov4-tmi/src/region_layer.c similarity index 100% rename from det-yolov4-training/src/region_layer.c rename to det-yolov4-tmi/src/region_layer.c diff --git a/det-yolov4-training/src/region_layer.h b/det-yolov4-tmi/src/region_layer.h similarity index 100% rename from det-yolov4-training/src/region_layer.h rename to det-yolov4-tmi/src/region_layer.h diff --git a/det-yolov4-training/src/reorg_layer.c b/det-yolov4-tmi/src/reorg_layer.c similarity index 100% rename from det-yolov4-training/src/reorg_layer.c rename to det-yolov4-tmi/src/reorg_layer.c diff --git a/det-yolov4-training/src/reorg_layer.h b/det-yolov4-tmi/src/reorg_layer.h similarity index 100% rename from det-yolov4-training/src/reorg_layer.h rename to det-yolov4-tmi/src/reorg_layer.h diff --git a/det-yolov4-training/src/reorg_old_layer.c b/det-yolov4-tmi/src/reorg_old_layer.c similarity index 100% rename from det-yolov4-training/src/reorg_old_layer.c rename to det-yolov4-tmi/src/reorg_old_layer.c diff --git a/det-yolov4-training/src/reorg_old_layer.h b/det-yolov4-tmi/src/reorg_old_layer.h similarity index 100% rename from det-yolov4-training/src/reorg_old_layer.h rename to det-yolov4-tmi/src/reorg_old_layer.h diff --git a/det-yolov4-training/src/representation_layer.c b/det-yolov4-tmi/src/representation_layer.c similarity index 100% rename from det-yolov4-training/src/representation_layer.c rename to det-yolov4-tmi/src/representation_layer.c diff --git a/det-yolov4-training/src/representation_layer.h b/det-yolov4-tmi/src/representation_layer.h similarity index 100% rename from det-yolov4-training/src/representation_layer.h rename to det-yolov4-tmi/src/representation_layer.h diff --git a/det-yolov4-training/src/rnn.c b/det-yolov4-tmi/src/rnn.c similarity index 100% rename from det-yolov4-training/src/rnn.c rename to det-yolov4-tmi/src/rnn.c diff --git a/det-yolov4-training/src/rnn_layer.c b/det-yolov4-tmi/src/rnn_layer.c 
similarity index 100% rename from det-yolov4-training/src/rnn_layer.c rename to det-yolov4-tmi/src/rnn_layer.c diff --git a/det-yolov4-training/src/rnn_layer.h b/det-yolov4-tmi/src/rnn_layer.h similarity index 100% rename from det-yolov4-training/src/rnn_layer.h rename to det-yolov4-tmi/src/rnn_layer.h diff --git a/det-yolov4-training/src/rnn_vid.c b/det-yolov4-tmi/src/rnn_vid.c similarity index 100% rename from det-yolov4-training/src/rnn_vid.c rename to det-yolov4-tmi/src/rnn_vid.c diff --git a/det-yolov4-training/src/route_layer.c b/det-yolov4-tmi/src/route_layer.c similarity index 100% rename from det-yolov4-training/src/route_layer.c rename to det-yolov4-tmi/src/route_layer.c diff --git a/det-yolov4-training/src/route_layer.h b/det-yolov4-tmi/src/route_layer.h similarity index 100% rename from det-yolov4-training/src/route_layer.h rename to det-yolov4-tmi/src/route_layer.h diff --git a/det-yolov4-training/src/sam_layer.c b/det-yolov4-tmi/src/sam_layer.c similarity index 100% rename from det-yolov4-training/src/sam_layer.c rename to det-yolov4-tmi/src/sam_layer.c diff --git a/det-yolov4-training/src/sam_layer.h b/det-yolov4-tmi/src/sam_layer.h similarity index 100% rename from det-yolov4-training/src/sam_layer.h rename to det-yolov4-tmi/src/sam_layer.h diff --git a/det-yolov4-training/src/scale_channels_layer.c b/det-yolov4-tmi/src/scale_channels_layer.c similarity index 100% rename from det-yolov4-training/src/scale_channels_layer.c rename to det-yolov4-tmi/src/scale_channels_layer.c diff --git a/det-yolov4-training/src/scale_channels_layer.h b/det-yolov4-tmi/src/scale_channels_layer.h similarity index 100% rename from det-yolov4-training/src/scale_channels_layer.h rename to det-yolov4-tmi/src/scale_channels_layer.h diff --git a/det-yolov4-training/src/shortcut_layer.c b/det-yolov4-tmi/src/shortcut_layer.c similarity index 100% rename from det-yolov4-training/src/shortcut_layer.c rename to det-yolov4-tmi/src/shortcut_layer.c diff --git 
a/det-yolov4-training/src/shortcut_layer.h b/det-yolov4-tmi/src/shortcut_layer.h similarity index 100% rename from det-yolov4-training/src/shortcut_layer.h rename to det-yolov4-tmi/src/shortcut_layer.h diff --git a/det-yolov4-training/src/softmax_layer.c b/det-yolov4-tmi/src/softmax_layer.c similarity index 100% rename from det-yolov4-training/src/softmax_layer.c rename to det-yolov4-tmi/src/softmax_layer.c diff --git a/det-yolov4-training/src/softmax_layer.h b/det-yolov4-tmi/src/softmax_layer.h similarity index 100% rename from det-yolov4-training/src/softmax_layer.h rename to det-yolov4-tmi/src/softmax_layer.h diff --git a/det-yolov4-training/src/super.c b/det-yolov4-tmi/src/super.c similarity index 100% rename from det-yolov4-training/src/super.c rename to det-yolov4-tmi/src/super.c diff --git a/det-yolov4-training/src/swag.c b/det-yolov4-tmi/src/swag.c similarity index 100% rename from det-yolov4-training/src/swag.c rename to det-yolov4-tmi/src/swag.c diff --git a/det-yolov4-training/src/tag.c b/det-yolov4-tmi/src/tag.c similarity index 100% rename from det-yolov4-training/src/tag.c rename to det-yolov4-tmi/src/tag.c diff --git a/det-yolov4-training/src/tree.c b/det-yolov4-tmi/src/tree.c similarity index 100% rename from det-yolov4-training/src/tree.c rename to det-yolov4-tmi/src/tree.c diff --git a/det-yolov4-training/src/tree.h b/det-yolov4-tmi/src/tree.h similarity index 100% rename from det-yolov4-training/src/tree.h rename to det-yolov4-tmi/src/tree.h diff --git a/det-yolov4-training/src/upsample_layer.c b/det-yolov4-tmi/src/upsample_layer.c similarity index 100% rename from det-yolov4-training/src/upsample_layer.c rename to det-yolov4-tmi/src/upsample_layer.c diff --git a/det-yolov4-training/src/upsample_layer.h b/det-yolov4-tmi/src/upsample_layer.h similarity index 100% rename from det-yolov4-training/src/upsample_layer.h rename to det-yolov4-tmi/src/upsample_layer.h diff --git a/det-yolov4-training/src/utils.c b/det-yolov4-tmi/src/utils.c similarity 
index 100% rename from det-yolov4-training/src/utils.c rename to det-yolov4-tmi/src/utils.c diff --git a/det-yolov4-training/src/utils.h b/det-yolov4-tmi/src/utils.h similarity index 100% rename from det-yolov4-training/src/utils.h rename to det-yolov4-tmi/src/utils.h diff --git a/det-yolov4-training/src/version.h b/det-yolov4-tmi/src/version.h similarity index 100% rename from det-yolov4-training/src/version.h rename to det-yolov4-tmi/src/version.h diff --git a/det-yolov4-training/src/version.h.in b/det-yolov4-tmi/src/version.h.in similarity index 100% rename from det-yolov4-training/src/version.h.in rename to det-yolov4-tmi/src/version.h.in diff --git a/det-yolov4-training/src/voxel.c b/det-yolov4-tmi/src/voxel.c similarity index 100% rename from det-yolov4-training/src/voxel.c rename to det-yolov4-tmi/src/voxel.c diff --git a/det-yolov4-training/src/writing.c b/det-yolov4-tmi/src/writing.c similarity index 100% rename from det-yolov4-training/src/writing.c rename to det-yolov4-tmi/src/writing.c diff --git a/det-yolov4-training/src/yolo.c b/det-yolov4-tmi/src/yolo.c similarity index 100% rename from det-yolov4-training/src/yolo.c rename to det-yolov4-tmi/src/yolo.c diff --git a/det-yolov4-training/src/yolo_console_dll.cpp b/det-yolov4-tmi/src/yolo_console_dll.cpp similarity index 100% rename from det-yolov4-training/src/yolo_console_dll.cpp rename to det-yolov4-tmi/src/yolo_console_dll.cpp diff --git a/det-yolov4-training/src/yolo_layer.c b/det-yolov4-tmi/src/yolo_layer.c similarity index 100% rename from det-yolov4-training/src/yolo_layer.c rename to det-yolov4-tmi/src/yolo_layer.c diff --git a/det-yolov4-training/src/yolo_layer.h b/det-yolov4-tmi/src/yolo_layer.h similarity index 100% rename from det-yolov4-training/src/yolo_layer.h rename to det-yolov4-tmi/src/yolo_layer.h diff --git a/det-yolov4-training/src/yolo_v2_class.cpp b/det-yolov4-tmi/src/yolo_v2_class.cpp similarity index 100% rename from det-yolov4-training/src/yolo_v2_class.cpp rename to 
det-yolov4-tmi/src/yolo_v2_class.cpp diff --git a/det-yolov4-tmi/start.py b/det-yolov4-tmi/start.py new file mode 100644 index 0000000..67da850 --- /dev/null +++ b/det-yolov4-tmi/start.py @@ -0,0 +1,24 @@ +import logging +import subprocess +import sys + +import yaml + + +def start() -> int: + with open("/in/env.yaml", "r", encoding='utf8') as f: + config = yaml.safe_load(f) + + logging.info(f"config is {config}") + if config['run_training']: + cmd = 'bash /darknet/make_train_test_darknet.sh' + cwd = '/darknet' + else: + cmd = 'python3 docker_main.py' + cwd = '/darknet/mining' + subprocess.run(cmd, check=True, shell=True, cwd=cwd) + + return 0 + +if __name__ == '__main__': + sys.exit(start()) diff --git a/det-yolov4-training/train.sh b/det-yolov4-tmi/train.sh similarity index 100% rename from det-yolov4-training/train.sh rename to det-yolov4-tmi/train.sh diff --git a/det-yolov4-training/train_watcher.py b/det-yolov4-tmi/train_watcher.py similarity index 100% rename from det-yolov4-training/train_watcher.py rename to det-yolov4-tmi/train_watcher.py diff --git a/det-yolov4-training/train_yolov3.sh b/det-yolov4-tmi/train_yolov3.sh similarity index 100% rename from det-yolov4-training/train_yolov3.sh rename to det-yolov4-tmi/train_yolov3.sh diff --git a/det-yolov4-training/training-template.yaml b/det-yolov4-tmi/training-template.yaml similarity index 96% rename from det-yolov4-training/training-template.yaml rename to det-yolov4-tmi/training-template.yaml index 17c32f7..5e75eaf 100644 --- a/det-yolov4-training/training-template.yaml +++ b/det-yolov4-tmi/training-template.yaml @@ -4,7 +4,7 @@ image_width: 608 learning_rate: 0.0013 max_batches: 20000 warmup_iterations: 1000 -batch: 64 +batch: 4 subdivisions: 32 shm_size: '16G' # class_names: diff --git a/det-yolov4-training/video_yolov3.sh b/det-yolov4-tmi/video_yolov3.sh similarity index 100% rename from det-yolov4-training/video_yolov3.sh rename to det-yolov4-tmi/video_yolov3.sh diff --git 
a/det-yolov4-training/video_yolov4.sh b/det-yolov4-tmi/video_yolov4.sh similarity index 100% rename from det-yolov4-training/video_yolov4.sh rename to det-yolov4-tmi/video_yolov4.sh diff --git a/det-yolov4-training/warm_up_training.py b/det-yolov4-tmi/warm_up_training.py similarity index 100% rename from det-yolov4-training/warm_up_training.py rename to det-yolov4-tmi/warm_up_training.py From 88a66e802789acc144eae7a4bb7a2c2ab27fe5df Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 3 Aug 2022 13:51:15 +0800 Subject: [PATCH 092/150] update readme --- README.MD | 149 +++----------- README_zh-CN.MD | 203 +++++++++++++++++++ det-mmdetection-tmi/mmdet/utils/util_ymir.py | 2 +- det-yolov4-tmi/cuda101.dockerfile | 1 + det-yolov4-tmi/mining/docker_main.py | 11 +- det-yolov5-tmi/utils/ymir_yolov5.py | 31 +-- 6 files changed, 258 insertions(+), 139 deletions(-) create mode 100644 README_zh-CN.MD diff --git a/README.MD b/README.MD index b03b375..085f419 100644 --- a/README.MD +++ b/README.MD @@ -1,4 +1,4 @@ -# ymir-executor 使用文档 +# ymir-executor documentation [English](./README.MD) | [简体中文](./README_zh-CN.MD) - [ymir](https://github.com/IndustryEssentials/ymir) @@ -7,28 +7,32 @@ - [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu111-tmi + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi ``` - [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi ``` - [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi ``` - [detectron2](https://github.com/yzbx/ymir-detectron2) - [change 
log](https://github.com/yzbx/ymir-detectron2/blob/master/README.md) - - ymir1.0.0的镜像与ymir1.1.0兼容 - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.0.0-detectron2-tmi + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi ``` - [yolov7](https://github.com/yzbx/ymir-yolov7) @@ -39,42 +43,39 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` -## det-yolov4-training +## det-yolov4-tmi -- yolov4的训练镜像,采用mxnet与darknet框架,默认的 `Dockerfile` cuda版本为`10.1`,无法在高版本显卡如GTX3080/GTX3090上运行,需要修改dockerfile将cuda版本提升为11.1以上,参考 `cuda112.dockerfile` 进行构建。 +- yolov4 training, mining and infer docker image, use `mxnet` and `darknet` framework ``` - cd det-yolov4-training - # cuda101-yolov4-training - docker build -t ymir-executor/yolov4:cuda101-training -f Dockerfile . + cd det-yolov4-tmi + docker build -t ymir-executor/yolov4:cuda101-tmi -f cuda101.dockerfile . - # cuda112-yolov4-training - docker build -t ymir-executor/yolov4:cuda112-training -f cuda112.dockerfile . + docker build -t ymir-executor/yolov4:cuda112-tmi -f cuda112.dockerfile . ``` -## det-yolov4-mining - -- yolov4挖掘与推理镜像,与det-yolov4-training对应 +## det-yolov5-tmi -``` -cd det-yolov4-mining +- [change log](./det-yolov5-tmi/README.md) -docker build -t ymir-executor/yolov4:cuda101-mi -f Dockerfile . +- yolov5 training, mining and infer docker image, use `pytorch` framework -docker build -t ymir-executor/yolov4:cuda112-mi -f cuda112.dockerfile . ``` +cd det-yolov5-tmi +docker build -t ymir-executor/yolov5:cuda102-tmi -f cuda102.dockerfile . -## det-yolov5-tmi +docker build -t ymir-executor/yolov5:cuda111-tmi -f cuda111.dockerfile . +``` -- [change log](./det-yolov5-tmi/README.md) +## det-mmdetection-tmi -- yolov5训练、挖掘及推理镜像,镜像构建时会从github上下载权重, 如果访问github不稳定, 建议提前将模型权重下载并在构建时复制到镜像中. +- [change log](./det-mmdetection-tmi/README.md) ``` -cd det-yolov5-tmi -docker build -t ymir-executor/ymir1.1.0:cuda102-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda102.dockerfile . 
+cd det-mmdetection-tmi +docker build -t ymir-executor/mmdet:cu102-tmi -f docker/Dockerfile.cuda102 . -docker build -t ymir-executor/ymir1.1.0:cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . +docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ``` ## live-code-executor @@ -91,100 +92,12 @@ docker build -t ymir-executor/live-code:torch-tmi -f torch.dockerfile docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ``` -## det-mmdetection-tmi - -- [change log](./det-mmdetection-tmi/README.md) - -``` -cd det-mmdetection-tmi -docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi -f docker/Dockerfile.cuda102 --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 . - -docker build -t youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi -f docker/Dockerfile.cuda111 --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 . -``` - -## 如何制作自己的ymir-executor - -- [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) - -- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) ymir镜像开发辅助库 - -## 如何导入预训练模型 - -- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) - - - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 - ---- - -# FAQ - -## apt 或 pip 安装慢或出错 - - - 采用国内源,如在docker file 中添加如下命令 - - ``` - RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list - - RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple - ``` - -## docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 - - - 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. - - ``` - cd ymir-executor/det-yolov5-tmi - - docker build -t ymir-executor/yolov5:cuda111 . 
-f cuda111.dockerfile --build-arg SERVER_MODE=dev - ``` - -## 镜像运行完`/in`与`/out`目录中的文件被清理 - - - ymir系统为节省空间,会在任务`成功结束`后删除其中不必要的文件,如果不想删除,可以在部署ymir时,修改文件`ymir/command/mir/tools/command_run_in_out.py`,注释其中的`_cleanup(work_dir=work_dir)`。注意需要重新构建后端镜像 - - ``` - cd ymir - docker build -t industryessentials/ymir-backend --build-arg PIP_SOURCE=https://pypi.mirrors.ustc.edu.cn/simple --build-arg SERVER_MODE='dev' -f Dockerfile.backend . - - docker-compose down -v && docker-compose up -d - ``` - -## 训练镜像如何调试 - - ![](./debug.png) - - - 先通过失败任务的tensorboard链接拿到任务id,如`t000000100000175245d1656933456` - - - 进入ymir部署目录 `ymir-workplace/sandbox/work_dir/TaskTypeTraining/t000000100000175245d1656933456/sub_task/t000000100000175245d1656933456`, `ls` 可以看到以下结果 - - ``` - # ls - in out task_config.yaml - ``` - - - 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 - - ``` - docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v : bash - - # 以/home/ymir/ymir-workplace作为ymir部署目录为例 - docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash - ``` - - - 进入到docker 容器中后, 执行镜像默认的命令, 如dockerfile中写的 `CMD bash /usr/bin/start.sh` - - ``` - bash /usr/bin/start.sh - ``` - - - 推理与挖掘镜像调试同理,注意对应目录均为`ymir-workplace/sandbox/work_dir/TaskTypeMining` - -## 模型精度/速度如何权衡与提升 +## how to customize ymir-executor - - 模型精度与数据集大小、数据集质量、学习率、batch size、 迭代次数、模型结构、数据增强方式、损失函数等相关,在此不做展开,详情参考: +- [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) - - [Object Detection in 20 Years: A Survey](https://arxiv.org/abs/1905.05055) +- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) - - [Paper with Code: Object Detection](https://paperswithcode.com/task/object-detection) +## how to import pretrained model weights - - [awesome object detection](https://github.com/amusi/awesome-object-detection) +- [import pretrained model 
weights](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) diff --git a/README_zh-CN.MD b/README_zh-CN.MD new file mode 100644 index 0000000..d05ffab --- /dev/null +++ b/README_zh-CN.MD @@ -0,0 +1,203 @@ +# ymir-executor 使用文档 [English](./README.MD) | [简体中文](./README_zh-CN.MD) + +- [ymir](https://github.com/IndustryEssentials/ymir) + +## ymir-1.1.0 官方镜像 + +- [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi + ``` + +- [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi + ``` + +- [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi + ``` + +- [detectron2](https://github.com/yzbx/ymir-detectron2) + + - [change log](https://github.com/yzbx/ymir-detectron2/blob/master/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi + ``` + +- [yolov7](https://github.com/yzbx/ymir-yolov7) + + - [change log](https://github.com/yzbx/ymir-yolov7/blob/main/ymir/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi + ``` + +## det-yolov4-tmi + +- yolov4的训练、挖掘与推理镜像,采用mxnet与darknet框架 + + ``` + cd det-yolov4-tmi + docker build -t ymir-executor/yolov4:cuda101-tmi -f cuda101.dockerfile . + + docker build -t ymir-executor/yolov4:cuda112-tmi -f cuda112.dockerfile . + ``` + +## det-yolov5-tmi + +- [change log](./det-yolov5-tmi/README.md) + +- yolov5训练、挖掘及推理镜像,采用pytorch框架,镜像构建时会从github上下载权重, 如果访问github不稳定, 建议提前将模型权重下载并在构建时复制到镜像中. 
+ +``` +cd det-yolov5-tmi +docker build -t ymir-executor/yolov5:cuda102-tmi -f cuda102.dockerfile . + +docker build -t ymir-executor/yolov5:cuda111-tmi -f cuda111.dockerfile . +``` + +## det-mmdetection-tmi + +- [change log](./det-mmdetection-tmi/README.md) + +``` +cd det-mmdetection-tmi +docker build -t ymir-executor/mmdet:cu102-tmi -f docker/Dockerfile.cuda102 . + +docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . +``` + +## live-code-executor + +- 可以通过`git_url`, `commit id` 或 `tag` 从网上clone代码到镜像并运行, 不推荐使用`branch`, 因为这样拉取的代码可能随时间变化, 实验结果不具备可重复性. + +- 参考 [live-code](https://github.com/IndustryEssentials/ymir-remote-git) + +``` +cd live-code-executor + +docker build -t ymir-executor/live-code:torch-tmi -f torch.dockerfile + +docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile +``` + +## 如何制作自己的ymir-executor + +- [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) + +- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) ymir镜像开发辅助库 + +## 如何导入预训练模型 + +- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) + + - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 + +--- + +# FAQ + +## 关于cuda版本 + +- 推荐安装11.2以上的cuda版本, 使用11.1及以上的镜像 + +- GTX3080/GTX3090系统不支持11.1以下的cuda,只能使用cuda11.1及以上的镜像 + +## apt 或 pip 安装慢或出错 + +- 采用国内源,如在docker file 中添加如下命令 + + ``` + RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list + + RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple + ``` + +## docker build 的时候出错,找不到相应docker file或`COPY/ADD`时出错 + +- 回到项目根目录或docker file对应根目录,确保docker file 中`COPY/ADD`的文件与文件夹能够访问,以yolov5为例. + + ``` + cd ymir-executor/det-yolov5-tmi + + docker build -t ymir-executor/yolov5:cuda111 . 
-f cuda111.dockerfile --build-arg SERVER_MODE=dev + ``` + +## 镜像运行完`/in`与`/out`目录中的文件被清理 + +- ymir系统为节省空间,会在任务`成功结束`后删除其中不必要的文件,如果不想删除,可以在部署ymir后,修改镜像`industryessentials/ymir-backend`中的`/usr/local/lib/python3.8/dist-packages/mir/tools/command_run_in_out.py`,注释其中所有的`_cleanup(work_dir=work_dir)`, 将修改覆盖到镜像`industryessentials/ymir-backend:latest`并重启ymir + + ``` + $ docker ps |grep backend + + 580c2f1dae1b industryessentials/ymir-backend ... + 5490c294982f industryessentials/ymir-backend-redis ... + + $ docker run -it --rm industryessentials/ymir-backend:latest bash + $ vim /usr/local/lib/python3.8/dist-packages/mir/tools/command_run_in_out.py + ``` + 注释所有的`_cleanup(work_dir=work_dir)`之后,不要立即退出容器,切换到另一个终端 + ``` + $ docker ps |grep backend + + dced73e51429 industryessentials/ymir-backend # use the latest one + 580c2f1dae1b industryessentials/ymir-backend ... + 5490c294982f industryessentials/ymir-backend-redis ... + + $ docker commit dced73e51429 industryessentials/ymir-backend:latest + ``` + 保存改动后,再切换回之前的终端,退出容器,重启ymir即可 + + +## 训练镜像如何调试 + +![](./debug.png) + +- 先修改镜像`industryessentials/ymir-backend`,注释其中所有的`_cleanup(work_dir=work_dir)`,保存`/in`和`/out`目录 + +- 再通过失败任务的tensorboard链接拿到任务id,如`t000000100000175245d1656933456` + +- 进入ymir部署目录 `ymir-workplace/sandbox/work_dir/TaskTypeTraining/t000000100000175245d1656933456/sub_task/t000000100000175245d1656933456`, `ls` 可以看到以下结果 + + ``` + # ls + in out task_config.yaml + ``` + +- 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 + + ``` + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v : bash + + # 以/home/ymir/ymir-workplace作为ymir部署目录为例 + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash + ``` + +- 进入到docker 容器中后, 执行镜像默认的命令, 如dockerfile中写的 `CMD bash /usr/bin/start.sh` + + ``` + bash /usr/bin/start.sh + ``` + +- 推理与挖掘镜像调试同理,注意对应目录均为`ymir-workplace/sandbox/work_dir/TaskTypeMining` + +## 模型精度/速度如何权衡与提升 + +- 模型精度与数据集大小、数据集质量、学习率、batch size、 
迭代次数、模型结构、数据增强方式、损失函数等相关,在此不做展开,详情参考: + + - [Object Detection in 20 Years: A Survey](https://arxiv.org/abs/1905.05055) + + - [Paper with Code: Object Detection](https://paperswithcode.com/task/object-detection) + + - [awesome object detection](https://github.com/amusi/awesome-object-detection) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index aac1df8..810914b 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -3,13 +3,13 @@ """ import glob import logging -import yaml import os import os.path as osp from enum import IntEnum from typing import Any, List, Optional import mmcv +import yaml from easydict import EasyDict as edict from mmcv import Config from nptyping import NDArray, Shape, UInt8 diff --git a/det-yolov4-tmi/cuda101.dockerfile b/det-yolov4-tmi/cuda101.dockerfile index 5a5a2b5..53aa01b 100644 --- a/det-yolov4-tmi/cuda101.dockerfile +++ b/det-yolov4-tmi/cuda101.dockerfile @@ -16,6 +16,7 @@ RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm + ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev COPY . 
/darknet diff --git a/det-yolov4-tmi/mining/docker_main.py b/det-yolov4-tmi/mining/docker_main.py index 5f65377..359d066 100644 --- a/det-yolov4-tmi/mining/docker_main.py +++ b/det-yolov4-tmi/mining/docker_main.py @@ -36,15 +36,16 @@ def _load_config(config_file) -> dict: if __name__ == '__main__': config = _load_config("/in/config.yaml") - env_config = _load_config("/in/env.yaml") - run_infer = env_config['run_infer']=='true' - run_mining = env_config['run_mining']=='true' + with open("/in/env.yaml", "r", encoding='utf8') as f: + env_config = yaml.safe_load(f) + run_infer = int(env_config['run_infer']) + run_mining = int(env_config['run_mining']) if not run_infer and not run_mining: raise ValueError('both run_infer and run_mining set to 0, abort') - monitor_process.run_mining = int(run_mining) - monitor_process.run_infer = int(run_infer) + monitor_process.run_mining = run_mining + monitor_process.run_infer = run_infer log_writer = LogWriter(monitor_path="/out/monitor.txt", monitor_pure_path="/out/monitor-log.txt", diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index be78660..c2bd681 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -8,20 +8,19 @@ from enum import IntEnum from typing import Any, Dict, List, Tuple -from easydict import EasyDict as edict import numpy as np import torch import yaml -from nptyping import NDArray, Shape, UInt8 -from packaging.version import Version -from ymir_exc import env -from ymir_exc import result_writer as rw - +from easydict import EasyDict as edict from models.common import DetectMultiBackend from models.experimental import attempt_download +from nptyping import NDArray, Shape, UInt8 +from packaging.version import Version from utils.augmentations import letterbox from utils.general import check_img_size, non_max_suppression, scale_coords from utils.torch_utils import select_device +from ymir_exc import env +from ymir_exc import result_writer as 
rw class YmirStage(IntEnum): @@ -85,17 +84,16 @@ def get_weight_file(cfg: edict) -> str: else: model_params_path = cfg.param.model_params_path - model_dir = osp.join(cfg.ymir.input.root_dir, - cfg.ymir.input.models_dir) - model_params_path = [p for p in model_params_path if osp.exists(osp.join(model_dir, p))] + model_dir = cfg.ymir.input.models_dir + model_params_path = [osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pt')] # choose weight file by priority, best.pt > xxx.pt - if 'best.pt' in model_params_path: - return osp.join(model_dir, 'best.pt') - else: - for f in model_params_path: - if f.endswith('.pt'): - return osp.join(model_dir, f) + for p in model_params_path: + if p.endswith('best.pt'): + return p + + if len(model_params_path) > 0: + return max(model_params_path, key=osp.getctime) return "" @@ -142,6 +140,9 @@ def __init__(self, cfg: edict): def init_detector(self, device: torch.device) -> DetectMultiBackend: weights = get_weight_file(self.cfg) + if not weights: + raise Exception("no weights file specified!") + data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') model = DetectMultiBackend(weights=weights, device=device, From 769be08f9c8f50ffb66dd7dfee0a0aba8b5c9f8b Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 3 Aug 2022 17:40:29 +0800 Subject: [PATCH 093/150] update readme and dockerfile --- README.MD | 14 ---------- README_zh-CN.MD | 17 +++++++++-- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 9 ++---- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 9 ++---- det-yolov5-tmi/cuda102.dockerfile | 11 ++------ det-yolov5-tmi/cuda111.dockerfile | 11 ++------ det-yolov5-tmi/utils/ymir_yolov5.py | 28 +++++++++---------- live-code-executor/mxnet.dockerfile | 2 +- live-code-executor/torch.dockerfile | 1 + 9 files changed, 41 insertions(+), 61 deletions(-) diff --git a/README.MD b/README.MD index 085f419..54a8b94 100644 --- a/README.MD +++ b/README.MD @@ -78,20 +78,6 @@ docker build 
-t ymir-executor/mmdet:cu102-tmi -f docker/Dockerfile.cuda102 . docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ``` -## live-code-executor - -- 可以通过`git_url`, `commit id` 或 `tag` 从网上clone代码到镜像并运行, 不推荐使用`branch`, 因为这样拉取的代码可能随时间变化, 实验结果不具备可重复性. - -- 参考 [live-code](https://github.com/IndustryEssentials/ymir-remote-git) - -``` -cd live-code-executor - -docker build -t ymir-executor/live-code:torch-tmi -f torch.dockerfile - -docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile -``` - ## how to custom ymir-executor - [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index d05ffab..5505b24 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -131,7 +131,7 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ``` cd ymir-executor/det-yolov5-tmi - docker build -t ymir-executor/yolov5:cuda111 . -f cuda111.dockerfile --build-arg SERVER_MODE=dev + docker build -t ymir-executor/yolov5:cuda111 . -f cuda111.dockerfile ``` ## 镜像运行完`/in`与`/out`目录中的文件被清理 @@ -162,9 +162,16 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 训练镜像如何调试 +- 一般性的错误在`ymir-workplace/ymir-data/logs`下查看 + +``` +tail -f -n 100 ymir_controller.log +tail -f -n 100 ymir_app.log +``` + ![](./debug.png) -- 先修改镜像`industryessentials/ymir-backend`,注释其中所有的`_cleanup(work_dir=work_dir)`,保存`/in`和`/out`目录 +- 先修改镜像`industryessentials/ymir-backend`,注释其中所有的`_cleanup(work_dir=work_dir)`,保存`/in`和`/out`目录下的文件 - 再通过失败任务的tensorboard链接拿到任务id,如`t000000100000175245d1656933456` @@ -173,6 +180,12 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ``` # ls in out task_config.yaml + + # ls out + monitor.txt ymir-executor-out.log + + # ls in + assets config.yaml env.yaml ... 
``` - 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index dd73fb5..517acd0 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -6,7 +6,6 @@ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel # mmcv>=1.3.17, <=1.5.0 ARG MMCV="1.4.3" -ARG SERVER_MODE=prod ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" @@ -27,13 +26,9 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ && rm -rf /var/lib/apt/lists/* # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) -RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ - if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi \ +RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html \ + && pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ && conda clean --all # Install det-mmdetection-tmi diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index e4320d4..fbf2508 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -6,7 +6,6 @@ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime # mmcv>=1.3.17, <=1.5.0 ARG MMCV="1.4.3" -ARG SERVER_MODE=prod ARG YMIR="1.1.0" ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" @@ -25,13 +24,9 @@ RUN apt-get update && apt-get install -y build-essential ffmpeg libsm6 libxext6 && rm -rf /var/lib/apt/lists/* # Install ymir-exc sdk and MMCV -RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \ - if [ 
"${SERVER_MODE}" = "dev" ]; then \ - pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi \ +RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html \ + && pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ && conda clean --all # Install det-mmdetection-tmi diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index 031859d..e8ab497 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -3,8 +3,6 @@ ARG CUDA="10.2" ARG CUDNN="7" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime -# support SERVER_MODE=dev or prod -ARG SERVER_MODE=prod # support YMIR=1.0.0, 1.1.0 or 1.2.0 ARG YMIR="1.1.0" @@ -16,16 +14,13 @@ ENV YMIR_VERSION=${YMIR} # Install linux package RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ - libgl1-mesa-glx curl wget zip \ + libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ + build-essential ninja-build \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # install ymir-exc sdk -RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi +RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" # Copy file from host to docker and install requirements COPY . /app diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index c238bd5..6cfff64 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -4,8 +4,6 @@ ARG CUDNN="8" # cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime -# support SERVER_MODE=dev or prod -ARG SERVER_MODE=prod # support YMIR=1.0.0, 1.1.0 or 1.2.0 ARG YMIR="1.1.0" @@ -18,16 +16,13 @@ ENV YMIR_VERSION=$YMIR # Install linux package RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ - libgl1-mesa-glx curl wget zip \ + libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ + build-essential ninja-build \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # install ymir-exc sdk -RUN if [ "${SERVER_MODE}" = "dev" ]; then \ - pip install "git+https://github.com/IndustryEssentials/ymir.git/@dev#egg=ymir-exc&subdirectory=docker_executor/sample_executor/ymir_exc"; \ - else \ - pip install ymir-exc; \ - fi +RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" # Copy file from host to docker and install requirements COPY . /app diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index c2bd681..fec095b 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -85,7 +85,8 @@ def get_weight_file(cfg: edict) -> str: model_params_path = cfg.param.model_params_path model_dir = cfg.ymir.input.models_dir - model_params_path = [osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pt')] + model_params_path = [osp.join(model_dir, p) + for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pt')] # choose weight file by priority, best.pt > xxx.pt for p in model_params_path: @@ -233,15 +234,15 @@ def write_ymir_training_result(cfg: edict, weight_file: str = "") -> int: YMIR_VERSION = os.getenv('YMIR_VERSION', '1.2.0') if Version(YMIR_VERSION) >= Version('1.2.0'): - _write_latest_ymir_training_result(cfg, map50, epoch, weight_file) + _write_latest_ymir_training_result(cfg, float(map50), epoch, weight_file) else: - _write_ancient_ymir_training_result(cfg, map50) + 
_write_ancient_ymir_training_result(cfg, float(map50)) def _write_latest_ymir_training_result(cfg: edict, - map50: float, - epoch: int, - weight_file: str) -> int: + map50: float, + epoch: int, + weight_file: str) -> int: """ for ymir>=1.2.0 cfg: ymir config @@ -266,10 +267,10 @@ def _write_latest_ymir_training_result(cfg: edict, training_result_file = cfg.ymir.output.training_result_file if osp.exists(training_result_file): - with open(cfg.ymir.output.training_result_file, 'r') as f: + with open(training_result_file, 'r') as f: training_result = yaml.safe_load(stream=f) - map50 = max(training_result.get('map',0.0), map50) + map50 = max(training_result.get('map', 0.0), map50) rw.write_model_stage(stage_name=f"{model}_last_and_best", files=files, mAP=float(map50)) @@ -284,18 +285,17 @@ def _write_ancient_ymir_training_result(cfg: edict, map50: float) -> None: files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] training_result_file = cfg.ymir.output.training_result_file if osp.exists(training_result_file): - with open(cfg.ymir.output.training_result_file, 'r') as f: + with open(training_result_file, 'r') as f: training_result = yaml.safe_load(stream=f) training_result['model'] = files - training_result['map'] = max(training_result.get('map', 0), map50) + training_result['map'] = max(float(training_result.get('map', 0)), map50) else: training_result = { 'model': files, - 'map': map50, - 'stage_name': f'{cfg.param.model}' + 'map': float(map50), + 'stage_name': cfg.param.model } - env_config = env.get_current_env() - with open(env_config.output.training_result_file, 'w') as f: + with open(training_result_file, 'w') as f: yaml.safe_dump(training_result, f) diff --git a/live-code-executor/mxnet.dockerfile b/live-code-executor/mxnet.dockerfile index a12e29d..ed08fff 100644 --- a/live-code-executor/mxnet.dockerfile +++ b/live-code-executor/mxnet.dockerfile @@ -16,7 +16,7 @@ ENV PATH /opt/conda/bin:$PATH RUN apt-key adv --keyserver 
keyserver.ubuntu.com --recv-keys A4B469963BF863CC && \ apt-get update && \ apt-get install -y git gcc wget curl zip libglib2.0-0 libgl1-mesa-glx \ - libsm6 libxext6 libxrender-dev && \ + libsm6 libxext6 libxrender-dev build-essential && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ wget "${MINICONDA_URL}" -O miniconda.sh -q && \ diff --git a/live-code-executor/torch.dockerfile b/live-code-executor/torch.dockerfile index df43f85..4fd9a90 100644 --- a/live-code-executor/torch.dockerfile +++ b/live-code-executor/torch.dockerfile @@ -16,6 +16,7 @@ ENV LANG=C.UTF-8 # install linux package RUN apt-get update && apt-get install -y git curl wget zip gcc \ libglib2.0-0 libgl1-mesa-glx libsm6 libxext6 libxrender-dev \ + build-essential \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 8c6b3dcb70986a0de7cab8c846eb9dec09a2e738 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 8 Aug 2022 10:10:08 +0800 Subject: [PATCH 094/150] remove redundant --- det-yolov5-tmi/start.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 4f0648f..a483cc5 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -118,8 +118,6 @@ def _run_training(cfg: edict) -> None: logging.info(f'export onnx weight: {command}') subprocess.run(command.split(), check=True) - # save hyperparameter - shutil.copy(f'models/{model}.yaml', f'{models_dir}/{model}.yaml') write_ymir_training_result(cfg) # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) From 19b40d15785e7b0f62dffd6954f0afa218c23893 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 8 Aug 2022 10:15:17 +0800 Subject: [PATCH 095/150] sort imports --- det-yolov5-tmi/start.py | 10 ++++------ det-yolov5-tmi/utils/ymir_yolov5.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index a483cc5..c46f6a0 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -1,20 
+1,18 @@ import logging import os -import os.path as osp -import shutil import subprocess import sys import cv2 from easydict import EasyDict as edict +from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, + download_weight_file, get_merged_config, + get_weight_file, get_ymir_process, + write_ymir_training_result) from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw -from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, - download_weight_file, get_merged_config, - get_weight_file, get_ymir_process, write_ymir_training_result) - def start() -> int: cfg = get_merged_config() diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index fec095b..f63a1c4 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -6,7 +6,7 @@ import os.path as osp import shutil from enum import IntEnum -from typing import Any, Dict, List, Tuple +from typing import Any, List import numpy as np import torch From bf4e4ad7d25ba1f114a9c6943ef33f041e3650bd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 8 Aug 2022 11:55:40 +0800 Subject: [PATCH 096/150] update train.py --- det-yolov5-tmi/train.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index d28fdb8..ac9abd6 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -206,7 +206,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear # Epochs start_epoch = ckpt['epoch'] + 1 if resume: - assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.' + assert start_epoch > 0, f'{weights} training from {start_epoch} to {epochs} epochs is finished, nothing to resume.' if epochs < start_epoch: LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. 
Fine-tuning for {epochs} more epochs.") epochs += ckpt['epoch'] # finetune additional epochs @@ -296,7 +296,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear # ymir monitor if epoch % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=epoch / (epochs - start_epoch + 1)) + percent = get_ymir_process(stage=YmirStage.TASK, p=(epoch - start_epoch + 1) / (epochs - start_epoch + 1)) monitor.write_monitor_logger(percent=percent) # Update image weights (optional, single-GPU only) @@ -523,12 +523,17 @@ def main(opt, callbacks=Callbacks()): check_git_status() check_requirements(exclude=['thop']) + ymir_cfg = get_merged_config() # Resume if opt.resume and not check_wandb_resume(opt) and not opt.evolve: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path + ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run(ymir_cfg.ymir.input.root_dir) # specified or most recent path assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' - with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f: - opt = argparse.Namespace(**yaml.safe_load(f)) # replace + + opt_file = Path(ckpt).parent / 'opt.yaml' + if opt_file.exists(): + with open(opt_file, errors='ignore') as f: + opt = argparse.Namespace(**yaml.safe_load(f)) # replace + os.makedirs(opt.save_dir, exist_ok=True) opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate LOGGER.info(f'Resuming training from {ckpt}') else: @@ -539,8 +544,8 @@ def main(opt, callbacks=Callbacks()): if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) - ymir_cfg = get_merged_config() - opt.ymir_cfg = ymir_cfg + + opt.ymir_cfg = ymir_cfg # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) From 
17ee0ed9fba32fead01164bff21f1c38e6a7d15b Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 9 Aug 2022 12:30:04 +0800 Subject: [PATCH 097/150] update readme --- README.MD | 27 +++++++++++++++++++++++---- README_zh-CN.MD | 26 ++++++++++++++++++++++---- det-yolov5-tmi/train.py | 19 ++++++++++++------- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/README.MD b/README.MD index 54a8b94..703bde6 100644 --- a/README.MD +++ b/README.MD @@ -2,6 +2,8 @@ - [ymir](https://github.com/IndustryEssentials/ymir) +- [wiki](https://github.com/yzbx/ymir-executor-fork/wiki) + ## ymir-1.1.0 official image - [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) @@ -14,6 +16,8 @@ - [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) + - [change log](./det-yolov5-tmi/README.md) + ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi @@ -21,6 +25,9 @@ ``` - [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) + + - [change log](./det-mmdetection-tmi/README.md) + ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi @@ -43,6 +50,22 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` +- overview + + | docker image | [finetune](https://github.com/yzbx/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weights | + | - | - | - | - | - | - | - | + | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | + | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | + | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | + | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | + | detectron2 | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | + + - online pretrained weights may download through network + + - local pretrained weights have copied to docker images when building image + +--- + ## det-yolov4-tmi - yolov4 training, mining and infer docker image, use `mxnet` and `darknet` framework @@ -56,8 +79,6 @@ ## det-yolov5-tmi -- [change log](./det-yolov5-tmi/README.md) - - yolov5 training, mining and infer docker image, use `pytorch` framework ``` @@ -69,8 +90,6 @@ docker build -t ymir-executor/yolov5:cuda111-tmi -f cuda111.dockerfile . ## det-mmdetection-tmi -- [change log](./det-mmdetection-tmi/README.md) - ``` cd det-mmdetection-tmi docker build -t ymir-executor/mmdet:cu102-tmi -f docker/Dockerfile.cuda102 . diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 5505b24..0460da4 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -2,6 +2,8 @@ - [ymir](https://github.com/IndustryEssentials/ymir) +- [wiki](https://github.com/yzbx/ymir-executor-fork/wiki) + ## ymir-1.1.0 官方镜像 - [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) @@ -14,6 +16,8 @@ - [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) + - [change log](./det-yolov5-tmi/README.md) + ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi @@ -21,6 +25,9 @@ ``` - [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) + + - [change log](./det-mmdetection-tmi/README.md) + ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi @@ -43,6 +50,21 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` +- 比较 + + | docker image | [finetune](https://github.com/yzbx/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weight | + | - | - | - | - | - | - | - | + | yolov4 | ? 
| ✔️ | ❌ | darknet + mxnet | ❌ | local | + | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | + | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | + | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | + | detectron2 | ? | ✔️ | ✔️ | pytorch | ❌ | online | + + - online 预训练权重可能在训练时通过网络下载 + + - local 预训练权重在构建镜像时复制到了镜像 +--- + ## det-yolov4-tmi - yolov4的训练、挖掘与推理镜像,采用mxnet与darknet框架 @@ -56,8 +78,6 @@ ## det-yolov5-tmi -- [change log](./det-yolov5-tmi/README.md) - - yolov5训练、挖掘及推理镜像,采用pytorch框架,镜像构建时会从github上下载权重, 如果访问github不稳定, 建议提前将模型权重下载并在构建时复制到镜像中. ``` @@ -69,8 +89,6 @@ docker build -t ymir-executor/yolov5:cuda111-tmi -f cuda111.dockerfile . ## det-mmdetection-tmi -- [change log](./det-mmdetection-tmi/README.md) - ``` cd det-mmdetection-tmi docker build -t ymir-executor/mmdet:cu102-tmi -f docker/Dockerfile.cuda102 . diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index ac9abd6..e8d3fe6 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -12,7 +12,6 @@ $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch """ -from ymir_exc import monitor import argparse import math import os @@ -32,6 +31,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP from torch.optim import SGD, Adam, AdamW, lr_scheduler from tqdm import tqdm +from ymir_exc import monitor FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory @@ -47,17 +47,22 @@ from utils.callbacks import Callbacks from utils.datasets import create_dataloader from utils.downloads import attempt_download -from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, - check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, - intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, - print_args, print_mutation, strip_optimizer) +from utils.general import (LOGGER, check_dataset, 
check_file, check_git_status, + check_img_size, check_requirements, check_suffix, + check_version, check_yaml, colorstr, get_latest_run, + increment_path, init_seeds, intersect_dicts, + labels_to_class_weights, labels_to_image_weights, + methods, one_cycle, print_args, print_mutation, + strip_optimizer) from utils.loggers import Loggers from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.loss import ComputeLoss from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels -from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -from utils.ymir_yolov5 import write_ymir_training_result, YmirStage, get_ymir_process, get_merged_config +from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, + select_device, torch_distributed_zero_first) +from utils.ymir_yolov5 import (YmirStage, get_merged_config, get_ymir_process, + write_ymir_training_result) LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) From e28dfbcc9f8cceafbd2386a7560ef661b0348a0e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 16 Aug 2022 16:31:24 +0800 Subject: [PATCH 098/150] remove ddp destroy_process_group() on train end, https://github.com/ultralytics/yolov5/pull/8935 --- README.MD | 2 +- README_zh-CN.MD | 2 +- det-yolov5-tmi/train.py | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index 703bde6..50ce730 100644 --- a/README.MD +++ b/README.MD @@ -58,7 +58,7 @@ | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - | detectron2 | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | + | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - online pretrained weights may download through network diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 0460da4..6f375ec 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -58,7 +58,7 @@ | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - | detectron2 | ? | ✔️ | ✔️ | pytorch | ❌ | online | + | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - online 预训练权重可能在训练时通过网络下载 diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index e8d3fe6..bc7a182 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -568,9 +568,6 @@ def main(opt, callbacks=Callbacks()): # Train if not opt.evolve: train(opt.hyp, opt, device, callbacks) - if WORLD_SIZE > 1 and RANK == 0: - LOGGER.info('Destroying process group... ') - dist.destroy_process_group() # Evolve hyperparameters (optional) else: From fb66ee90c0edaac12cf2fd841b5fb2c607444b43 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 22 Aug 2022 14:00:08 +0800 Subject: [PATCH 099/150] add multi-gpu data loader --- README_zh-CN.MD | 4 + det-yolov5-tmi/cuda111.dockerfile | 9 ++- det-yolov5-tmi/mining/ymir_mining.py | 110 +++++++++++++++++++++++++++ det-yolov5-tmi/start.py | 71 ++++++++++------- det-yolov5-tmi/utils/ymir_yolov5.py | 80 +++++++++---------- 5 files changed, 200 insertions(+), 74 deletions(-) create mode 100644 det-yolov5-tmi/mining/ymir_mining.py diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 6f375ec..83e7f57 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -232,3 +232,7 @@ tail -f -n 100 ymir_app.log - [Paper with Code: Object Detection](https://paperswithcode.com/task/object-detection) - [awesome object detection](https://github.com/amusi/awesome-object-detection) + + - [voc2012 object detection 
leadboard](http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=4) + + - [coco object detection leadboard](https://cocodataset.org/#detection-leaderboard) diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index 6cfff64..5d1e165 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -21,13 +21,14 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# install ymir-exc sdk -RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" +COPY ./requirements.txt /workspace/ +# install ymir-exc sdk and requirements +RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install -r /workspace/requirements.txt # Copy file from host to docker and install requirements COPY . /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ - && pip install -r /app/requirements.txt +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ # Download pretrained weight and font file RUN cd /app && bash data/scripts/download_weights.sh \ diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py new file mode 100644 index 0000000..5f7d319 --- /dev/null +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -0,0 +1,110 @@ +"""run.py: +img --(model)--> pred --(augmentation)--> (aug1_pred, aug2_pred, ..., augN_pred) +img --(augmentation)--> aug1_img --(model)--> pred1 +img --(augmentation)--> aug2_img --(model)--> pred2 +... +img --(augmentation)--> augN_img --(model)--> predN + +dataload(img) --(model)--> pred +dataload(img, pred) --(augmentation1)--> (aug1_img, aug1_pred) --(model)--> pred1 + +1. split dataset with DDP sampler +2. use DDP model to infer sampled dataloader +3. 
gather infer result + +""" +import os +import torch +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.utils.data as td +from functools import partial +from typing import List, Any +import cv2 +from utils.augmentations import letterbox +import numpy as np +from ymir_exc.util import get_merged_config +from utils.ymir_yolov5 import YmirYolov5 + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def load_image_file(img_file: str, img_size, stride): + img = cv2.imread(img_file) + img1 = letterbox(img, img_size, stride=stride, auto=True)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + # img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + # img1.unsqueeze_(dim=0) # expand for batch dim + return img1 + + +class YmirDataset(td.Dataset): + + def __init__(self, images: List[str], annotations: List[Any] = None, augmentations=None, load_fn=None): + super().__init__() + self.annotations = annotations + self.images = images + self.augmentations = augmentations + self.load_fn = load_fn + + def __getitem__(self, index): + + return self.load_fn(self.images[index]) + + def __len__(self): + return len(self.images) + + +def run(rank, size): + """ Distributed function to be implemented later. 
""" + cfg = get_merged_config() + model = YmirYolov5(cfg) + + load_fn = partial(load_image_file, img_size=model.img_size, stride=model.stride) + + with open(cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + # origin dataset + origin_dataset = YmirDataset(images, load_fn=load_fn) + + sampler = None if rank == -1 else td.distributed.DistributedSampler(origin_dataset) + origin_dataset_loader = td.Dataloader(origin_dataset, + batch_size=4, + shuffle=False, + sampler=sampler, + num_workers=0, + pip_memory=True, + drop_last=False) + + + for batch in origin_dataset_loader: + + + +def init_process(rank, size, fn, backend='gloo'): + """ Initialize the distributed environment. """ + os.environ['MASTER_ADDR'] = '127.0.0.1' + os.environ['MASTER_PORT'] = '29500' + dist.init_process_group(backend, rank=rank, world_size=size) + fn(rank, size) + + +if __name__ == "__main__": + size = 2 + processes = [] + mp.set_start_method("spawn") + for rank in range(size): + p = mp.Process(target=init_process, args=(rank, size, run)) + p.start() + processes.append(p) + + for p in processes: + p.join() diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index c46f6a0..01be78a 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -5,10 +5,10 @@ import cv2 from easydict import EasyDict as edict +from models.experimental import attempt_download from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, - download_weight_file, get_merged_config, - get_weight_file, get_ymir_process, - write_ymir_training_result) + get_merged_config, get_weight_file, + get_ymir_process, write_ymir_training_result) from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw @@ -51,7 +51,8 @@ def _run_training(cfg: edict) -> None: out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - 
monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) # 2. training model epochs = cfg.param.epochs @@ -75,7 +76,7 @@ def _run_training(cfg: edict) -> None: weights = get_weight_file(cfg) if not weights: # download pretrained weight - weights = download_weight_file(model) + weights = attempt_download(f'{model}.pt') models_dir = cfg.ymir.output.models_dir @@ -86,18 +87,18 @@ def _run_training(cfg: edict) -> None: device = gpu_id else: device = gpu_id - commands += f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split() - - commands += ['train.py', - '--epochs', str(epochs), - '--batch-size', str(batch_size), - '--data', f'{out_dir}/data.yaml', - '--project', '/out', - '--cfg', f'models/{model}.yaml', - '--name', 'models', '--weights', weights, - '--img-size', str(img_size), - '--save-period', str(save_period), - '--device', device] + commands += f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split( + ) + + commands += [ + 'train.py', '--epochs', + str(epochs), '--batch-size', + str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', '/out', + '--cfg', f'models/{model}.yaml', '--name', 'models', '--weights', + weights, '--img-size', + str(img_size), '--save-period', + str(save_period), '--device', device + ] if gpu_count > 1 and sync_bn: commands.append("--sync-bn") @@ -108,7 +109,8 @@ def _run_training(cfg: edict) -> None: logging.info(f'start training: {commands}') subprocess.run(commands, check=True) - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) # 3. 
convert to onnx and save model weight to design directory opset = cfg.param.opset @@ -126,14 +128,20 @@ def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger(percent=get_ymir_process( - stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) command = 'python3 mining/mining_cald.py' logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) - monitor.write_monitor_logger(percent=get_ymir_process( - stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.POSTPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: @@ -141,8 +149,11 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger(percent=get_ymir_process( - stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) N = dr.items_count(env.DatasetType.CANDIDATE) infer_result = dict() @@ -157,12 +168,18 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num) + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=task_idx, + task_num=task_num) monitor.write_monitor_logger(percent=percent) 
rw.write_infer_result(infer_result=infer_result) - monitor.write_monitor_logger(percent=get_ymir_process( - stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) if __name__ == '__main__': diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index f63a1c4..7257ed1 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -13,11 +13,11 @@ import yaml from easydict import EasyDict as edict from models.common import DetectMultiBackend -from models.experimental import attempt_download +from torch.nn.parallel import DistributedDataParallel as DDP from nptyping import NDArray, Shape, UInt8 from packaging.version import Version from utils.augmentations import letterbox -from utils.general import check_img_size, non_max_suppression, scale_coords +from utils.general import check_img_size, non_max_suppression, scale_coords, check_version from utils.torch_utils import select_device from ymir_exc import env from ymir_exc import result_writer as rw @@ -25,7 +25,7 @@ class YmirStage(IntEnum): PREPROCESS = 1 # convert dataset - TASK = 2 # training/mining/infer + TASK = 2 # training/mining/infer POSTPROCESS = 3 # export model @@ -85,8 +85,9 @@ def get_weight_file(cfg: edict) -> str: model_params_path = cfg.param.model_params_path model_dir = cfg.ymir.input.models_dir - model_params_path = [osp.join(model_dir, p) - for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pt')] + model_params_path = [ + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith('.pt') + ] # choose weight file by priority, best.pt > xxx.pt for p in model_params_path: @@ -99,12 +100,7 @@ def get_weight_file(cfg: edict) -> str: return "" -def download_weight_file(model_name): - weights = 
attempt_download(f'{model_name}.pt') - return weights - - -class YmirYolov5(): +class YmirYolov5(object): """ used for mining and inference to init detector and predict. """ @@ -145,11 +141,23 @@ def init_detector(self, device: torch.device) -> DetectMultiBackend: raise Exception("no weights file specified!") data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') - model = DetectMultiBackend(weights=weights, - device=device, - dnn=False, # not use opencv dnn for onnx inference - data=data_yaml) # dataset.yaml path - + model = DetectMultiBackend( + weights=weights, + device=device, + dnn=False, # not use opencv dnn for onnx inference + data=data_yaml) # dataset.yaml path + + if ddp: + LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html + RANK = int(os.getenv('RANK', -1)) + # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + cuda = device.type != 'cpu' + if cuda and RANK != -1: + if check_version(torch.__version__, '1.11.0'): + model.model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, + static_graph=True) # type: ignore + else: + model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) # type: ignore return model def predict(self, img: CV_IMAGE) -> NDArray: @@ -200,23 +208,22 @@ def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: for i in range(result.shape[0]): xmin, ymin, xmax, ymax, conf, cls = result[i, :6].tolist() - ann = rw.Annotation(class_name=self.class_names[int(cls)], score=conf, box=rw.Box( - x=int(xmin), y=int(ymin), w=int(xmax - xmin), h=int(ymax - ymin))) + ann = rw.Annotation(class_name=self.class_names[int(cls)], + score=conf, + box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin), h=int(ymax - ymin))) anns.append(ann) return anns -def convert_ymir_to_yolov5(cfg: edict) -> None: +def convert_ymir_to_yolov5(cfg: edict): """ convert ymir format dataset to yolov5 format generate data.yaml for training/mining/infer """ - data = dict(path=cfg.ymir.output.root_dir, - 
nc=len(cfg.param.class_names), - names=cfg.param.class_names) + data = dict(path=cfg.ymir.output.root_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) for split, prefix in zip(['train', 'val', 'test'], ['training', 'val', 'candidate']): src_file = getattr(cfg.ymir.input, f'{prefix}_index_file') if osp.exists(src_file): @@ -228,10 +235,7 @@ def convert_ymir_to_yolov5(cfg: edict) -> None: fw.write(yaml.safe_dump(data)) -def write_ymir_training_result(cfg: edict, - map50: float = 0.0, - epoch: int = 0, - weight_file: str = "") -> int: +def write_ymir_training_result(cfg: edict, map50: float = 0.0, epoch: int = 0, weight_file: str = ""): YMIR_VERSION = os.getenv('YMIR_VERSION', '1.2.0') if Version(YMIR_VERSION) >= Version('1.2.0'): _write_latest_ymir_training_result(cfg, float(map50), epoch, weight_file) @@ -239,10 +243,7 @@ def write_ymir_training_result(cfg: edict, _write_ancient_ymir_training_result(cfg, float(map50)) -def _write_latest_ymir_training_result(cfg: edict, - map50: float, - epoch: int, - weight_file: str) -> int: +def _write_latest_ymir_training_result(cfg: edict, map50: float, epoch: int, weight_file: str) -> int: """ for ymir>=1.2.0 cfg: ymir config @@ -257,13 +258,12 @@ def _write_latest_ymir_training_result(cfg: edict, model = cfg.param.model # use `rw.write_training_result` to save training result if weight_file: - rw.write_model_stage(stage_name=f"{model}_{epoch}", - files=[osp.basename(weight_file)], - mAP=float(map50)) + rw.write_model_stage(stage_name=f"{model}_{epoch}", files=[osp.basename(weight_file)], mAP=float(map50)) else: # save other files with - files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*')) - if not f.endswith('.pt')] + ['last.pt', 'best.pt'] + files = [ + osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*')) if not f.endswith('.pt') + ] + ['last.pt', 'best.pt'] training_result_file = cfg.ymir.output.training_result_file if 
osp.exists(training_result_file): @@ -271,9 +271,7 @@ def _write_latest_ymir_training_result(cfg: edict, training_result = yaml.safe_load(stream=f) map50 = max(training_result.get('map', 0.0), map50) - rw.write_model_stage(stage_name=f"{model}_last_and_best", - files=files, - mAP=float(map50)) + rw.write_model_stage(stage_name=f"{model}_last_and_best", files=files, mAP=float(map50)) return 0 @@ -291,11 +289,7 @@ def _write_ancient_ymir_training_result(cfg: edict, map50: float) -> None: training_result['model'] = files training_result['map'] = max(float(training_result.get('map', 0)), map50) else: - training_result = { - 'model': files, - 'map': float(map50), - 'stage_name': cfg.param.model - } + training_result = {'model': files, 'map': float(map50), 'stage_name': cfg.param.model} with open(training_result_file, 'w') as f: yaml.safe_dump(training_result, f) From a5d81c8c7647870d9abeac7c279c147084a284c3 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 25 Aug 2022 18:59:58 +0800 Subject: [PATCH 100/150] add multi-gpu infer and mining --- det-mmdetection-tmi/ymir_infer.py | 2 +- det-yolov5-tmi/infer-template.yaml | 3 + det-yolov5-tmi/mining-template.yaml | 3 + det-yolov5-tmi/mining/mining_cald.py | 7 +- det-yolov5-tmi/mining/util.py | 132 +++++++++++++ det-yolov5-tmi/mining/ymir_infer.py | 128 +++++++++++++ det-yolov5-tmi/mining/ymir_mining.py | 255 +++++++++++++++++--------- det-yolov5-tmi/mypy.ini | 4 +- det-yolov5-tmi/start.py | 155 +++++++--------- det-yolov5-tmi/train.py | 22 +-- det-yolov5-tmi/training-template.yaml | 2 +- det-yolov5-tmi/utils/ymir_yolov5.py | 159 ++++++---------- 12 files changed, 569 insertions(+), 303 deletions(-) create mode 100644 det-yolov5-tmi/mining/util.py create mode 100644 det-yolov5-tmi/mining/ymir_infer.py diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index ecec19e..661b2ea 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -100,7 +100,7 @@ def 
__init__(self, cfg: edict): cfg_options = parse_option(options) if options else None # current infer can only use one gpu!!! - gpu_ids = cfg.param.gpu_id + gpu_ids = cfg.param.get('gpu_id', '0') gpu_id = gpu_ids.split(',')[0] # build the model from a config file and a checkpoint file self.model = init_detector( diff --git a/det-yolov5-tmi/infer-template.yaml b/det-yolov5-tmi/infer-template.yaml index 89dcc96..008375c 100644 --- a/det-yolov5-tmi/infer-template.yaml +++ b/det-yolov5-tmi/infer-template.yaml @@ -10,3 +10,6 @@ img_size: 640 conf_thres: 0.25 iou_thres: 0.45 +batch_size_per_gpu: 16 +num_workers_per_gpu: 4 +ddp: False diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 20106dc..78e13e7 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -10,3 +10,6 @@ img_size: 640 conf_thres: 0.25 iou_thres: 0.45 +batch_size_per_gpu: 16 +num_workers_per_gpu: 4 +ddp: False diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index 0fde401..0e08660 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -6,17 +6,18 @@ from typing import Dict, List, Tuple import cv2 -from easydict import EasyDict as edict import numpy as np +from easydict import EasyDict as edict from nptyping import NDArray from scipy.stats import entropy from tqdm import tqdm from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5, YmirStage, get_ymir_process, get_merged_config +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: @@ -49,7 +50,7 @@ def __init__(self, cfg: edict): def mining(self) -> List: N = 
dr.items_count(env.DatasetType.CANDIDATE) - monitor_gap = max(1, N // 100) + monitor_gap = max(1, N // 1000) idx = -1 beta = 1.3 mining_result = [] diff --git a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py new file mode 100644 index 0000000..41c7c73 --- /dev/null +++ b/det-yolov5-tmi/mining/util.py @@ -0,0 +1,132 @@ +"""run.py: +img --(model)--> pred --(augmentation)--> (aug1_pred, aug2_pred, ..., augN_pred) +img --(augmentation)--> aug1_img --(model)--> pred1 +img --(augmentation)--> aug2_img --(model)--> pred2 +... +img --(augmentation)--> augN_img --(model)--> predN + +dataload(img) --(model)--> pred +dataload(img, pred) --(augmentation1)--> (aug1_img, aug1_pred) --(model)--> pred1 + +1. split dataset with DDP sampler +2. use DDP model to infer sampled dataloader +3. gather infer result + +""" +import os +from typing import Any, List + +import cv2 +import numpy as np +import torch.utils.data as td +from scipy.stats import entropy +from torch.utils.data._utils.collate import default_collate + +from mining.data_augment import cutout, horizontal_flip, resize, rotate +from mining.mining_cald import get_ious +from utils.augmentations import letterbox + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def preprocess(img, img_size, stride): + img1 = letterbox(img, img_size, stride=stride, auto=False)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + # img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + return img1 + + +def load_image_file(img_file: str, img_size, stride): + img = cv2.imread(img_file) + img1 = letterbox(img, img_size, stride=stride, auto=False)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = 
np.ascontiguousarray(img1) + # img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + # img1.unsqueeze_(dim=0) # expand for batch dim + return dict(image=img1, origin_shape=img.shape[0:2], image_file=img_file) + # return img1 + + +def load_image_file_with_ann(image_info: dict, img_size, stride): + img_file = image_info['image_file'] + # xyxy(int) conf(float) class_index(int) + bboxes = image_info['results'][:, :4].astype(np.int32) + img = cv2.imread(img_file) + aug_dict = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) + + data = dict(image_file=img_file, origin_shape=img.shape[0:2]) + for key in aug_dict: + aug_img, aug_bbox = aug_dict[key](img, bboxes) + preprocess_aug_img = preprocess(aug_img, img_size, stride) + data[f'image_{key}'] = preprocess_aug_img + data[f'bboxes_{key}'] = aug_bbox + data[f'origin_shape_{key}'] = aug_img.shape[0:2] + + data.update(image_info) + return data + + +def collate_fn_with_fake_ann(batch): + new_batch = dict() + for key in ['flip', 'cutout', 'rotate', 'resize']: + new_batch[f'bboxes_{key}_list'] = [data[f'bboxes_{key}'] for data in batch] + + new_batch[f'image_{key}'] = default_collate([data[f'image_{key}'] for data in batch]) + + new_batch[f'origin_shape_{key}'] = default_collate([data[f'origin_shape_{key}'] for data in batch]) + + new_batch['results_list'] = [data['results'] for data in batch] + new_batch['image_file'] = [data['image_file'] for data in batch] + + return new_batch + + +def update_consistency(consistency, consistency_per_aug, beta, pred_bboxes_key, pred_conf_key, aug_bboxes_key, + aug_conf): + cls_scores_aug = 1 - pred_conf_key + cls_scores = 1 - aug_conf + + consistency_per_aug = 2.0 + ious = get_ious(pred_bboxes_key, aug_bboxes_key) + aug_idxs = np.argmax(ious, axis=0) + for origin_idx, aug_idx in enumerate(aug_idxs): + max_iou = ious[aug_idx, origin_idx] + if max_iou == 0: + consistency_per_aug = min(consistency_per_aug, beta) + p = 
cls_scores_aug[aug_idx] + q = cls_scores[origin_idx] + m = (p + q) / 2. + js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + if js < 0: + js = 0 + consistency_box = max_iou + consistency_cls = 0.5 * (aug_conf[origin_idx] + pred_conf_key[aug_idx]) * (1 - js) + consistency_per_inst = abs(consistency_box + consistency_cls - beta) + consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) + + consistency += consistency_per_aug + return consistency + + +class YmirDataset(td.Dataset): + def __init__(self, images: List[Any], load_fn=None): + super().__init__() + self.images = images + self.load_fn = load_fn + + def __getitem__(self, index): + return self.load_fn(self.images[index]) + + def __len__(self): + return len(self.images) diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py new file mode 100644 index 0000000..9f459f0 --- /dev/null +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -0,0 +1,128 @@ +"""use fake DDP to infer +1. split data with `images_rank = images[RANK::WORLD_SIZE]` +2. save splited result with `torch.save(results, f'results_{RANK}.pt')` +3. merge result +""" +import os +import warnings +from functools import partial + +import torch +import torch.distributed as dist +import torch.utils.data as td +from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + +from mining.util import YmirDataset, load_image_file +from utils.general import scale_coords +from utils.ymir_yolov5 import YmirYolov5 + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def run(ymir_cfg, ymir_yolov5): + # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
+ gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + device = torch.device('cuda', gpu) + ymir_yolov5.to(device) + + load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) + batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu + gpu_count = ymir_yolov5.gpu_count + num_workers_per_gpu = min([ + os.cpu_count() // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + ymir_yolov5.num_workers_per_gpu + ]) + + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + # origin dataset + images_rank = images[RANK::WORLD_SIZE] + origin_dataset = YmirDataset(images_rank, load_fn=load_fn) + origin_dataset_loader = td.DataLoader(origin_dataset, + batch_size=batch_size_per_gpu, + shuffle=False, + sampler=None, + num_workers=num_workers_per_gpu, + pin_memory=False, + drop_last=False) + + results = [] + dataset_size = len(images_rank) + monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) + pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader + for idx, batch in enumerate(pbar): + with torch.no_grad(): + pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) + + if idx % monitor_gap == 0: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) + + preprocess_image_shape = batch['image'].shape[2:] + for idx, det in enumerate(pred): # per image + result_per_image = [] + if len(det): + origin_image_shape = (batch['origin_shape'][0][idx], batch['origin_shape'][1][idx]) + image_file = batch['image_file'][idx] + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() + result_per_image.append(det) + results.append(dict(image_file=image_file, result=result_per_image)) + + torch.save(results, f'/out/infer_results_{RANK}.pt') + + +def main(): + ymir_cfg = get_merged_config() + ymir_yolov5 = 
YmirYolov5(ymir_cfg, task='infer') + + if LOCAL_RANK != -1: + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + torch.cuda.set_device(gpu) + torch.cuda.set_device(LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + run(ymir_cfg, ymir_yolov5) + + dist.barrier() + + if RANK in [0, -1]: + results = [] + for rank in range(WORLD_SIZE): + results.append(torch.load(f'/out/infer_results_{rank}.pt')) + + torch.save(results, '/out/infer_results_all_rank.pt') + + ymir_infer_result = dict() + for result in results: + for img_data in result: + img_file = img_data['image_file'] + anns = [] + for each_det in img_data['result']: + each_det_np = each_det.data.cpu().numpy() + for i in range(each_det_np.shape[0]): + xmin, ymin, xmax, ymax, conf, cls = each_det_np[i, :6].tolist() + if conf < ymir_yolov5.conf_thres: + continue + if int(cls) >= len(ymir_yolov5.class_names): + warnings.warn(f'class index {int(cls)} out of range for {ymir_yolov5.class_names}') + continue + ann = rw.Annotation(class_name=ymir_yolov5.class_names[int(cls)], + score=conf, + box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin), + h=int(ymax - ymin))) + anns.append(ann) + ymir_infer_result[img_file] = anns + rw.write_infer_result(infer_result=ymir_infer_result) + + print(f'rank: {RANK}, start destroy process group') + dist.destroy_process_group() + + +if __name__ == '__main__': + main() diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index 5f7d319..e58264b 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -1,29 +1,24 @@ -"""run.py: -img --(model)--> pred --(augmentation)--> (aug1_pred, aug2_pred, ..., augN_pred) -img --(augmentation)--> aug1_img --(model)--> pred1 -img --(augmentation)--> aug2_img --(model)--> pred2 -... 
-img --(augmentation)--> augN_img --(model)--> predN - -dataload(img) --(model)--> pred -dataload(img, pred) --(augmentation1)--> (aug1_img, aug1_pred) --(model)--> pred1 - -1. split dataset with DDP sampler -2. use DDP model to infer sampled dataloader -3. gather infer result - +"""use fake DDP to infer +1. split data with `images_rank = images[RANK::WORLD_SIZE]` +2. infer on the origin dataset +3. infer on the augmentation dataset +4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` +5. merge mining result """ import os +from functools import partial + +import numpy as np import torch import torch.distributed as dist -import torch.multiprocessing as mp import torch.utils.data as td -from functools import partial -from typing import List, Any -import cv2 -from utils.augmentations import letterbox -import numpy as np -from ymir_exc.util import get_merged_config +from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + +from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, + update_consistency) +from utils.general import scale_coords from utils.ymir_yolov5 import YmirYolov5 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html @@ -31,80 +26,160 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -def load_image_file(img_file: str, img_size, stride): - img = cv2.imread(img_file) - img1 = letterbox(img, img_size, stride=stride, auto=True)[0] - - # preprocess: convert data format - img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img1 = np.ascontiguousarray(img1) - # img1 = torch.from_numpy(img1).to(self.device) - - img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 - # img1.unsqueeze_(dim=0) # expand for batch dim - return img1 - - -class YmirDataset(td.Dataset): - - def __init__(self, images: List[str], annotations: List[Any] = None, augmentations=None, load_fn=None): 
- super().__init__() - self.annotations = annotations - self.images = images - self.augmentations = augmentations - self.load_fn = load_fn +def run(ymir_cfg, ymir_yolov5): + # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. + gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + device = torch.device('cuda', gpu) + ymir_yolov5.to(device) - def __getitem__(self, index): + load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) + batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu + gpu_count = ymir_yolov5.gpu_count + num_workers_per_gpu = min([ + os.cpu_count() // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + ymir_yolov5.num_workers_per_gpu + ]) - return self.load_fn(self.images[index]) - - def __len__(self): - return len(self.images) - - -def run(rank, size): - """ Distributed function to be implemented later. """ - cfg = get_merged_config() - model = YmirYolov5(cfg) - - load_fn = partial(load_image_file, img_size=model.img_size, stride=model.stride) - - with open(cfg.ymir.input.candidate_index_file, 'r') as f: + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] # origin dataset - origin_dataset = YmirDataset(images, load_fn=load_fn) - - sampler = None if rank == -1 else td.distributed.DistributedSampler(origin_dataset) - origin_dataset_loader = td.Dataloader(origin_dataset, - batch_size=4, + images_rank = images[RANK::WORLD_SIZE] + origin_dataset = YmirDataset(images_rank, load_fn=load_fn) + origin_dataset_loader = td.DataLoader(origin_dataset, + batch_size=batch_size_per_gpu, shuffle=False, - sampler=sampler, - num_workers=0, - pip_memory=True, + sampler=None, + num_workers=num_workers_per_gpu, + pin_memory=False, drop_last=False) - - for batch in origin_dataset_loader: - - - -def init_process(rank, size, fn, backend='gloo'): - """ Initialize the distributed environment. 
""" - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29500' - dist.init_process_group(backend, rank=rank, world_size=size) - fn(rank, size) - - -if __name__ == "__main__": - size = 2 - processes = [] - mp.set_start_method("spawn") - for rank in range(size): - p = mp.Process(target=init_process, args=(rank, size, run)) - p.start() - processes.append(p) - - for p in processes: - p.join() + results = [] + mining_results = dict() + beta = 1.3 + dataset_size = len(images_rank) + pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader + for idx, batch in enumerate(pbar): + with torch.no_grad(): + pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) + + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx / dataset_size) + preprocess_image_shape = batch['image'].shape[2:] + for inner_idx, det in enumerate(pred): # per image + result_per_image = [] + if len(det): + origin_image_shape = (batch['origin_shape'][0][inner_idx], batch['origin_shape'][1][inner_idx]) + image_file = batch['image_file'][inner_idx] + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() + result_per_image.append(det) + else: + mining_results[image_file] = -beta + continue + + results_per_image = torch.cat(result_per_image, dim=0).data.cpu().numpy() + results.append(dict(image_file=image_file, origin_shape=origin_image_shape, results=results_per_image)) + + aug_load_fn = partial(load_image_file_with_ann, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) + aug_dataset = YmirDataset(results, load_fn=aug_load_fn) + aug_dataset_loader = td.DataLoader(aug_dataset, + batch_size=batch_size_per_gpu, + shuffle=False, + sampler=None, + collate_fn=collate_fn_with_fake_ann, + num_workers=num_workers_per_gpu, + pin_memory=False, + drop_last=False) + + dataset_size = len(results) + monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) + pbar = 
tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader + for idx, batch in enumerate(pbar): + if idx % monitor_gap == 0: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) + + batch_consistency = [0.0 for _ in range(len(batch['image_file']))] + aug_keys = ['flip', 'cutout', 'rotate', 'resize'] + + pred_result = dict() + for key in aug_keys: + with torch.no_grad(): + pred_result[key] = ymir_yolov5.forward(batch[f'image_{key}'].float().to(device), nms=True) + + for inner_idx in range(len(batch['image_file'])): + for key in aug_keys: + preprocess_image_shape = batch[f'image_{key}'].shape[2:] + result_per_image = [] + det = pred_result[key][inner_idx] + if len(det) == 0: + # no result for the image with augmentation f'{key}' + batch_consistency[inner_idx] += beta + continue + + # prediction result from origin image + fake_ann = batch['results_list'][inner_idx] + # bboxes = fake_ann[:, :4].data.cpu().numpy().astype(np.int32) + conf = fake_ann[:, 4] + + # augmentated bbox from bboxes, aug_conf = conf + aug_bboxes_key = batch[f'bboxes_{key}_list'][inner_idx].astype(np.int32) + + origin_image_shape = (batch[f'origin_shape_{key}'][0][inner_idx], + batch[f'origin_shape_{key}'][1][inner_idx]) + + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() + result_per_image.append(det) + + pred_bboxes_key = det[:, :4].data.cpu().numpy().astype(np.int32) + pred_conf_key = det[:, 4].data.cpu().numpy() + batch_consistency[inner_idx] = update_consistency(consistency=batch_consistency[inner_idx], + consistency_per_aug=2.0, + beta=beta, + pred_bboxes_key=pred_bboxes_key, + pred_conf_key=pred_conf_key, + aug_bboxes_key=aug_bboxes_key, + aug_conf=conf) + + for inner_idx in range(len(batch['image_file'])): + batch_consistency[inner_idx] /= len(aug_keys) + image_file = batch['image_file'][inner_idx] + mining_results[image_file] = batch_consistency[inner_idx] 
+ + torch.save(mining_results, f'mining_results_{RANK}.pt') + + +def main(): + ymir_cfg = get_merged_config() + ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') + + if LOCAL_RANK != -1: + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + torch.cuda.set_device(gpu) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + run(ymir_cfg, ymir_yolov5) + + # wait all process to save the mining result + dist.barrier() + + if RANK in [0, -1]: + results = [] + for rank in range(WORLD_SIZE): + results.append(torch.load(f'mining_results_{rank}.pt')) + + torch.save(results, 'mining_results_all_rank.pt') + + ymir_mining_result = [] + for result in results: + for img_file, score in result.items(): + ymir_mining_result.append((img_file, score)) + rw.write_mining_result(mining_result=ymir_mining_result) + + print(f'rank: {RANK}, start destroy process group') + dist.destroy_process_group() + + +if __name__ == '__main__': + main() diff --git a/det-yolov5-tmi/mypy.ini b/det-yolov5-tmi/mypy.ini index 85e751a..bb96738 100644 --- a/det-yolov5-tmi/mypy.ini +++ b/det-yolov5-tmi/mypy.ini @@ -1,8 +1,8 @@ [mypy] ignore_missing_imports = True disallow_untyped_defs = False -files = [mining/*.py, utils/ymir_yolov5.py, start.py, train.py] -exclude = [utils/general.py] +exclude = [utils/general.py, models/*.py, utils/*.py] +files = mining/*.py, utils/ymir_yolov5.py, start.py, train.py [mypy-torch.*] ignore_errors = True diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 01be78a..6b75b55 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -5,13 +5,14 @@ import cv2 from easydict import EasyDict as edict -from models.experimental import attempt_download -from utils.ymir_yolov5 import (YmirStage, YmirYolov5, convert_ymir_to_yolov5, - get_merged_config, get_weight_file, - get_ymir_process, write_ymir_training_result) from ymir_exc import 
dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw +from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process, + write_ymir_training_result) + +from models.experimental import attempt_download +from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file def start() -> int: @@ -51,27 +52,20 @@ def _run_training(cfg: edict) -> None: out_dir = cfg.ymir.output.root_dir convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) # 2. training model - epochs = cfg.param.epochs - batch_size = cfg.param.batch_size - model = cfg.param.model - img_size = cfg.param.img_size - save_period = max(1, min(epochs // 10, int(cfg.param.save_period))) - args_options = cfg.param.args_options - gpu_id = str(cfg.param.gpu_id) - gpu_count = len(gpu_id.split(',')) if gpu_id else 0 - port = int(cfg.param.get('port', 29500)) - sync_bn = cfg.param.get('sync_bn', False) - if isinstance(sync_bn, str): - if sync_bn.lower() in ['f', 'false']: - sync_bn = False - elif sync_bn.lower() in ['t', 'true']: - sync_bn = True - else: - raise Exception(f'unknown bool str sync_bn = {sync_bn}') + epochs: int = int(cfg.param.epochs) + batch_size_per_gpu: int = int(cfg.param.batch_size_per_gpu) + model: str = cfg.param.model + img_size: int = int(cfg.param.img_size) + save_period: int = max(1, min(epochs // 10, int(cfg.param.save_period))) + args_options: str = cfg.param.args_options + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 + batch_size: int = batch_size_per_gpu * max(1, gpu_count) + port: int = int(cfg.param.get('port', 29500)) + sync_bn: bool = get_bool(cfg, key='sync_bn', default_value=False) weights = get_weight_file(cfg) if not 
weights: @@ -79,38 +73,34 @@ def _run_training(cfg: edict) -> None: weights = attempt_download(f'{model}.pt') models_dir = cfg.ymir.output.models_dir + project = os.path.dirname(models_dir) + name = os.path.basename(models_dir) + assert os.path.join(project, name) == models_dir commands = ['python3'] - if gpu_count == 0: - device = 'cpu' - elif gpu_count == 1: - device = gpu_id - else: - device = gpu_id - commands += f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split( - ) + device = gpu_id or 'cpu' + if gpu_count > 1: + commands.extend(f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split()) - commands += [ + commands.extend([ 'train.py', '--epochs', str(epochs), '--batch-size', - str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', '/out', - '--cfg', f'models/{model}.yaml', '--name', 'models', '--weights', - weights, '--img-size', + str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', project, '--cfg', f'models/{model}.yaml', + '--name', name, '--weights', weights, '--img-size', str(img_size), '--save-period', str(save_period), '--device', device - ] + ]) if gpu_count > 1 and sync_bn: commands.append("--sync-bn") if args_options: - commands += args_options.split() + commands.extend(args_options.split()) logging.info(f'start training: {commands}') subprocess.run(commands, check=True) - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) + monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) # 3. 
convert to onnx and save model weight to design directory opset = cfg.param.opset @@ -118,68 +108,57 @@ def _run_training(cfg: edict) -> None: logging.info(f'export onnx weight: {command}') subprocess.run(command.split(), check=True) - write_ymir_training_result(cfg) + write_ymir_training_result(cfg, map50=0, files=[], id='last') # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: - # generate data.yaml for mining - out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg) - logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 - command = 'python3 mining/mining_cald.py' + if gpu_count <= 1: + command = 'python3 mining/mining_cald.py' + else: + port = find_free_port() + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining.py' # noqa logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.POSTPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) + percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: - # generate data.yaml for infer - out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg) - logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) - - N = dr.items_count(env.DatasetType.CANDIDATE) - infer_result = dict() - model = YmirYolov5(cfg) - idx = -1 - - monitor_gap = max(1, N // 100) - for 
asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): - img = cv2.imread(asset_path) - result = model.infer(img) - infer_result[asset_path] = result - idx += 1 - - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, - p=idx / N, - task_idx=task_idx, - task_num=task_num) - monitor.write_monitor_logger(percent=percent) - - rw.write_infer_result(infer_result=infer_result) + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 + + if gpu_count <= 1: + N = dr.items_count(env.DatasetType.CANDIDATE) + infer_result = dict() + model = YmirYolov5(cfg) + idx = -1 + + monitor_gap = max(1, N // 100) + for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): + img = cv2.imread(asset_path) + result = model.infer(img) + infer_result[asset_path] = result + idx += 1 + + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num) + monitor.write_monitor_logger(percent=percent) + + rw.write_infer_result(infer_result=infer_result) + else: + port = find_free_port() + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_infer.py' # noqa + + logging.info(f'mining: {command}') + subprocess.run(command.split(), check=True) + monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) + percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) if __name__ == '__main__': diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index bc7a182..0d208bf 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -39,6 +39,8 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process, write_ymir_training_result 
+ import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model @@ -47,22 +49,16 @@ from utils.callbacks import Callbacks from utils.datasets import create_dataloader from utils.downloads import attempt_download -from utils.general import (LOGGER, check_dataset, check_file, check_git_status, - check_img_size, check_requirements, check_suffix, - check_version, check_yaml, colorstr, get_latest_run, - increment_path, init_seeds, intersect_dicts, - labels_to_class_weights, labels_to_image_weights, - methods, one_cycle, print_args, print_mutation, - strip_optimizer) +from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements, + check_suffix, check_version, check_yaml, colorstr, get_latest_run, increment_path, + init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, + one_cycle, print_args, print_mutation, strip_optimizer) from utils.loggers import Loggers from utils.loggers.wandb.wandb_utils import check_wandb_resume from utils.loss import ComputeLoss from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels -from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, - select_device, torch_distributed_zero_first) -from utils.ymir_yolov5 import (YmirStage, get_merged_config, get_ymir_process, - write_ymir_training_result) +from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -423,7 +419,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') weight_file = str(w / f'epoch{epoch}.pt') - write_ymir_training_result(ymir_cfg, map50=results[2], epoch=epoch, 
weight_file=weight_file) + write_ymir_training_result(ymir_cfg, map50=results[2], id=str(epoch), files=[weight_file]) del ckpt callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) @@ -472,7 +468,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear torch.cuda.empty_cache() # save the best and last weight file with other files in models_dir if RANK in [-1, 0]: - write_ymir_training_result(ymir_cfg, map50=best_fitness, epoch=epochs, weight_file='') + write_ymir_training_result(ymir_cfg, map50=best_fitness, id=str(epochs), files=[]) return results diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index 763f66a..ac9a91f 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -10,7 +10,7 @@ shm_size: '32G' export_format: 'ark:raw' model: 'yolov5s' -batch_size: 16 +batch_size_per_gpu: 16 epochs: 300 img_size: 640 opset: 11 diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 7257ed1..0b43505 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -5,122 +5,73 @@ import os import os.path as osp import shutil -from enum import IntEnum from typing import Any, List import numpy as np import torch import yaml from easydict import EasyDict as edict -from models.common import DetectMultiBackend -from torch.nn.parallel import DistributedDataParallel as DDP from nptyping import NDArray, Shape, UInt8 from packaging.version import Version -from utils.augmentations import letterbox -from utils.general import check_img_size, non_max_suppression, scale_coords, check_version -from utils.torch_utils import select_device -from ymir_exc import env +from ymir_exc import monitor from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_weight_files, get_ymir_process - -class YmirStage(IntEnum): - PREPROCESS = 1 # convert dataset - TASK = 2 # 
training/mining/infer - POSTPROCESS = 3 # export model - +from models.common import DetectMultiBackend +from utils.augmentations import letterbox +from utils.general import check_img_size, non_max_suppression, scale_coords +from utils.torch_utils import select_device BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def get_ymir_process(stage: YmirStage, p: float, task_idx: int = 0, task_num: int = 1) -> float: - """ - stage: pre-process/task/post-process - p: percent for stage - task_idx: index for multiple tasks like mining (task_idx=0) and infer (task_idx=1) - task_num: the total number of multiple tasks. - """ - # const value for ymir process - PREPROCESS_PERCENT = 0.1 - TASK_PERCENT = 0.8 - POSTPROCESS_PERCENT = 0.1 - - if p < 0 or p > 1.0: - raise Exception(f'p not in [0,1], p={p}') - - ratio = 1.0 / task_num - init = task_idx / task_num - - if stage == YmirStage.PREPROCESS: - return init + PREPROCESS_PERCENT * p * ratio - elif stage == YmirStage.TASK: - return init + (PREPROCESS_PERCENT + TASK_PERCENT * p) * ratio - elif stage == YmirStage.POSTPROCESS: - return init + (PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p) * ratio - else: - raise NotImplementedError(f'unknown stage {stage}') - - -def get_merged_config() -> edict: - """ - merge ymir_config and executor_config - """ - merged_cfg = edict() - # the hyperparameter information - merged_cfg.param = env.get_executor_config() - - # the ymir path information - merged_cfg.ymir = env.get_current_env() - return merged_cfg - - def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority find weight file in cfg.param.model_params_path or cfg.param.model_params_path """ - if cfg.ymir.run_training: - model_params_path = cfg.param.get('pretrained_model_params', []) - else: - model_params_path = cfg.param.model_params_path - - model_dir = cfg.ymir.input.models_dir - model_params_path = [ - osp.join(model_dir, p) for p in model_params_path if 
osp.exists(osp.join(model_dir, p)) and p.endswith('.pt') - ] - + weight_files = get_weight_files(cfg, suffix=('.pt')) # choose weight file by priority, best.pt > xxx.pt - for p in model_params_path: + for p in weight_files: if p.endswith('best.pt'): return p - if len(model_params_path) > 0: - return max(model_params_path, key=osp.getctime) + if len(weight_files) > 0: + return max(weight_files, key=osp.getctime) return "" -class YmirYolov5(object): +class YmirYolov5(torch.nn.Module): """ used for mining and inference to init detector and predict. """ - - def __init__(self, cfg: edict): + def __init__(self, cfg: edict, task='infer'): + super().__init__() self.cfg = cfg if cfg.ymir.run_mining and cfg.ymir.run_infer: # multiple task, run mining first, infer later - infer_task_idx = 1 - task_num = 2 + if task == 'infer': + self.task_idx = 1 + elif task == 'mining': + self.task_idx = 0 + else: + raise Exception(f'unknown task {task}') + + self.task_num = 2 else: - infer_task_idx = 0 - task_num = 1 - - self.task_idx = infer_task_idx - self.task_num = task_num - - device = select_device(cfg.param.get('gpu_id', 'cpu')) - + self.task_idx = 0 + self.task_num = 1 + + self.gpu_id: str = str(cfg.param.get('gpu_id', '0')) + device = select_device(self.gpu_id) + self.gpu_count: int = len(self.gpu_id.split(',')) if self.gpu_id else 0 + self.batch_size_per_gpu = int(cfg.param.get('batch_size_per_gpu', 4)) + self.num_workers_per_gpu = int(cfg.param.get('num_workers_per_gpu', 4)) + self.batch_size: int = self.batch_size_per_gpu * self.gpu_count self.model = self.init_detector(device) + self.model.eval() self.device = device self.class_names = cfg.param.class_names self.stride = self.model.stride @@ -128,36 +79,39 @@ def __init__(self, cfg: edict): self.iou_thres = float(cfg.param.iou_thres) img_size = int(cfg.param.img_size) - imgsz = (img_size, img_size) + imgsz = [img_size, img_size] imgsz = check_img_size(imgsz, s=self.stride) self.model.warmup(imgsz=(1, 3, *imgsz), half=False) # 
warmup self.img_size = imgsz + def forward(self, x, nms=False): + pred = self.model(x) + if not nms: + return pred + + # postprocess + conf_thres = self.conf_thres + iou_thres = self.iou_thres + classes = None # not filter class_idx in results + agnostic_nms = False + max_det = 100 + + pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + return pred + def init_detector(self, device: torch.device) -> DetectMultiBackend: weights = get_weight_file(self.cfg) if not weights: raise Exception("no weights file specified!") - data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') model = DetectMultiBackend( weights=weights, device=device, dnn=False, # not use opencv dnn for onnx inference - data=data_yaml) # dataset.yaml path - - if ddp: - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html - RANK = int(os.getenv('RANK', -1)) - # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - cuda = device.type != 'cpu' - if cuda and RANK != -1: - if check_version(torch.__version__, '1.11.0'): - model.model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, - static_graph=True) # type: ignore - else: - model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) # type: ignore + data=None) # dataset.yaml path + return model def predict(self, img: CV_IMAGE) -> NDArray: @@ -175,16 +129,7 @@ def predict(self, img: CV_IMAGE) -> NDArray: img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 img1.unsqueeze_(dim=0) # expand for batch dim - pred = self.model(img1) - - # postprocess - conf_thres = self.conf_thres - iou_thres = self.iou_thres - classes = None # not filter class_idx in results - agnostic_nms = False - max_det = 1000 - - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + pred = self.forward(img1, nms=True) result = [] for det in pred: @@ -216,6 +161,10 @@ def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: return anns + def 
write_monitor_logger(self, stage: YmirStage, p: float): + monitor.write_monitor_logger( + percent=get_ymir_process(stage=stage, p=p, task_idx=self.task_idx, task_num=self.task_num)) + def convert_ymir_to_yolov5(cfg: edict): """ From 946a04fc79de59e65303d67a5456aa015ff4e77c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 25 Aug 2022 19:08:16 +0800 Subject: [PATCH 101/150] change mining result save directory --- det-yolov5-tmi/mining/ymir_mining.py | 6 +++--- det-yolov5-tmi/start.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index e58264b..677f2ee 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -146,7 +146,7 @@ def run(ymir_cfg, ymir_yolov5): image_file = batch['image_file'][inner_idx] mining_results[image_file] = batch_consistency[inner_idx] - torch.save(mining_results, f'mining_results_{RANK}.pt') + torch.save(mining_results, f'/out/mining_results_{RANK}.pt') def main(): @@ -167,9 +167,9 @@ def main(): if RANK in [0, -1]: results = [] for rank in range(WORLD_SIZE): - results.append(torch.load(f'mining_results_{rank}.pt')) + results.append(torch.load(f'/out/mining_results_{rank}.pt')) - torch.save(results, 'mining_results_all_rank.pt') + torch.save(results, '/out/mining_results_all_rank.pt') ymir_mining_result = [] for result in results: diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 6b75b55..858bf0c 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -154,7 +154,7 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: port = find_free_port() command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_infer.py' # noqa - logging.info(f'mining: {command}') + logging.info(f'infer: {command}') subprocess.run(command.split(), check=True) monitor.write_monitor_logger( From 
3242af355f22444c377e5f5dfb1b6a7d9b047070 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 26 Aug 2022 15:01:10 +0800 Subject: [PATCH 102/150] use data.yaml for onnx model --- det-yolov5-tmi/infer-template.yaml | 2 +- det-yolov5-tmi/mining-template.yaml | 2 +- det-yolov5-tmi/mining/ymir_infer.py | 17 +++++++++------ det-yolov5-tmi/mining/ymir_mining.py | 24 +++++++++++---------- det-yolov5-tmi/start.py | 19 +++++++++++++++++ det-yolov5-tmi/utils/ymir_yolov5.py | 32 ++++++++++++++-------------- 6 files changed, 60 insertions(+), 36 deletions(-) diff --git a/det-yolov5-tmi/infer-template.yaml b/det-yolov5-tmi/infer-template.yaml index 008375c..329887a 100644 --- a/det-yolov5-tmi/infer-template.yaml +++ b/det-yolov5-tmi/infer-template.yaml @@ -12,4 +12,4 @@ conf_thres: 0.25 iou_thres: 0.45 batch_size_per_gpu: 16 num_workers_per_gpu: 4 -ddp: False +pin_memory: False diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 78e13e7..1ae6d29 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -12,4 +12,4 @@ conf_thres: 0.25 iou_thres: 0.45 batch_size_per_gpu: 16 num_workers_per_gpu: 4 -ddp: False +pin_memory: False diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index 9f459f0..827dc8a 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -4,12 +4,14 @@ 3. merge result """ import os +import sys import warnings from functools import partial import torch import torch.distributed as dist import torch.utils.data as td +from easydict import EasyDict as edict from tqdm import tqdm from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -23,7 +25,7 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -def run(ymir_cfg, ymir_yolov5): +def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) device = torch.device('cuda', gpu) @@ -32,8 +34,9 @@ def run(ymir_cfg, ymir_yolov5): load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu gpu_count = ymir_yolov5.gpu_count + cpu_count: int = os.cpu_count() or 1 num_workers_per_gpu = min([ - os.cpu_count() // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, ymir_yolov5.num_workers_per_gpu ]) @@ -48,7 +51,7 @@ def run(ymir_cfg, ymir_yolov5): shuffle=False, sampler=None, num_workers=num_workers_per_gpu, - pin_memory=False, + pin_memory=ymir_yolov5.pin_memory, drop_last=False) results = [] @@ -76,7 +79,7 @@ def run(ymir_cfg, ymir_yolov5): torch.save(results, f'/out/infer_results_{RANK}.pt') -def main(): +def main() -> int: ymir_cfg = get_merged_config() ymir_yolov5 = YmirYolov5(ymir_cfg, task='infer') @@ -89,6 +92,7 @@ def main(): run(ymir_cfg, ymir_yolov5) + # wait all process to save the infer result dist.barrier() if RANK in [0, -1]: @@ -96,8 +100,6 @@ def main(): for rank in range(WORLD_SIZE): results.append(torch.load(f'/out/infer_results_{rank}.pt')) - torch.save(results, '/out/infer_results_all_rank.pt') - ymir_infer_result = dict() for result in results: for img_data in result: @@ -122,7 +124,8 @@ def main(): print(f'rank: {RANK}, start destroy process group') dist.destroy_process_group() + return 0 if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index 677f2ee..14fc7aa 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -6,12 +6,14 @@ 5. 
merge mining result """ import os +import sys from functools import partial import numpy as np import torch import torch.distributed as dist import torch.utils.data as td +from easydict import EasyDict as edict from tqdm import tqdm from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -26,17 +28,18 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -def run(ymir_cfg, ymir_yolov5): +def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) device = torch.device('cuda', gpu) ymir_yolov5.to(device) load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu - gpu_count = ymir_yolov5.gpu_count + batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu + gpu_count: int = ymir_yolov5.gpu_count + cpu_count: int = os.cpu_count() or 1 num_workers_per_gpu = min([ - os.cpu_count() // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, ymir_yolov5.num_workers_per_gpu ]) @@ -51,7 +54,7 @@ def run(ymir_cfg, ymir_yolov5): shuffle=False, sampler=None, num_workers=num_workers_per_gpu, - pin_memory=False, + pin_memory=ymir_yolov5.pin_memory, drop_last=False) results = [] @@ -63,7 +66,7 @@ def run(ymir_cfg, ymir_yolov5): with torch.no_grad(): pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx / dataset_size) + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) preprocess_image_shape = batch['image'].shape[2:] for inner_idx, det in enumerate(pred): # per image result_per_image = [] @@ -88,7 +91,7 @@ def run(ymir_cfg, ymir_yolov5): sampler=None, collate_fn=collate_fn_with_fake_ann, num_workers=num_workers_per_gpu, - 
pin_memory=False, + pin_memory=ymir_yolov5.pin_memory, drop_last=False) dataset_size = len(results) @@ -149,7 +152,7 @@ def run(ymir_cfg, ymir_yolov5): torch.save(mining_results, f'/out/mining_results_{RANK}.pt') -def main(): +def main() -> int: ymir_cfg = get_merged_config() ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') @@ -169,8 +172,6 @@ def main(): for rank in range(WORLD_SIZE): results.append(torch.load(f'/out/mining_results_{rank}.pt')) - torch.save(results, '/out/mining_results_all_rank.pt') - ymir_mining_result = [] for result in results: for img_file, score in result.items(): @@ -179,7 +180,8 @@ def main(): print(f'rank: {RANK}, start destroy process group') dist.destroy_process_group() + return 0 if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 858bf0c..9e2dfa1 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -114,6 +114,15 @@ def _run_training(cfg: edict) -> None: def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: + # generate data.yaml for mining + out_dir = cfg.ymir.output.root_dir + convert_ymir_to_yolov5(cfg) + logging.info(f'generate {out_dir}/data.yaml') + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 @@ -129,6 +138,16 @@ def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: + # generate data.yaml for infer + out_dir = cfg.ymir.output.root_dir + convert_ymir_to_yolov5(cfg) + logging.info(f'generate {out_dir}/data.yaml') + monitor.write_monitor_logger( + percent=get_ymir_process(stage=YmirStage.PREPROCESS, + p=1.0, + task_idx=task_idx, + task_num=task_num)) + gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = 
len(gpu_id.split(',')) if gpu_id else 0 diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 0b43505..4093100 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -15,7 +15,7 @@ from packaging.version import Version from ymir_exc import monitor from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_weight_files, get_ymir_process +from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process from models.common import DetectMultiBackend from utils.augmentations import letterbox @@ -67,37 +67,36 @@ def __init__(self, cfg: edict, task='infer'): self.gpu_id: str = str(cfg.param.get('gpu_id', '0')) device = select_device(self.gpu_id) self.gpu_count: int = len(self.gpu_id.split(',')) if self.gpu_id else 0 - self.batch_size_per_gpu = int(cfg.param.get('batch_size_per_gpu', 4)) - self.num_workers_per_gpu = int(cfg.param.get('num_workers_per_gpu', 4)) + self.batch_size_per_gpu: int = int(cfg.param.get('batch_size_per_gpu', 4)) + self.num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4)) + self.pin_memory: bool = get_bool(cfg, 'pin_memory', False) self.batch_size: int = self.batch_size_per_gpu * self.gpu_count self.model = self.init_detector(device) self.model.eval() self.device = device - self.class_names = cfg.param.class_names + self.class_names: List[str] = cfg.param.class_names self.stride = self.model.stride - self.conf_thres = float(cfg.param.conf_thres) - self.iou_thres = float(cfg.param.iou_thres) + self.conf_thres: float = float(cfg.param.conf_thres) + self.iou_thres: float = float(cfg.param.iou_thres) img_size = int(cfg.param.img_size) imgsz = [img_size, img_size] imgsz = check_img_size(imgsz, s=self.stride) self.model.warmup(imgsz=(1, 3, *imgsz), half=False) # warmup - self.img_size = imgsz + self.img_size: List[int] = imgsz def forward(self, x, nms=False): pred = self.model(x) if not nms: return pred - # postprocess - 
conf_thres = self.conf_thres - iou_thres = self.iou_thres - classes = None # not filter class_idx in results - agnostic_nms = False - max_det = 100 - - pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) + pred = non_max_suppression(pred, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + classes=None, # not filter class_idx + agnostic=False, + max_det=100) return pred def init_detector(self, device: torch.device) -> DetectMultiBackend: @@ -106,11 +105,12 @@ def init_detector(self, device: torch.device) -> DetectMultiBackend: if not weights: raise Exception("no weights file specified!") + data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') model = DetectMultiBackend( weights=weights, device=device, dnn=False, # not use opencv dnn for onnx inference - data=None) # dataset.yaml path + data=data_yaml) # dataset.yaml path return model From 8573696b5eefb781222d124a6e3033712d1cc951 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 26 Aug 2022 17:51:15 +0800 Subject: [PATCH 103/150] fix mining bug for multi-gpu --- det-yolov5-tmi/mining/ymir_mining.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index 14fc7aa..917ea44 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -70,9 +70,9 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): preprocess_image_shape = batch['image'].shape[2:] for inner_idx, det in enumerate(pred): # per image result_per_image = [] + image_file = batch['image_file'][inner_idx] if len(det): origin_image_shape = (batch['origin_shape'][0][inner_idx], batch['origin_shape'][1][inner_idx]) - image_file = batch['image_file'][inner_idx] # Rescale boxes from img_size to img size det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() result_per_image.append(det) From 01989aeec64b8bc2ac4cd54f8a2991bda21a4554 Mon Sep 17 
00:00:00 2001 From: youdaoyzbx Date: Tue, 30 Aug 2022 17:07:39 +0800 Subject: [PATCH 104/150] support mutli-gpu mining --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 6 +- det-mmdetection-tmi/start.py | 17 +++- det-mmdetection-tmi/training-template.yaml | 4 +- det-mmdetection-tmi/ymir_infer.py | 71 ++++++-------- det-mmdetection-tmi/ymir_mining.py | 97 +++++++++++++------- det-yolov5-tmi/mining/ymir_mining.py | 5 +- 6 files changed, 115 insertions(+), 85 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 810914b..982a1d4 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -132,7 +132,7 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_cfg.log_config['hooks'][1].update(tensorboard_logger) # modify evaluation and interval - interval = max(1, mmdet_cfg.runner.max_epochs//30) + interval = max(1, mmdet_cfg.runner.max_epochs // 30) mmdet_cfg.evaluation.interval = interval mmdet_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') # TODO Whether to evaluating the AP for each class @@ -146,9 +146,9 @@ def get_weight_file(cfg: edict) -> str: find weight file in cfg.param.pretrained_model_params or cfg.param.model_params_path """ if cfg.ymir.run_training: - model_params_path: List = cfg.param.get('pretrained_model_params', []) + model_params_path: List[str] = cfg.param.get('pretrained_model_params', []) else: - model_params_path: List = cfg.param.get('model_params_path', []) + model_params_path = cfg.param.get('model_params_path', []) model_dir = cfg.ymir.input.models_dir model_params_path = [ diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 12a6f9c..b570b2d 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -4,9 +4,8 @@ import sys from easydict import EasyDict as edict - -from mmdet.utils.util_ymir import get_merged_config from ymir_exc import 
monitor +from ymir_exc.util import find_free_port, get_merged_config def start(cfg: edict) -> int: @@ -16,7 +15,7 @@ def start(cfg: edict) -> int: _run_training() elif cfg.ymir.run_mining or cfg.ymir.run_infer: if cfg.ymir.run_mining: - _run_mining() + _run_mining(cfg) if cfg.ymir.run_infer: _run_infer() else: @@ -35,12 +34,20 @@ def _run_training() -> None: logging.info("training finished") -def _run_mining() -> None: - command = 'python3 ymir_mining.py' +def _run_mining(cfg: edict) -> None: + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu_count = len(gpu_id.split(',')) + if gpu_count <= 1: + command = 'python3 ymir_mining.py' + else: + port = find_free_port() + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} ymir_mining.py' # noqa + logging.info(f'start mining: {command}') subprocess.run(command.split(), check=True) logging.info("mining finished") + def _run_infer() -> None: command = 'python3 ymir_infer.py' logging.info(f'start infer: {command}') diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 37b2da9..d4c191f 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -1,7 +1,7 @@ shm_size: '32G' export_format: 'ark:raw' -samples_per_gpu: 2 -workers_per_gpu: 2 +samples_per_gpu: 16 +workers_per_gpu: 16 max_epochs: 300 config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' args_options: '' diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 661b2ea..9920ca2 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -8,12 +8,10 @@ import numpy as np from easydict import EasyDict as edict from mmcv import DictAction +from mmdet.apis import inference_detector, init_detector +from mmdet.utils.util_ymir import YmirStage, get_merged_config, get_weight_file, get_ymir_process from nptyping import NDArray, Shape from tqdm import tqdm - -from 
mmdet.apis import inference_detector, init_detector -from mmdet.utils.util_ymir import (YmirStage, get_merged_config, - get_weight_file, get_ymir_process) from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw @@ -23,58 +21,51 @@ def parse_option(cfg_options: str) -> dict: parser = argparse.ArgumentParser(description='parse cfg options') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + parser.add_argument('--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') args = parser.parse_args(f'--cfg-options {cfg_options}'.split()) return args.cfg_options -def mmdet_result_to_ymir(results: List[DETECTION_RESULT], - class_names: List[str]) -> List[rw.Annotation]: +def mmdet_result_to_ymir(results: List[DETECTION_RESULT], class_names: List[str]) -> List[rw.Annotation]: ann_list = [] for idx, result in enumerate(results): for line in result: if any(np.isinf(line)): continue x1, y1, x2, y2, score = line - ann = rw.Annotation( - class_name=class_names[idx], - score=score, - box=rw.Box(x=round(x1), - y=round(y1), - w=round(x2-x1), - h=round(y2-y1)) - ) + ann = rw.Annotation(class_name=class_names[idx], + score=score, + box=rw.Box(x=round(x1), y=round(y1), w=round(x2 - x1), h=round(y2 - y1))) ann_list.append(ann) return ann_list def get_config_file(cfg): if cfg.ymir.run_training: - model_params_path: List = cfg.param.get('pretrained_model_params',[]) + model_params_path: List = cfg.param.get('pretrained_model_params', []) else: - model_params_path: List = cfg.param.get('model_params_path',[]) + model_params_path: List = cfg.param.get('model_params_path', []) model_dir = cfg.ymir.input.models_dir config_files = [ - osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.py'))] + osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.py')) + ] if len(config_files) > 0: if len(config_files) > 1: warnings.warn(f'multiple config file found! 
use {config_files[0]}') return config_files[0] else: - raise Exception( - f'no config_file found in {model_dir} and {model_params_path}') + raise Exception(f'no config_file found in {model_dir} and {model_params_path}') class YmirModel: @@ -90,8 +81,8 @@ def __init__(self, cfg: edict): infer_task_idx = 0 task_num = 1 - self.task_idx=infer_task_idx - self.task_num=task_num + self.task_idx = infer_task_idx + self.task_num = task_num # Specify the path to model config and checkpoint file config_file = get_config_file(cfg) @@ -103,8 +94,7 @@ def __init__(self, cfg: edict): gpu_ids = cfg.param.get('gpu_id', '0') gpu_id = gpu_ids.split(',')[0] # build the model from a config file and a checkpoint file - self.model = init_detector( - config_file, checkpoint_file, device=f'cuda:{gpu_id}', cfg_options=cfg_options) + self.model = init_detector(config_file, checkpoint_file, device=f'cuda:{gpu_id}', cfg_options=cfg_options) def infer(self, img): return inference_detector(self.model, img) @@ -124,21 +114,20 @@ def main(): for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): img = cv2.imread(asset_path) result = model.infer(img) - raw_anns = mmdet_result_to_ymir( - result, cfg.param.class_names) + raw_anns = mmdet_result_to_ymir(result, cfg.param.class_names) - infer_result[asset_path] = [ - ann for ann in raw_anns if ann.score >= conf_threshold] + infer_result[asset_path] = [ann for ann in raw_anns if ann.score >= conf_threshold] idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process( - stage=YmirStage.TASK, p=idx / N, task_idx=model.task_idx, task_num=model.task_num) + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=model.task_idx, + task_num=model.task_num) monitor.write_monitor_logger(percent=percent) rw.write_infer_result(infer_result=infer_result) - percent = get_ymir_process(stage=YmirStage.POSTPROCESS, - p=1, task_idx=model.task_idx, task_num=model.task_num) + percent = 
get_ymir_process(stage=YmirStage.POSTPROCESS, p=1, task_idx=model.task_idx, task_num=model.task_num) monitor.write_monitor_logger(percent=percent) return 0 diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py index 0299edc..7eeaa1f 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining.py @@ -2,24 +2,31 @@ data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py """ +import os import random import sys -from typing import Any, Dict, List, Tuple +from typing import Any, Callable, Dict, List, Tuple import cv2 -from easydict import EasyDict as edict import numpy as np +import torch +import torch.distributed as dist +from easydict import EasyDict as edict +from mmcv.runner import init_dist +from mmdet.apis.test import collect_results_gpu +from mmdet.utils.util_ymir import BBOX, CV_IMAGE from nptyping import NDArray from scipy.stats import entropy from tqdm import tqdm - -from mmdet.utils.util_ymir import (BBOX, CV_IMAGE, YmirStage, - get_merged_config, get_ymir_process) -from ymir_exc import dataset_reader as dr -from ymir_exc import env, monitor +from ymir_exc import monitor from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process from ymir_infer import YmirModel +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: ''' @@ -32,11 +39,13 @@ def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: ''' n1 = boxes1.shape[0] n2 = boxes2.shape[0] - max_xy = np.minimum(np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + max_xy = np.minimum( + np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + 
np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) - min_xy = np.maximum(np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) + min_xy = np.maximum( + np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) @@ -59,8 +68,12 @@ def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ return image, bbox -def cutout(image: CV_IMAGE, bbox: BBOX, cut_num: int = 2, fill_val: int = 0, - bbox_remove_thres: float = 0.4, bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: +def cutout(image: CV_IMAGE, + bbox: BBOX, + cut_num: int = 2, + fill_val: int = 0, + bbox_remove_thres: float = 0.4, + bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: ''' Cutout augmentation image: A PIL image @@ -89,8 +102,7 @@ def cutout(image: CV_IMAGE, bbox: BBOX, cut_num: int = 2, fill_val: int = 0, right = left + cutout_size_w top = random.uniform(0, original_h - cutout_size_h) bottom = top + cutout_size_h - cutout = np.array( - [[float(left), float(top), float(right), float(bottom)]]) + cutout = np.array([[float(left), float(top), float(right), float(bottom)]]) # Calculate intersect between cutout and bounding boxes overlap_size = intersect(cutout, bbox) @@ -162,7 +174,7 @@ def get_affine_transform(center: NDArray, dst_h = output_size[1] rot_rad = np.pi * rot / 180 - src_dir = get_dir([0, src_w * -0.5], rot_rad) + src_dir = get_dir(np.array([0, src_w * -0.5], np.float32), rot_rad) dst_dir = np.array([0, dst_w * -0.5], np.float32) src = np.zeros((3, 2), dtype=np.float32) @@ -253,12 +265,24 @@ def __init__(self, cfg: edict): self.task_num = task_num def mining(self): - N = dr.items_count(env.DatasetType.CANDIDATE) + with open(self.cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + if RANK == -1: + N = 
len(images) + tbar = tqdm(images) + else: + images_rank = images[RANK::WORLD_SIZE] + N = len(images_rank) + if RANK == 0: + tbar = tqdm(images_rank) + else: + tbar = images_rank + monitor_gap = max(1, N // 100) idx = -1 beta = 1.3 mining_result = [] - for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): + for asset_path in tbar: img = cv2.imread(asset_path) # xyxy,conf,cls result = self.predict(img) @@ -296,10 +320,8 @@ def mining(self): consistency_box = max_iou consistency_cls = 0.5 * \ (conf[origin_idx] + conf_key[aug_idx]) * (1 - js) - consistency_per_inst = abs( - consistency_box + consistency_cls - beta) - consistency_per_aug = min( - consistency_per_aug, consistency_per_inst.item()) + consistency_per_inst = abs(consistency_box + consistency_cls - beta) + consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) consistency += consistency_per_aug @@ -309,10 +331,15 @@ def mining(self): idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process( - stage=YmirStage.TASK, p=idx / N, task_idx=self.task_idx, task_num=self.task_num) + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=self.task_idx, + task_num=self.task_num) monitor.write_monitor_logger(percent=percent) + if RANK != -1: + mining_result = collect_results_gpu(mining_result, len(images)) + return mining_result def predict(self, img: CV_IMAGE) -> NDArray: @@ -342,10 +369,7 @@ def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], D return the predict result and augment bbox. 
""" - aug_dict = dict(flip=horizontal_flip, - cutout=cutout, - rotate=rotate, - resize=resize) + aug_dict: Dict[str, Callable] = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) aug_bboxes = dict() aug_results = dict() @@ -360,14 +384,23 @@ def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], D def main(): + if LOCAL_RANK != -1: + init_dist(launcher='pytorch', backend="nccl" if dist.is_nccl_available() else "gloo") + cfg = get_merged_config() miner = YmirMining(cfg) + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu = int(gpu_id.split(',')[LOCAL_RANK]) + device = torch.device('cuda', gpu) + miner.model.to(device) mining_result = miner.mining() - rw.write_mining_result(mining_result=mining_result) - percent = get_ymir_process(stage=YmirStage.POSTPROCESS, - p=1, task_idx=miner.task_idx, task_num=miner.task_num) - monitor.write_monitor_logger(percent=percent) + if RANK in [0, -1]: + rw.write_mining_result(mining_result=mining_result) + + percent = get_ymir_process(stage=YmirStage.POSTPROCESS, p=1, task_idx=miner.task_idx, task_num=miner.task_num) + monitor.write_monitor_logger(percent=percent) + return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index 917ea44..e8a6c59 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -66,7 +66,8 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): with torch.no_grad(): pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) + if RANK in [-1, 0]: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) preprocess_image_shape = batch['image'].shape[2:] for inner_idx, det in enumerate(pred): # per image result_per_image = [] @@ -98,7 +99,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): monitor_gap = max(1, dataset_size // 1000 // 
batch_size_per_gpu) pbar = tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader for idx, batch in enumerate(pbar): - if idx % monitor_gap == 0: + if idx % monitor_gap == 0 and RANK in [-1, 0]: ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) batch_consistency = [0.0 for _ in range(len(batch['image_file']))] From 45d57c792596ca68ee47b61b816d1896ddabba67 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 Aug 2022 09:32:02 +0800 Subject: [PATCH 105/150] update mmdetection to ymir-executor-sdk 1.1.0 --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 130 ++++++------------- det-mmdetection-tmi/ymir_train.py | 5 +- 2 files changed, 45 insertions(+), 90 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 982a1d4..3f28149 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -5,7 +5,6 @@ import logging import os import os.path as osp -from enum import IntEnum from typing import Any, List, Optional import mmcv @@ -14,60 +13,12 @@ from mmcv import Config from nptyping import NDArray, Shape, UInt8 from packaging.version import Version -from ymir_exc import env from ymir_exc import result_writer as rw - -class YmirStage(IntEnum): - PREPROCESS = 1 # convert dataset - TASK = 2 # training/mining/infer - POSTPROCESS = 3 # export model - - BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def get_ymir_process(stage: YmirStage, p: float, task_idx: int = 0, task_num: int = 1) -> float: - """ - stage: pre-process/task/post-process - p: percent for stage - task_idx: index for multiple tasks like mining (task_idx=0) and infer (task_idx=1) - task_num: the total number of multiple tasks. 
- """ - # const value for ymir process - PREPROCESS_PERCENT = 0.1 - TASK_PERCENT = 0.8 - POSTPROCESS_PERCENT = 0.1 - - if p < 0 or p > 1.0: - raise Exception(f'p not in [0,1], p={p}') - - init = task_idx * 1.0 / task_num - ratio = 1.0 / task_num - if stage == YmirStage.PREPROCESS: - return init + PREPROCESS_PERCENT * p * ratio - elif stage == YmirStage.TASK: - return init + (PREPROCESS_PERCENT + TASK_PERCENT * p) * ratio - elif stage == YmirStage.POSTPROCESS: - return init + (PREPROCESS_PERCENT + TASK_PERCENT + POSTPROCESS_PERCENT * p) * ratio - else: - raise NotImplementedError(f'unknown stage {stage}') - - -def get_merged_config() -> edict: - """ - merge ymir_config and executor_config - """ - merged_cfg = edict() - # the hyperparameter information - merged_cfg.param = env.get_executor_config() - - # the ymir path information - merged_cfg.ymir = env.get_current_env() - return merged_cfg - - def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: """ useful for training process @@ -76,11 +27,9 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: - modify epochs, checkpoint, tensorboard config """ # modify dataset config - ymir_ann_files = dict( - train=ymir_cfg.ymir.input.training_index_file, - val=ymir_cfg.ymir.input.val_index_file, - test=ymir_cfg.ymir.input.candidate_index_file - ) + ymir_ann_files = dict(train=ymir_cfg.ymir.input.training_index_file, + val=ymir_cfg.ymir.input.val_index_file, + test=ymir_cfg.ymir.input.candidate_index_file) # validation may augment the image and use more gpu # so set smaller samples_per_gpu for validation @@ -96,8 +45,7 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: ann_prefix=ymir_cfg.ymir.input.annotations_dir, classes=ymir_cfg.param.class_names, data_root=ymir_cfg.ymir.input.root_dir, - filter_empty_gt=False - ) + filter_empty_gt=False) # modify dataset config for `split` mmdet_dataset_cfg = mmdet_cfg.data.get(split, None) if mmdet_dataset_cfg is None: @@ 
-113,8 +61,7 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: elif src_dataset_type in ['MultiImageMixDataset', 'RepeatDataset']: mmdet_dataset_cfg.dataset.update(ymir_dataset_cfg) else: - raise Exception( - f'unsupported source dataset type {src_dataset_type}') + raise Exception(f'unsupported source dataset type {src_dataset_type}') # modify model output channel mmdet_model_cfg = mmdet_cfg.model.bbox_head @@ -124,8 +71,7 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: if ymir_cfg.param.get('max_epochs', None): mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir - tensorboard_logger = dict(type='TensorboardLoggerHook', - log_dir=ymir_cfg.ymir.output.tensorboard_dir) + tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir=ymir_cfg.ymir.output.tensorboard_dir) if len(mmdet_cfg.log_config['hooks']) <= 1: mmdet_cfg.log_config['hooks'].append(tensorboard_logger) else: @@ -144,6 +90,7 @@ def get_weight_file(cfg: edict) -> str: """ return the weight file path by priority find weight file in cfg.param.pretrained_model_params or cfg.param.model_params_path + load coco-pretrained weight for yolox """ if cfg.ymir.run_training: model_params_path: List[str] = cfg.param.get('pretrained_model_params', []) @@ -152,19 +99,38 @@ def get_weight_file(cfg: edict) -> str: model_dir = cfg.ymir.input.models_dir model_params_path = [ - osp.join(model_dir, p) for p in model_params_path if osp.exists(osp.join(model_dir, p)) and p.endswith(('.pth', '.pt'))] + osp.join(model_dir, p) for p in model_params_path + if osp.exists(osp.join(model_dir, p)) and p.endswith(('.pth', '.pt')) + ] # choose weight file by priority, best_xxx.pth > latest.pth > epoch_xxx.pth - best_pth_files = [ - f for f in model_params_path if osp.basename(f).startswith('best_')] + best_pth_files = [f for f in model_params_path if osp.basename(f).startswith('best_')] if 
len(best_pth_files) > 0: return max(best_pth_files, key=os.path.getctime) - epoch_pth_files = [ - f for f in model_params_path if osp.basename(f).startswith(('epoch_', 'iter_'))] + epoch_pth_files = [f for f in model_params_path if osp.basename(f).startswith(('epoch_', 'iter_'))] if len(epoch_pth_files) > 0: return max(epoch_pth_files, key=os.path.getctime) + if cfg.ymir.run_training: + weight_files = [f for f in glob.glob('/weights/**/*', recursive=True) if f.endswith(('.pth', '.pt'))] + + model_name_splits = osp.basename(cfg.param.config_file).split('_') + if len(weight_files) > 0 and model_name_splits[0] == 'yolox': + yolox_weight_files = [ + f for f in weight_files if osp.basename(f).startswith(f'yolox_{model_name_splits[1]}') + ] + + if len(yolox_weight_files) == 0: + if model_name_splits[1] == 'nano': + # yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth or yolox_tiny.py + yolox_weight_files = [f for f in weight_files if osp.basename(f).startswith('yolox_tiny')] + elif model_name_splits[1] == 'm': + yolox_weight_files = [f for f in weight_files if osp.basename(f).startswith('yolox_l')] + + if len(yolox_weight_files) > 0: + logging.info(f'load yolox pretrained weight {yolox_weight_files[0]}') + return yolox_weight_files[0] return "" @@ -181,8 +147,7 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f logging.info(f'key_score is {key_score}') COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') if COCO_EVAL_TMP_FILE is None: - raise Exception( - 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') eval_result = mmcv.load(COCO_EVAL_TMP_FILE) # eval_result may be empty dict {}. 
@@ -190,26 +155,22 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f WORK_DIR = os.getenv('YMIR_MODELS_DIR') if WORK_DIR is None or not osp.isdir(WORK_DIR): - raise Exception( - f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') + raise Exception(f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') # assert only one model config file in work_dir - result_files = [osp.basename(f) for f in glob.glob( - osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + result_files = [osp.basename(f) for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] if last: # save all output file - rw.write_model_stage(files=result_files, - mAP=float(map), - stage_name='last') + rw.write_model_stage(files=result_files, mAP=float(map), stage_name='last') else: # save newest weight file in format epoch_xxx.pth or iter_xxx.pth - weight_files = [osp.join(WORK_DIR, f) for f in result_files if f.startswith( - ('iter_', 'epoch_')) and f.endswith('.pth')] + weight_files = [ + osp.join(WORK_DIR, f) for f in result_files if f.startswith(('iter_', 'epoch_')) and f.endswith('.pth') + ] if len(weight_files) > 0: - newest_weight_file = osp.basename( - max(weight_files, key=os.path.getctime)) + newest_weight_file = osp.basename(max(weight_files, key=os.path.getctime)) stage_name = osp.splitext(newest_weight_file)[0] training_result_file = osp.join(WORK_DIR, 'result.yaml') @@ -222,9 +183,7 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f if stage_name not in model_stages: config_files = [f for f in result_files if f.endswith('.py')] - rw.write_model_stage(files=[newest_weight_file] + config_files, - mAP=float(map), - stage_name=stage_name) + rw.write_model_stage(files=[newest_weight_file] + config_files, mAP=float(map), stage_name=stage_name) def _write_ancient_ymir_training_result(key_score: Optional[float] = None): @@ -233,8 +192,7 
@@ def _write_ancient_ymir_training_result(key_score: Optional[float] = None): COCO_EVAL_TMP_FILE = os.getenv('COCO_EVAL_TMP_FILE') if COCO_EVAL_TMP_FILE is None: - raise Exception( - 'please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') + raise Exception('please set valid environment variable COCO_EVAL_TMP_FILE to write result into json file') eval_result = mmcv.load(COCO_EVAL_TMP_FILE) # eval_result may be empty dict {}. @@ -242,12 +200,10 @@ def _write_ancient_ymir_training_result(key_score: Optional[float] = None): WORK_DIR = os.getenv('YMIR_MODELS_DIR') if WORK_DIR is None or not osp.isdir(WORK_DIR): - raise Exception( - f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') + raise Exception(f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') # assert only one model config file in work_dir - result_files = [osp.basename(f) for f in glob.glob( - osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + result_files = [osp.basename(f) for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] training_result_file = osp.join(WORK_DIR, 'result.yaml') if osp.exists(training_result_file): diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 31c2375..552654d 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -5,10 +5,9 @@ import sys from easydict import EasyDict as edict +from mmdet.utils.util_ymir import get_weight_file, write_ymir_training_result from ymir_exc import monitor - -from mmdet.utils.util_ymir import (YmirStage, get_merged_config, get_weight_file, - get_ymir_process, write_ymir_training_result) +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process def main(cfg: edict) -> int: From 5cc425d4854529ceaa531b68f51b80d923239999 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 Aug 2022 18:14:37 +0800 Subject: [PATCH 106/150] yzbx 
to modelai, add vidt --- README.MD | 71 +++++++++++++----- README_zh-CN.MD | 72 ++++++++++++++----- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 2 +- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 11 ++- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 4 +- det-yolov5-tmi/cuda102.dockerfile | 2 +- det-yolov5-tmi/cuda111.dockerfile | 2 +- det-yolov5-tmi/mining/data_augment.py | 1 - det-yolov5-tmi/mining/mining_cald.py | 5 +- 9 files changed, 122 insertions(+), 48 deletions(-) diff --git a/README.MD b/README.MD index 50ce730..e2073d1 100644 --- a/README.MD +++ b/README.MD @@ -2,11 +2,11 @@ - [ymir](https://github.com/IndustryEssentials/ymir) -- [wiki](https://github.com/yzbx/ymir-executor-fork/wiki) +- [wiki](https://github.com/modelai/ymir-executor-fork/wiki) ## ymir-1.1.0 official image -- [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) +- [yolov4](https://github.com/modelai/ymir-executor-fork#det-yolov4-training) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi @@ -14,7 +14,7 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi ``` -- [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) +- [yolov5](https://github.com/modelai/ymir-executor-fork#det-yolov5-tmi) - [change log](./det-yolov5-tmi/README.md) @@ -24,7 +24,7 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi ``` -- [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) +- [mmdetection](https://github.com/modelai/ymir-executor-fork#det-mmdetection-tmi) - [change log](./det-mmdetection-tmi/README.md) @@ -34,35 +34,68 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi ``` -- [detectron2](https://github.com/yzbx/ymir-detectron2) +- [detectron2](https://github.com/modelai/ymir-detectron2) - - [change log](https://github.com/yzbx/ymir-detectron2/blob/master/README.md) + - [change log](https://github.com/modelai/ymir-detectron2/blob/master/README.md) ``` docker pull 
youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi ``` -- [yolov7](https://github.com/yzbx/ymir-yolov7) +- [yolov7](https://github.com/modelai/ymir-yolov7) - - [change log](https://github.com/yzbx/ymir-yolov7/blob/main/ymir/README.md) + - [change log](https://github.com/modelai/ymir-yolov7/blob/main/ymir/README.md) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` -- overview +- [vidt](https://github.com/modelai/ymir-vidt) - | docker image | [finetune](https://github.com/yzbx/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weights | - | - | - | - | - | - | - | - | - | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | - | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | - | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | - | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | + - [change log](https://github.com/modelai/ymir-vidt/tree/main/ymir) - - online pretrained weights may download through network + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi + ``` + +## overview + +| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weights | +| - | - | - | - | - | - | - | +| yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | +| yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | +| yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | +| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| vidt | ? 
| ✔️ | ✔️ | ✔️ pytorch | ❌ | online | + +- online pretrained weights may download through network + +- local pretrained weights have copied to docker images when building image + +### benchmark + +- training dataset: voc2012-train 5717 images +- validation dataset: voc2012-val 5823 images +- image size: 640 + +gpu: single Tesla P4 + +| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | +| yolov5 | 16 | 100 | yolov5s | 70.05% | 9h | coco-pretrained | +| vidt | 2 | 100 | swin-nano | 54.13% | 2d | imagenet-pretrained | +| yolov4 | 4 | 20000 steps | yolov4 | 66.18% | 2d | imagenet-pretrained | +| yolov7 | 16 | 100 | yolov7-tiny | 70% | 8h | coco-pretrained | + +gpu: single GeForce GTX 1080 Ti - - local pretrained weights have copied to docker images when building image +| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | +| yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | +| yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | +| mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | +| detectron2 | 2 | 20000 | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | --- @@ -101,7 +134,7 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . 
- [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) -- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) +- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) ## how to import pretrained model weights diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 83e7f57..443e3f0 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -2,11 +2,11 @@ - [ymir](https://github.com/IndustryEssentials/ymir) -- [wiki](https://github.com/yzbx/ymir-executor-fork/wiki) +- [wiki](https://github.com/modelai/ymir-executor-fork/wiki) ## ymir-1.1.0 官方镜像 -- [yolov4](https://github.com/yzbx/ymir-executor-fork#det-yolov4-training) +- [yolov4](https://github.com/modelai/ymir-executor-fork#det-yolov4-training) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi @@ -14,7 +14,7 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi ``` -- [yolov5](https://github.com/yzbx/ymir-executor-fork#det-yolov5-tmi) +- [yolov5](https://github.com/modelai/ymir-executor-fork#det-yolov5-tmi) - [change log](./det-yolov5-tmi/README.md) @@ -24,7 +24,7 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi ``` -- [mmdetection](https://github.com/yzbx/ymir-executor-fork#det-mmdetection-tmi) +- [mmdetection](https://github.com/modelai/ymir-executor-fork#det-mmdetection-tmi) - [change log](./det-mmdetection-tmi/README.md) @@ -34,35 +34,69 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi ``` -- [detectron2](https://github.com/yzbx/ymir-detectron2) +- [detectron2](https://github.com/modelai/ymir-detectron2) - - [change log](https://github.com/yzbx/ymir-detectron2/blob/master/README.md) + - [change log](https://github.com/modelai/ymir-detectron2/blob/master/README.md) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi ``` -- [yolov7](https://github.com/yzbx/ymir-yolov7) +- [yolov7](https://github.com/modelai/ymir-yolov7) - - [change 
log](https://github.com/yzbx/ymir-yolov7/blob/main/ymir/README.md) + - [change log](https://github.com/modelai/ymir-yolov7/blob/main/ymir/README.md) ``` docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi ``` -- 比较 +- [vidt](https://github.com/modelai/ymir-vidt) - | docker image | [finetune](https://github.com/yzbx/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weight | - | - | - | - | - | - | - | - | - | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | - | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | - | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | - | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | - | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | + - [change log](https://github.com/modelai/ymir-vidt/tree/main/ymir) - - online 预训练权重可能在训练时通过网络下载 + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi + ``` + +## 比较 + +| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weight | +| - | - | - | - | - | - | - | +| yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | +| yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | +| yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | +| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| vidt | ? 
| ✔️ | ✔️ | ✔️ pytorch | ❌ | online | + +- online 预训练权重可能在训练时通过网络下载 + +- local 预训练权重在构建镜像时复制到了镜像 + +### benchmark + +- training dataset: voc2012-train 5717 images +- validation dataset: voc2012-val 5823 images +- image size: 640 + +gpu: single Tesla P4 + +| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | +| yolov5 | 16 | 100 | yolov5s | 70.05% | 9h | coco-pretrained | +| vidt | 2 | 100 | swin-nano | 54.13% | 2d | imagenet-pretrained | +| yolov4 | 4 | 20000 steps | yolov4 | 66.18% | 2d | imagenet-pretrained | +| yolov7 | 16 | 100 | yolov7-tiny | 70% | 8h | coco-pretrained | + +gpu: single GeForce GTX 1080 Ti + +| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | +| yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | +| yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | +| mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | +| detectron2 | 2 | 20000 | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | - - local 预训练权重在构建镜像时复制到了镜像 --- ## det-yolov4-tmi @@ -114,7 +148,7 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile - [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) -- [ymir-executor-sdk](https://github.com/yzbx/ymir-executor-sdk) ymir镜像开发辅助库 +- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) ymir镜像开发辅助库 ## 如何导入预训练模型 diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 517acd0..6d07aa6 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -28,7 +28,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) RUN pip install --no-cache-dir --upgrade pip wheel 
setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html \ - && pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ && conda clean --all # Install det-mmdetection-tmi diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index fbf2508..c811c85 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -26,7 +26,7 @@ RUN apt-get update && apt-get install -y build-essential ffmpeg libsm6 libxext6 # Install ymir-exc sdk and MMCV RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html \ - && pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ && conda clean --all # Install det-mmdetection-tmi @@ -37,4 +37,13 @@ RUN pip install --no-cache-dir -r requirements/runtime.txt \ && mv *-template.yaml /img-man \ && echo "cd /app && python3 start.py" > /usr/bin/start.sh +# Download coco-pretrained yolox weight to /weights +# view https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox for detail +# RUN apt-get update && apt install -y wget && rm -rf /var/lib/apt/lists/* +# RUN mkdir -p /weights && cd /weights \ +# && wget https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth \ +# && wget https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth \ +# && wget https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth \ +# && wget 
https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth + CMD bash /usr/bin/start.sh diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 3f28149..8498d9c 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -125,8 +125,8 @@ def get_weight_file(cfg: edict) -> str: if model_name_splits[1] == 'nano': # yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth or yolox_tiny.py yolox_weight_files = [f for f in weight_files if osp.basename(f).startswith('yolox_tiny')] - elif model_name_splits[1] == 'm': - yolox_weight_files = [f for f in weight_files if osp.basename(f).startswith('yolox_l')] + else: + yolox_weight_files = [f for f in weight_files if osp.basename(f).startswith('yolox_s')] if len(yolox_weight_files) > 0: logging.info(f'load yolox pretrained weight {yolox_weight_files[0]}') diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile index e8ab497..0014b60 100644 --- a/det-yolov5-tmi/cuda102.dockerfile +++ b/det-yolov5-tmi/cuda102.dockerfile @@ -20,7 +20,7 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # install ymir-exc sdk -RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" +RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" # Copy file from host to docker and install requirements COPY . 
/app diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile index 5d1e165..84427a8 100644 --- a/det-yolov5-tmi/cuda111.dockerfile +++ b/det-yolov5-tmi/cuda111.dockerfile @@ -23,7 +23,7 @@ RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ COPY ./requirements.txt /workspace/ # install ymir-exc sdk and requirements -RUN pip install "git+https://github.com/yzbx/ymir-executor-sdk.git@ymir1.0.0" \ +RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ && pip install -r /workspace/requirements.txt # Copy file from host to docker and install requirements diff --git a/det-yolov5-tmi/mining/data_augment.py b/det-yolov5-tmi/mining/data_augment.py index 47b1d50..42af914 100644 --- a/det-yolov5-tmi/mining/data_augment.py +++ b/det-yolov5-tmi/mining/data_augment.py @@ -8,7 +8,6 @@ import cv2 import numpy as np from nptyping import NDArray - from utils.ymir_yolov5 import BBOX, CV_IMAGE diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index 0e08660..560326c 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -8,17 +8,16 @@ import cv2 import numpy as np from easydict import EasyDict as edict +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate from nptyping import NDArray from scipy.stats import entropy from tqdm import tqdm +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 - def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: if len(result) > 0: From a7d65a6d59be59d7950776cdb3529fb147d25d14 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 
Aug 2022 18:36:00 +0800 Subject: [PATCH 107/150] add mining algorithm reference --- README.MD | 4 ++++ README_zh-CN.MD | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/README.MD b/README.MD index e2073d1..d3cbe03 100644 --- a/README.MD +++ b/README.MD @@ -139,3 +139,7 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## how to import pretrained model weights - [import pretainted model weights](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) + +## reference + +- [mining algorithm: CALD](https://github.com/we1pingyu/CALD/) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 443e3f0..e0086ac 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -156,6 +156,10 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 +## 参考 + +- [挖掘算法CALD](https://github.com/we1pingyu/CALD/) + --- # FAQ From 4d3c8696b1599b5fb1edf128c0664c73a0ac1701 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 Aug 2022 18:44:38 +0800 Subject: [PATCH 108/150] add other reference --- README.MD | 5 +++++ README_zh-CN.MD | 15 ++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index d3cbe03..525edf3 100644 --- a/README.MD +++ b/README.MD @@ -143,3 +143,8 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 .
## reference - [mining algorithm: CALD](https://github.com/we1pingyu/CALD/) +- [yolov5](https://github.com/ultralytics/yolov5) +- [mmdetection](https://github.com/open-mmlab/mmdetection) +- [yolov7](https://github.com/wongkinyiu/yolov7) +- [detectron2](https://github.com/facebookresearch/detectron2) +- [vidt](https://github.com/naver-ai/vidt) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index e0086ac..89d2283 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -159,6 +159,11 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 参考 - [挖掘算法CALD](https://github.com/we1pingyu/CALD/) +- [yolov5](https://github.com/ultralytics/yolov5) +- [mmdetection](https://github.com/open-mmlab/mmdetection) +- [yolov7](https://github.com/wongkinyiu/yolov7) +- [detectron2](https://github.com/facebookresearch/detectron2) +- [vidt](https://github.com/naver-ai/vidt) --- @@ -166,9 +171,9 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 关于cuda版本 -- 推荐安装11.2以上的cuda版本, 使用11.1及以上的镜像 +- 推荐主机安装11.2以上的cuda版本, 使用11.1及以上的镜像 -- GTX3080/GTX3090系统不支持11.1以下的cuda,只能使用cuda11.1及以上的镜像 +- GTX3080/GTX3090不支持11.1以下的cuda,只能使用cuda11.1及以上的镜像 ## apt 或 pip 安装慢或出错 @@ -247,10 +252,10 @@ tail -f -n 100 ymir_app.log - 挂载目录并运行镜像``,注意需要将ymir部署目录挂载到镜像中 ``` - docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v : bash + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v : -v /sandbox//training_assset_cache:/in/assets bash - # 以/home/ymir/ymir-workplace作为ymir部署目录为例 - docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash + # 以/home/ymir/ymir-workplace作为ymir部署目录为例, 以实际情况为准 + docker run -it --gpus all --shm-size 12G -v $PWD/in:/in -v /home/ymir/ymir-workplace/sandbox/0001/training_assset_cache:/in/assets -v $PWD/out:/out -v /home/ymir/ymir-workplace:/home/ymir/ymir-workplace bash ``` - 进入到docker 容器中后, 执行镜像默认的命令, 如dockerfile中写的 `CMD bash 
/usr/bin/start.sh` From 8a30f5a83fb48cf9862b5cfdee8a5c4eec289ce5 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 Aug 2022 18:49:27 +0800 Subject: [PATCH 109/150] update doc --- README.MD | 2 +- README_zh-CN.MD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.MD b/README.MD index 525edf3..cde68fb 100644 --- a/README.MD +++ b/README.MD @@ -95,7 +95,7 @@ gpu: single GeForce GTX 1080 Ti | yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | -| detectron2 | 2 | 20000 | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | --- diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 89d2283..fb369f1 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -95,7 +95,7 @@ gpu: single GeForce GTX 1080 Ti | yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | -| detectron2 | 2 | 20000 | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | --- From 09522d40eda8a927065c1f9a36a845cd3f19dd61 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 31 Aug 2022 18:51:09 +0800 Subject: [PATCH 110/150] zzz --- README.MD | 2 +- README_zh-CN.MD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.MD b/README.MD index cde68fb..4b52b4b 100644 --- a/README.MD +++ b/README.MD @@ -67,7 +67,7 @@ | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | -| vidt | ? | ✔️ | ✔️ | ✔️ pytorch | ❌ | online | +| vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | - online pretrained weights may download through network diff --git a/README_zh-CN.MD b/README_zh-CN.MD index fb369f1..765b6d6 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -67,7 +67,7 @@ | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | -| vidt | ? | ✔️ | ✔️ | ✔️ pytorch | ❌ | online | +| vidt | ? | ✔️ | ✔️ | pytorch | ❌ | online | - online 预训练权重可能在训练时通过网络下载 From 7544900cb2765fe7dc9aec5ce0ed871a16f31f36 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 2 Sep 2022 11:08:42 +0800 Subject: [PATCH 111/150] update readme --- README.MD | 11 +++++++++++ README_zh-CN.MD | 11 +++++++++++ .../mmdet/core/evaluation/eval_hooks.py | 5 ++--- det-mmdetection-tmi/tools/train.py | 4 ++-- det-mmdetection-tmi/ymir_infer.py | 11 ++++++----- det-mmdetection-tmi/ymir_mining.py | 2 +- det-yolov5-tmi/mining/ymir_infer.py | 7 +++---- det-yolov5-tmi/mining/ymir_mining.py | 7 +++---- 8 files changed, 39 insertions(+), 19 deletions(-) diff --git a/README.MD b/README.MD index 4b52b4b..890bba7 100644 --- a/README.MD +++ b/README.MD @@ -58,6 +58,14 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi ``` +- [nanodet](https://github.com/modelai/ymir-nanodet/tree/ymir-dev) + + - [change log](https://github.com/modelai/ymir-nanodet/tree/ymir-dev/ymir) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-nanodet-cu111-tmi + ``` + ## overview | docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weights | @@ -68,6 +76,7 @@ | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | +| nanodet | ❌ | ✔️ | ❌ | pytorch_lightning | ❌ | online | - online pretrained weights may download through network @@ -143,8 +152,10 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## reference - [mining algorithm: CALD](https://github.com/we1pingyu/CALD/) +- [yolov4](https://github.com/AlexeyAB/darknet) - [yolov5](https://github.com/ultralytics/yolov5) - [mmdetection](https://github.com/open-mmlab/mmdetection) - [yolov7](https://github.com/wongkinyiu/yolov7) - [detectron2](https://github.com/facebookresearch/detectron2) - [vidt](https://github.com/naver-ai/vidt) +- [nanodet](https://github.com/RangiLyu/nanodet) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 765b6d6..53a25c9 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -58,6 +58,14 @@ docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi ``` +- [nanodet](https://github.com/modelai/ymir-nanodet/tree/ymir-dev) + + - [change log](https://github.com/modelai/ymir-nanodet/tree/ymir-dev/ymir) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-nanodet-cu111-tmi + ``` + ## 比较 | docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weight | @@ -68,6 +76,7 @@ | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | +| nanodet | ❌ | ✔️ | ❌ | pytorch_lightning | ❌ | online | - online 预训练权重可能在训练时通过网络下载 @@ -159,11 +168,13 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 参考 - [挖掘算法CALD](https://github.com/we1pingyu/CALD/) +- [yolov4](https://github.com/AlexeyAB/darknet) - [yolov5](https://github.com/ultralytics/yolov5) - [mmdetection](https://github.com/open-mmlab/mmdetection) - [yolov7](https://github.com/wongkinyiu/yolov7) - [detectron2](https://github.com/facebookresearch/detectron2) - [vidt](https://github.com/naver-ai/vidt) +- [nanodet](https://github.com/RangiLyu/nanodet) --- diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index 6b10dc1..b2e7dff 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -6,11 +6,10 @@ import torch.distributed as dist from mmcv.runner import DistEvalHook as BaseDistEvalHook from mmcv.runner import EvalHook as BaseEvalHook +from mmdet.utils.util_ymir import write_ymir_training_result from torch.nn.modules.batchnorm import _BatchNorm from ymir_exc import monitor - -from mmdet.utils.util_ymir import (YmirStage, get_ymir_process, - write_ymir_training_result) +from ymir_exc.util import YmirStage, get_ymir_process def _calc_dynamic_intervals(start_interval, dynamic_interval_list): diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index b3b6d65..2ecc642 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -11,13 +11,13 @@ from mmcv import Config, DictAction from mmcv.runner import get_dist_info, init_dist from mmcv.utils import get_git_hash - from mmdet import __version__ from mmdet.apis import init_random_seed, set_random_seed, train_detector from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, 
get_root_logger, setup_multi_processes -from mmdet.utils.util_ymir import _modify_mmdet_config, get_merged_config +from mmdet.utils.util_ymir import _modify_mmdet_config +from ymir_exc.util import get_merged_config def parse_args(): diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index 9920ca2..a7f22bd 100644 --- a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -9,14 +9,12 @@ from easydict import EasyDict as edict from mmcv import DictAction from mmdet.apis import inference_detector, init_detector -from mmdet.utils.util_ymir import YmirStage, get_merged_config, get_weight_file, get_ymir_process -from nptyping import NDArray, Shape +from mmdet.utils.util_ymir import get_weight_file from tqdm import tqdm from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw - -DETECTION_RESULT = NDArray[Shape['*,5'], Any] +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process def parse_option(cfg_options: str) -> dict: @@ -35,7 +33,10 @@ def parse_option(cfg_options: str) -> dict: return args.cfg_options -def mmdet_result_to_ymir(results: List[DETECTION_RESULT], class_names: List[str]) -> List[rw.Annotation]: +def mmdet_result_to_ymir(results: List[Any], class_names: List[str]) -> List[rw.Annotation]: + """ + results: List[NDArray[Shape['*,5'], Any]] + """ ann_list = [] for idx, result in enumerate(results): for line in result: diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py index 7eeaa1f..787290e 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining.py @@ -314,7 +314,7 @@ def mining(self): p = cls_scores_aug[aug_idx] q = cls_scores[origin_idx] m = (p + q) / 2. 
- js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) if js < 0: js = 0 consistency_box = max_iou diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index 827dc8a..f10f210 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -12,13 +12,12 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from tqdm import tqdm -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - from mining.util import YmirDataset, load_image_file +from tqdm import tqdm from utils.general import scale_coords from utils.ymir_yolov5 import YmirYolov5 +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining.py index e8a6c59..7ac11bd 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining.py @@ -14,14 +14,13 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from tqdm import tqdm -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, update_consistency) +from tqdm import tqdm from utils.general import scale_coords from utils.ymir_yolov5 import YmirYolov5 +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) From 53bc0f50ca19b10d6976c13211d9170929998610 Mon Sep 17 00:00:00 2001 From: 
youdaoyzbx Date: Fri, 2 Sep 2022 11:30:39 +0800 Subject: [PATCH 112/150] fix mining entropy bug --- det-mmdetection-tmi/training-template.yaml | 2 +- det-yolov5-tmi/mining/mining_cald.py | 2 +- det-yolov5-tmi/mining/util.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index d4c191f..7744172 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -3,7 +3,7 @@ export_format: 'ark:raw' samples_per_gpu: 16 workers_per_gpu: 16 max_epochs: 300 -config_file: 'configs/yolox/yolox_nano_8x8_300e_coco.py' +config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' cfg_options: '' metric: 'bbox' diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index 560326c..1588665 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -85,7 +85,7 @@ def mining(self) -> List: p = cls_scores_aug[aug_idx] q = cls_scores[origin_idx] m = (p + q) / 2. - js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) if js < 0: js = 0 consistency_box = max_iou diff --git a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py index 41c7c73..54ef5dd 100644 --- a/det-yolov5-tmi/mining/util.py +++ b/det-yolov5-tmi/mining/util.py @@ -107,7 +107,7 @@ def update_consistency(consistency, consistency_per_aug, beta, pred_bboxes_key, p = cls_scores_aug[aug_idx] q = cls_scores[origin_idx] m = (p + q) / 2. 
- js = 0.5 * entropy(p, m) + 0.5 * entropy(q, m) + js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) if js < 0: js = 0 consistency_box = max_iou From f920f48911ee64bbc0abe1eab1df11cbec2f84bb Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 2 Sep 2022 12:00:40 +0800 Subject: [PATCH 113/150] fix mining bug --- README.MD | 8 ++++---- README_zh-CN.MD | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.MD b/README.MD index 890bba7..c4046bf 100644 --- a/README.MD +++ b/README.MD @@ -68,7 +68,7 @@ ## overview -| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weights | +| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args/cfg options | framework | onnx | pretrained weights | | - | - | - | - | - | - | - | | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | @@ -76,11 +76,11 @@ | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | -| nanodet | ❌ | ✔️ | ❌ | pytorch_lightning | ❌ | online | +| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | online | -- online pretrained weights may download through network +- `online` pretrained weights may download through network -- local pretrained weights have copied to docker images when building image +- `local` pretrained weights have copied to docker images when building image ### benchmark diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 53a25c9..a5baec1 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -68,7 +68,7 @@ ## 比较 -| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args | framework | onnx | pretrained weight | +| docker image | [finetune](https://github.com/modelai/ymir-executor-fork/wiki/use-yolov5-to-finetune-or-training-model) | tensorboard | args/cfg options | framework | onnx | pretrained weight | | - | - | - | - | - | - | - | | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | @@ -76,11 +76,11 @@ | mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | -| nanodet | ❌ | ✔️ | ❌ | pytorch_lightning | ❌ | online | +| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | online | -- online 预训练权重可能在训练时通过网络下载 +- `online` 预训练权重可能在训练时通过网络下载 -- local 预训练权重在构建镜像时复制到了镜像 +- `local` 预训练权重在构建镜像时复制到了镜像 ### benchmark From 70817f8c5aaec25d0240b998978a4f0520c0f57f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 2 Sep 2022 15:48:20 +0800 Subject: [PATCH 114/150] zzz| --- README.MD | 1 + README_zh-CN.MD | 1 + 2 files changed, 2 insertions(+) diff --git a/README.MD b/README.MD index c4046bf..3c9eaab 100644 --- a/README.MD +++ b/README.MD @@ -105,6 +105,7 @@ gpu: single GeForce GTX 1080 Ti | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | | detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | --- diff --git a/README_zh-CN.MD b/README_zh-CN.MD index a5baec1..f9004a1 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -105,6 +105,7 @@ gpu: single GeForce GTX 1080 Ti | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | | detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h |s --- From ec58f3c12189fb2cb246365364132e743a140884 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 2 Sep 2022 15:48:59 +0800 Subject: [PATCH 115/150] zzz --- README_zh-CN.MD | 2 +- det-yolov5-tmi/mining/util.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index f9004a1..47eb7b5 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -105,7 +105,7 @@ gpu: single GeForce GTX 1080 Ti | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | 
coco-pretrained | | detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | -| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h |s +| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | --- diff --git a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py index 54ef5dd..5c9b669 100644 --- a/det-yolov5-tmi/mining/util.py +++ b/det-yolov5-tmi/mining/util.py @@ -19,11 +19,10 @@ import cv2 import numpy as np import torch.utils.data as td -from scipy.stats import entropy -from torch.utils.data._utils.collate import default_collate - from mining.data_augment import cutout, horizontal_flip, resize, rotate from mining.mining_cald import get_ious +from scipy.stats import entropy +from torch.utils.data._utils.collate import default_collate from utils.augmentations import letterbox LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html From 0c8b49f65de31ab737ff1b95e4b2bc41eb37fd20 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 2 Sep 2022 15:54:03 +0800 Subject: [PATCH 116/150] add nanodet benchmark --- README.MD | 2 +- README_zh-CN.MD | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index 3c9eaab..433dc2d 100644 --- a/README.MD +++ b/README.MD @@ -105,7 +105,7 @@ gpu: single GeForce GTX 1080 Ti | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | | detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | -| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | +| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | --- diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 47eb7b5..09b3fcc 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -84,9 +84,11 @@ ### benchmark -- training dataset: voc2012-train 5717 images -- validation dataset: voc2012-val 5823 images -- image size: 640 +- 
训练集: voc2012-train 5717 images +- 测试集: voc2012-val 5823 images +- 图像大小: 640 (nanodet为416) + +**由于 coco 数据集包含 voc 数据集中的类, 因此这个对比并不公平, 仅供参考** gpu: single Tesla P4 @@ -105,7 +107,7 @@ gpu: single GeForce GTX 1080 Ti | yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | | mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | | detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | -| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | +| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | --- From 197fa095486d4817bbcb9664922f69dedc71fbf7 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 6 Sep 2022 09:38:58 +0800 Subject: [PATCH 117/150] fix mmdet ddp bug --- det-mmdetection-tmi/README.md | 4 ++++ det-mmdetection-tmi/mmdet/utils/util_ymir.py | 5 ++++- det-mmdetection-tmi/tools/train.py | 4 ++-- det-mmdetection-tmi/ymir_train.py | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/det-mmdetection-tmi/README.md b/det-mmdetection-tmi/README.md index b2ed690..5c1934d 100644 --- a/det-mmdetection-tmi/README.md +++ b/det-mmdetection-tmi/README.md @@ -25,3 +25,7 @@ docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg SERVER_MODE=dev --bu - add `training-template.yaml, infer-template.yaml, mining-template.yaml` for ymir pre-defined hyper-parameters. 
- add `docker/Dockerfile.cuda102, docker/Dockerfile.cuda111` to build docker image - remove `docker/Dockerfile` to avoid misuse + +--- + +- 2022/09/06: set `find_unused_parameters = True`, fix DDP bug diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 8498d9c..674117f 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -19,7 +19,7 @@ CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: +def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: """ useful for training process - modify dataset config @@ -83,6 +83,9 @@ def _modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: mmdet_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') # TODO Whether to evaluating the AP for each class # mmdet_cfg.evaluation.classwise = True + + # fix DDP error + mmdet_cfg.find_unused_parameters = True return mmdet_cfg diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index 2ecc642..3868d1e 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -16,7 +16,7 @@ from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, get_root_logger, setup_multi_processes -from mmdet.utils.util_ymir import _modify_mmdet_config +from mmdet.utils.util_ymir import modify_mmdet_config from ymir_exc.util import get_merged_config @@ -101,7 +101,7 @@ def main(): cfg = Config.fromfile(args.config) print(cfg) # modify mmdet config from file - cfg = _modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) + cfg = modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index 552654d..a84a805 100644 --- 
a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -12,7 +12,7 @@ def main(cfg: edict) -> int: # default ymir config - gpu_id = cfg.param.get("gpu_id", '0') + gpu_id: str = str(cfg.param.get("gpu_id", '0')) num_gpus = len(gpu_id.split(",")) if num_gpus == 0: raise Exception(f'gpu_id = {gpu_id} is not valid, eg: 0 or 2,4') From 5edd3909456477c7c0b2cd83e4f1f749029158ab Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 6 Sep 2022 14:56:25 +0800 Subject: [PATCH 118/150] fix link change --- README.MD | 4 ++-- README_zh-CN.MD | 4 ++-- live-code-executor/ymir_start.py | 8 +++++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index 433dc2d..e82a242 100644 --- a/README.MD +++ b/README.MD @@ -142,13 +142,13 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## how to custom ymir-executor -- [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) +- [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/ymir-dataset-zh-CN.md) - [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) ## how to import pretrained model weights -- [import pretainted model weights](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) +- [import pretainted model weights](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md) ## reference diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 09b3fcc..f367f39 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -158,13 +158,13 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 如何制作自己的ymir-executor -- [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/docs/ymir-dataset-zh-CN.md) +- [ymir-executor 制作指南](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/ymir-dataset-zh-CN.md) - [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) ymir镜像开发辅助库 ## 
如何导入预训练模型 -- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/docs/import-extra-models.md) +- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md) - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 diff --git a/live-code-executor/ymir_start.py b/live-code-executor/ymir_start.py index d2c5415..ee81336 100644 --- a/live-code-executor/ymir_start.py +++ b/live-code-executor/ymir_start.py @@ -50,7 +50,13 @@ def main(): logger.info('no python package needs to install') # step 3. run /app/start.py - cmd = 'python3 start.py' + if osp.exists('/app/start.py'): + cmd = 'python3 start.py' + elif osp.exists('/app/ymir/start.py'): + cmd = 'python3 ymir/start.py' + else: + raise Exception('cannot found start.py') + logger.info(f'run task: {cmd}') subprocess.run(cmd.split(), check=True, cwd='/app') From 958d1214c6ca89dd9a286dd370b3d81318d8ac5b Mon Sep 17 00:00:00 2001 From: LuciferZap <92283801+LuciferZap@users.noreply.github.com> Date: Thu, 8 Sep 2022 16:33:11 +0800 Subject: [PATCH 119/150] add yolov5 mining code --- .idea/encodings.xml | 4 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .idea/workspace.xml | 222 ++++++++++++++++++++++ .idea/ymir-executor-fork.iml | 12 ++ det-yolov5-tmi/mining/ymir_mining_aldd.py | 167 ++++++++++++++++ 7 files changed, 426 insertions(+) create mode 100644 .idea/encodings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml create mode 100644 .idea/ymir-executor-fork.iml create mode 100644 det-yolov5-tmi/mining/ymir_mining_aldd.py diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..a09183e --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of 
file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..c5ce0fc --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..c116a57 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + nd.sum + kernel + + + torch.log + torch.sum + avg_pool_kernel + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - 1662622258260 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/ymir-executor-fork.iml b/.idea/ymir-executor-fork.iml deleted file mode 100644 index 7c9d48f..0000000 --- a/.idea/ymir-executor-fork.iml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - \ No newline at end of file From b6e25f9f84419bc9b7ff9043051ad21f6841cc4d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 8 Sep 2022 16:48:44 +0800 Subject: [PATCH 121/150] update mmdet and readme --- README.MD | 19 +++-- README_zh-CN.MD | 21 ++--- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 85 +++++++++++++++----- det-mmdetection-tmi/tools/train.py | 4 +- det-mmdetection-tmi/ymir_infer.py | 4 +- det-mmdetection-tmi/ymir_train.py | 8 +- 6 files changed, 97 insertions(+), 44 deletions(-) diff --git a/README.MD b/README.MD index e82a242..d4ef769 100644 --- a/README.MD +++ b/README.MD @@ -99,13 +99,14 @@ gpu: single Tesla P4 gpu: single GeForce GTX 1080 Ti -| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | -| - | - | - | - 
| - | - | - | -| yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | -| yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | -| mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | -| detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | -| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | +| docker image | image size | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | - | +| yolov4 | 608 | 64/32 | 20000 steps | yolov4 | 72.73% | 6h | imagenet-pretrained | +| yolov5 | 640 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | +| yolov7 | 640 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | +| mmdetection | 640 | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | +| detectron2 | 640 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| nanodet | 416 | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | --- @@ -148,7 +149,9 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . 
## how to import pretrained model weights -- [import pretainted model weights](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md) +- [import and finetune model](https://github.com/modelai/ymir-executor-fork/wiki/import-and-finetune-model) + +- ~~[import pretainted model weights](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md)~~ ## reference diff --git a/README_zh-CN.MD b/README_zh-CN.MD index f367f39..6cbcd8c 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -86,7 +86,7 @@ - 训练集: voc2012-train 5717 images - 测试集: voc2012-val 5823 images -- 图像大小: 640 (nanodet为416) +- 图像大小: 640 (nanodet为416, yolov4为608) **由于 coco 数据集包含 voc 数据集中的类, 因此这个对比并不公平, 仅供参考** @@ -101,13 +101,14 @@ gpu: single Tesla P4 gpu: single GeForce GTX 1080 Ti -| docker image | batch size | epoch number | model | voc2012 val map50 | training time | note | -| - | - | - | - | - | - | - | -| yolov5 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | -| yolov7 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | -| mmdetection | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | -| detectron2 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | -| nanodet | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | +| docker image | image size | batch size | epoch number | model | voc2012 val map50 | training time | note | +| - | - | - | - | - | - | - | - | +| yolov4 | 608 | 64/32 | 20000 steps | yolov4 | 72.73% | 6h | imagenet-pretrained | +| yolov5 | 640 | 16 | 100 | yolov5s | 70.35% | 2h | coco-pretrained | +| yolov7 | 640 | 16 | 100 | yolov7-tiny | 70.4% | 5h | coco-pretrained | +| mmdetection | 640 | 16 | 100 | yolox_tiny | 66.2% | 5h | coco-pretrained | +| detectron2 | 640 | 2 | 20000 steps | retinanet_R_50_FPN_1x | 53.54% | 2h | imagenet-pretrained | +| nanodet | 416 | 16 | 100 | nanodet-plus-m_416 | 58.63% | 5h | imagenet-pretrained | --- @@ -164,7 +165,9 @@ docker build -t 
ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 如何导入预训练模型 -- [如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md) +- [如何导入并精调外部模型](https://github.com/modelai/ymir-executor-fork/wiki/import-and-finetune-model) + +- ~~[如何导入外部模型](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/import-extra-models.md)~~ - 通过ymir网页端的 `模型管理/模型列表/导入模型` 同样可以导入模型 diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 674117f..12910ea 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -5,7 +5,7 @@ import logging import os import os.path as osp -from typing import Any, List, Optional +from typing import Any, Iterable, List, Optional import mmcv import yaml @@ -19,13 +19,24 @@ CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] -def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: +def modify_mmcv_config(mmcv_cfg: Config, ymir_cfg: edict) -> None: """ useful for training process - modify dataset config - modify model output channel - modify epochs, checkpoint, tensorboard config """ + def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any): + for key in mmcv_cfg: + if key == attribute_key: + mmcv_cfg[key] = attribute_value + elif isinstance(mmcv_cfg[key], Config): + recursive_modify(mmcv_cfg[key], attribute_key, attribute_value) + elif isinstance(mmcv_cfg[key], Iterable): + for cfg in mmcv_cfg[key]: + if isinstance(cfg, Config): + recursive_modify(cfg, attribute_key, attribute_value) + # modify dataset config ymir_ann_files = dict(train=ymir_cfg.ymir.input.training_index_file, val=ymir_cfg.ymir.input.val_index_file, @@ -35,8 +46,11 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: # so set smaller samples_per_gpu for validation samples_per_gpu = ymir_cfg.param.samples_per_gpu workers_per_gpu = ymir_cfg.param.workers_per_gpu - mmdet_cfg.data.samples_per_gpu = 
samples_per_gpu - mmdet_cfg.data.workers_per_gpu = workers_per_gpu + mmcv_cfg.data.samples_per_gpu = samples_per_gpu + mmcv_cfg.data.workers_per_gpu = workers_per_gpu + + num_classes = len(ymir_cfg.param.class_names) + recursive_modify(mmcv_cfg.model, 'num_classes', num_classes) for split in ['train', 'val', 'test']: ymir_dataset_cfg = dict(type='YmirDataset', @@ -47,7 +61,7 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: data_root=ymir_cfg.ymir.input.root_dir, filter_empty_gt=False) # modify dataset config for `split` - mmdet_dataset_cfg = mmdet_cfg.data.get(split, None) + mmdet_dataset_cfg = mmcv_cfg.data.get(split, None) if mmdet_dataset_cfg is None: continue @@ -63,33 +77,65 @@ def modify_mmdet_config(mmdet_cfg: Config, ymir_cfg: edict) -> Config: else: raise Exception(f'unsupported source dataset type {src_dataset_type}') - # modify model output channel - mmdet_model_cfg = mmdet_cfg.model.bbox_head - mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) + # # modify model output channel + # if mmcv_cfg.model.get('bbox_head'): # yolox, yolo, yolof, retinanet, ssd + # mmdet_model_cfg = mmcv_cfg.model.bbox_head + # elif mmcv_cfg.model.get('roi_head'): # Faster-RCNN, fast-rcnn + # mmdet_model_cfg = mmcv_cfg.model.roi_head.bbox_head + # elif mmcv_cfg.model.get('mask_head'): # SOLO + # mmdet_model_cfg = mmcv_cfg.model.mask_head + # else: + # raise Exception('unknown model structure') + + # if mmdet_model_cfg.get('num_classes'): + # mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) + # else: + # raise Exception('unknown model structure, no attr num_classes found') # modify epochs, checkpoint, tensorboard config if ymir_cfg.param.get('max_epochs', None): - mmdet_cfg.runner.max_epochs = ymir_cfg.param.max_epochs - mmdet_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir + mmcv_cfg.runner.max_epochs = ymir_cfg.param.max_epochs + mmcv_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir 
tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir=ymir_cfg.ymir.output.tensorboard_dir) - if len(mmdet_cfg.log_config['hooks']) <= 1: - mmdet_cfg.log_config['hooks'].append(tensorboard_logger) + if len(mmcv_cfg.log_config['hooks']) <= 1: + mmcv_cfg.log_config['hooks'].append(tensorboard_logger) else: - mmdet_cfg.log_config['hooks'][1].update(tensorboard_logger) + mmcv_cfg.log_config['hooks'][1].update(tensorboard_logger) # modify evaluation and interval - interval = max(1, mmdet_cfg.runner.max_epochs // 30) - mmdet_cfg.evaluation.interval = interval - mmdet_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') + interval = max(1, mmcv_cfg.runner.max_epochs // 10) + mmcv_cfg.evaluation.interval = interval + mmcv_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') + mmcv_cfg.checkpoint_config.interval = mmcv_cfg.evaluation.interval # TODO Whether to evaluating the AP for each class # mmdet_cfg.evaluation.classwise = True # fix DDP error - mmdet_cfg.find_unused_parameters = True - return mmdet_cfg + mmcv_cfg.find_unused_parameters = True + + # set work dir + mmcv_cfg.work_dir = ymir_cfg.ymir.output.models_dir + + args_options = ymir_cfg.param.get("args_options", '') + cfg_options = ymir_cfg.param.get("cfg_options", '') + + # auto load offered weight file if not set by user! 
+ if (args_options.find('--resume-from') == -1 and + args_options.find('--load-from') == -1 and + cfg_options.find('load_from') == -1 and + cfg_options.find('resume_from') == -1): # noqa: E129 + + weight_file = get_best_weight_file(ymir_cfg) + if weight_file: + if cfg_options: + cfg_options += f' load_from={weight_file}' + else: + cfg_options = f'load_from={weight_file}' + else: + logging.warning('no weight file used for training!') -def get_weight_file(cfg: edict) -> str: +def get_best_weight_file(cfg: edict) -> str: """ return the weight file path by priority find weight file in cfg.param.pretrained_model_params or cfg.param.model_params_path @@ -118,6 +164,7 @@ def get_weight_file(cfg: edict) -> str: if cfg.ymir.run_training: weight_files = [f for f in glob.glob('/weights/**/*', recursive=True) if f.endswith(('.pth', '.pt'))] + # load pretrained model weight for yolox only model_name_splits = osp.basename(cfg.param.config_file).split('_') if len(weight_files) > 0 and model_name_splits[0] == 'yolox': yolox_weight_files = [ diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index 3868d1e..df4f184 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -16,7 +16,7 @@ from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.utils import collect_env, get_root_logger, setup_multi_processes -from mmdet.utils.util_ymir import modify_mmdet_config +from mmdet.utils.util_ymir import modify_mmcv_config from ymir_exc.util import get_merged_config @@ -101,7 +101,7 @@ def main(): cfg = Config.fromfile(args.config) print(cfg) # modify mmdet config from file - cfg = modify_mmdet_config(mmdet_cfg=cfg, ymir_cfg=ymir_cfg) + modify_mmcv_config(mmcv_cfg=cfg, ymir_cfg=ymir_cfg) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) diff --git a/det-mmdetection-tmi/ymir_infer.py b/det-mmdetection-tmi/ymir_infer.py index a7f22bd..939e5bf 100644 --- 
a/det-mmdetection-tmi/ymir_infer.py +++ b/det-mmdetection-tmi/ymir_infer.py @@ -9,7 +9,7 @@ from easydict import EasyDict as edict from mmcv import DictAction from mmdet.apis import inference_detector, init_detector -from mmdet.utils.util_ymir import get_weight_file +from mmdet.utils.util_ymir import get_best_weight_file from tqdm import tqdm from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor @@ -87,7 +87,7 @@ def __init__(self, cfg: edict): # Specify the path to model config and checkpoint file config_file = get_config_file(cfg) - checkpoint_file = get_weight_file(cfg) + checkpoint_file = get_best_weight_file(cfg) options = cfg.param.get('cfg_options', None) cfg_options = parse_option(options) if options else None diff --git a/det-mmdetection-tmi/ymir_train.py b/det-mmdetection-tmi/ymir_train.py index a84a805..06ed4dd 100644 --- a/det-mmdetection-tmi/ymir_train.py +++ b/det-mmdetection-tmi/ymir_train.py @@ -5,9 +5,9 @@ import sys from easydict import EasyDict as edict -from mmdet.utils.util_ymir import get_weight_file, write_ymir_training_result +from mmdet.utils.util_ymir import get_best_weight_file, write_ymir_training_result from ymir_exc import monitor -from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process +from ymir_exc.util import YmirStage, find_free_port, get_merged_config, get_ymir_process def main(cfg: edict) -> int: @@ -32,7 +32,7 @@ def main(cfg: edict) -> int: (cfg_options is None or (cfg_options.find('load_from') == -1 and cfg_options.find('resume_from') == -1)): - weight_file = get_weight_file(cfg) + weight_file = get_best_weight_file(cfg) if weight_file: if cfg_options: cfg_options += f' load_from={weight_file}' @@ -55,7 +55,7 @@ def main(cfg: edict) -> int: f"--work-dir {work_dir} --gpu-id {gpu_id}" else: os.environ.setdefault('CUDA_VISIBLE_DEVICES', gpu_id) - port = cfg.param.get('port') + port = find_free_port() os.environ.setdefault('PORT', str(port)) cmd = f"bash ./tools/dist_train.sh {config_file} 
{num_gpus} " + \ f"--work-dir {work_dir}" From 30194ebde5b02b6b029ffe1c4871805442cc92da Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 8 Sep 2022 18:53:35 +0800 Subject: [PATCH 122/150] add new mining algorithm for yolov5 --- det-yolov5-tmi/mining-template.yaml | 2 + det-yolov5-tmi/mining/ymir_mining_aldd.py | 164 +++++++++++------- .../{ymir_mining.py => ymir_mining_cald.py} | 6 +- det-yolov5-tmi/start.py | 25 ++- det-yolov5-tmi/training-template.yaml | 1 - det-yolov5-tmi/utils/ymir_yolov5.py | 11 +- 6 files changed, 122 insertions(+), 87 deletions(-) rename det-yolov5-tmi/mining/{ymir_mining.py => ymir_mining_cald.py} (97%) diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 1ae6d29..0979de2 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -8,6 +8,8 @@ # class_names: [] img_size: 640 +mining_algorithm: aldd +class_distribution_scores: '' conf_thres: 0.25 iou_thres: 0.45 batch_size_per_gpu: 16 diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 7891630..c9bc4f2 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -7,15 +7,16 @@ """ import os import sys +import warnings from functools import partial +from typing import Any, List import numpy as np import torch import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, - update_consistency) +from mining.util import YmirDataset, load_image_file from tqdm import tqdm from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw @@ -26,69 +27,95 @@ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) -def calc_unc_val(heatmap): - avg_pool_kernel = 9 - max_pool_kernel = 30 - pad = (avg_pool_kernel - 1) // 2 - - avg_pooling_layer = 
torch.nn.AvgPool2d(kernel=(avg_pool_kernel, avg_pool_kernel), stride = (1, 1), count_include_pad=False, pad=(pad, pad)) - max_pooling_layer = torch.nn.MaxPool2d(kernel=(max_pool_kernel, max_pool_kernel), stride = (30, 30), pad=(2, 2)) - - # mean of entropy - prob_pixel = heatmap - prob_pixel_m1 = 1 - heatmap - ent = -(prob_pixel * torch.log(prob_pixel + 1e-12) + prob_pixel_m1 * torch.log(prob_pixel_m1 + 1e-12)) # N, C, H, W - ent = torch.sum(ent, axis=1, keepdims=True) # N, 1, H, W - mean_of_entropy = avg_pooling_layer(ent) # N, 1, H, W - - # entropy of mean - prob_local = avg_pooling_layer(heatmap) # N, C, H, W - prob_local_m1 = 1 - prob_local - entropy_of_mean = -(prob_local * torch.log(prob_local + 1e-12) + prob_local_m1 * torch.log(prob_local_m1 + 1e-12)) # N, C, H, W - entropy_of_mean = torch.sum(entropy_of_mean, axis=1, keepdims=True) # N, 1, H, W - - uncertainty = entropy_of_mean - mean_of_entropy - unc = max_pooling_layer(uncertainty) - - # aggregating - scores = torch.mean(unc, axis=(1, 2, 3)) - return scores - - -def compute_aldd_score(net_output, num_of_class, net_input_shape): - """ - args: - imgs: list[np.array(H, W, C)] - returns: - scores: list of float - """ - - CLASS_DISTRIBUTION_SCORE = np.array([1.0] * num_of_class) - total_scores = [] - - for each_class_index in range(num_of_class): - feature_map_concate = [] - for each_output_feature_map in net_output: - net_output_conf = each_output_feature_map[:, :, :, :, 4] - net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index] - feature_map_reshape = torch.nn.functional.interpolate(net_output_cls_mult_conf, (net_input_shape, net_input_shape), mode='bilinear') - feature_map_concate.append(feature_map_reshape) - - feature_map_concate = torch.cat(feature_map_concate, 1) - scores = calc_unc_val(feature_map_concate) - scores = scores.cpu().detach().numpy() - total_scores.append(scores) - - total_scores = np.array(total_scores) - total_scores = total_scores * 
CLASS_DISTRIBUTION_SCORE - total_scores = np.sum(total_scores, axis=0) - - return total_scores +class ALDD(object): + def __init__(self, ymir_cfg: edict): + avg_pool_kernel = 9 + max_pool_kernel = 30 + pad = (avg_pool_kernel - 1) // 2 + + self.avg_pooling_layer = torch.nn.AvgPool2d(kernel_size=(avg_pool_kernel, avg_pool_kernel), + stride=(1, 1), + count_include_pad=False, + padding=(pad, pad)) + self.max_pooling_layer = torch.nn.MaxPool2d(kernel_size=(max_pool_kernel, max_pool_kernel), + stride=(30, 30), + padding=(2, 2)) + + self.num_classes = len(ymir_cfg.param.class_names) + if ymir_cfg.param.get('class_distribution_scores', ''): + scores = [float(x.strip()) for x in ymir_cfg.param.class_distribution_scores.split(',')] + if len(scores) < self.num_classes: + warnings.warn('extend 1.0 to class_distribution_scores') + scores.extend([1.0] * (self.num_classes - len(scores))) + self.class_distribution_scores = np.array(scores, dtype=np.float32) + else: + self.class_distribution_scores = np.array([1.0] * self.num_classes, dtype=np.float32) + + def calc_unc_val(self, heatmap: torch.Tensor) -> torch.Tensor: + # mean of entropy + prob_pixel = heatmap + prob_pixel_m1 = 1 - heatmap + ent = -(prob_pixel * torch.log(prob_pixel + 1e-12) + prob_pixel_m1 * torch.log(prob_pixel_m1 + 1e-12) + ) # N, C, H, W + ent = torch.sum(ent, dim=1, keepdim=True) # N, 1, H, W + mean_of_entropy = self.avg_pooling_layer(ent) # N, 1, H, W + + # entropy of mean + prob_local = self.avg_pooling_layer(heatmap) # N, C, H, W + prob_local_m1 = 1 - prob_local + entropy_of_mean = -( + prob_local * torch.log(prob_local + 1e-12) + prob_local_m1 * torch.log(prob_local_m1 + 1e-12)) # N, C, H, W + entropy_of_mean = torch.sum(entropy_of_mean, dim=1, keepdim=True) # N, 1, H, W + + uncertainty = entropy_of_mean - mean_of_entropy + unc = self.max_pooling_layer(uncertainty) + + # aggregating + scores = torch.mean(unc, dim=(1, 2, 3)) + return scores + + def compute_aldd_score(self, net_output: torch.Tensor, 
net_input_shape: Any): + """ + args: + imgs: list[np.array(H, W, C)] + returns: + scores: list of float + """ + if not isinstance(net_input_shape, (list, tuple)): + net_input_shape = (net_input_shape, net_input_shape) + + # CLASS_DISTRIBUTION_SCORE = np.array([1.0] * num_of_class) + scores_list = [] + + for each_class_index in range(self.num_classes): + feature_map_list: List[torch.Tensor] = [] + + for each_output_feature_map in net_output: + each_output_feature_map.sigmoid_() + net_output_conf = each_output_feature_map[:, :, :, :, 4] + net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index] + feature_map_reshape = torch.nn.functional.interpolate(net_output_cls_mult_conf, + net_input_shape, + mode='bilinear', + align_corners=False) + feature_map_list.append(feature_map_reshape) + + feature_map_concate = torch.cat(feature_map_list, 1) + scores = self.calc_unc_val(feature_map_concate) + scores = scores.cpu().detach().numpy() + scores_list.append(scores) + + total_scores = np.array(scores_list) + total_scores = total_scores * self.class_distribution_scores + total_scores = np.sum(total_scores, axis=0) + + return total_scores def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 device = torch.device('cuda', gpu) ymir_yolov5.to(device) @@ -105,7 +132,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): images = [line.strip() for line in f.readlines()] # origin dataset - images_rank = images[RANK::WORLD_SIZE] + if RANK != -1: + images_rank = images[RANK::WORLD_SIZE] + else: + images_rank = images origin_dataset = YmirDataset(images_rank, load_fn=load_fn) origin_dataset_loader = td.DataLoader(origin_dataset, batch_size=batch_size_per_gpu, @@ -118,10 +148,11 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): mining_results = dict() dataset_size = len(images_rank) pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader + miner = ALDD(ymir_cfg) for idx, batch in enumerate(pbar): with torch.no_grad(): featuremap_output = ymir_yolov5.model.model(batch['image'].float().to(device))[1] - unc_scores = compute_aldd_score(featuremap_output, len(ymir_cfg.param.class_names), ymir_yolov5.img_size) + unc_scores = miner.compute_aldd_score(featuremap_output, ymir_yolov5.img_size) for each_imgname, each_score in zip(batch["image_file"], unc_scores): mining_results[each_imgname] = each_score @@ -134,18 +165,21 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): def main() -> int: ymir_cfg = get_merged_config() + # note select_device(gpu_id) will set os.environ['CUDA_VISIBLE_DEVICES'] to gpu_id ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 torch.cuda.set_device(gpu) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") run(ymir_cfg, ymir_yolov5) # wait all process to save the mining 
result - dist.barrier() + if WORLD_SIZE > 1: + dist.barrier() if RANK in [0, -1]: results = [] diff --git a/det-yolov5-tmi/mining/ymir_mining.py b/det-yolov5-tmi/mining/ymir_mining_cald.py similarity index 97% rename from det-yolov5-tmi/mining/ymir_mining.py rename to det-yolov5-tmi/mining/ymir_mining_cald.py index 7ac11bd..d84e2f7 100644 --- a/det-yolov5-tmi/mining/ymir_mining.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -29,7 +29,8 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. - gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 device = torch.device('cuda', gpu) ymir_yolov5.to(device) @@ -158,7 +159,8 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 torch.cuda.set_device(gpu) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 9e2dfa1..0cc29df 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -5,15 +5,14 @@ import cv2 from easydict import EasyDict as edict +from models.experimental import attempt_download +from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process, write_ymir_training_result) -from models.experimental import attempt_download -from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file - def start() -> int: cfg = get_merged_config() @@ -119,18 +118,21 @@ def 
_run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) + percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 - if gpu_count <= 1: + mining_algorithm = cfg.param.get('mining_algorithm', 'aldd') + support_mining_algorithms = ['aldd', 'cald'] + if mining_algorithm not in support_mining_algorithms: + raise Exception(f'unknown mining algorithm {mining_algorithm}, not in {support_mining_algorithms}') + + if gpu_count <= 1 and mining_algorithm in ['cald']: command = 'python3 mining/mining_cald.py' else: port = find_free_port() - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining.py' # noqa + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining_{mining_algorithm}.py' # noqa + logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) monitor.write_monitor_logger( @@ -143,10 +145,7 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: convert_ymir_to_yolov5(cfg) logging.info(f'generate {out_dir}/data.yaml') monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, - p=1.0, - task_idx=task_idx, - task_num=task_num)) + percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index ac9a91f..f3f7a20 100644 --- a/det-yolov5-tmi/training-template.yaml +++ 
b/det-yolov5-tmi/training-template.yaml @@ -17,4 +17,3 @@ opset: 11 args_options: '--exist-ok' save_period: 10 sync_bn: False # work for multi-gpu only -port: 29500 # work for multi-gpu only diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 4093100..22ec372 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -11,16 +11,15 @@ import torch import yaml from easydict import EasyDict as edict +from models.common import DetectMultiBackend from nptyping import NDArray, Shape, UInt8 from packaging.version import Version -from ymir_exc import monitor -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process - -from models.common import DetectMultiBackend from utils.augmentations import letterbox from utils.general import check_img_size, non_max_suppression, scale_coords from utils.torch_utils import select_device +from ymir_exc import monitor +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] @@ -65,7 +64,7 @@ def __init__(self, cfg: edict, task='infer'): self.task_num = 1 self.gpu_id: str = str(cfg.param.get('gpu_id', '0')) - device = select_device(self.gpu_id) + device = select_device(self.gpu_id) # will set CUDA_VISIBLE_DEVICES=self.gpu_id self.gpu_count: int = len(self.gpu_id.split(',')) if self.gpu_id else 0 self.batch_size_per_gpu: int = int(cfg.param.get('batch_size_per_gpu', 4)) self.num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4)) From 953fc9a91992a3ec48d841da041e8ae4be3aad59 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 8 Sep 2022 19:00:30 +0800 Subject: [PATCH 123/150] add new mining algorith aldd for yolov5 --- README.MD | 1 + README_zh-CN.MD | 1 + det-yolov5-tmi/README.md | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff 
--git a/README.MD b/README.MD index d4ef769..1d834f2 100644 --- a/README.MD +++ b/README.MD @@ -156,6 +156,7 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## reference - [mining algorithm: CALD](https://github.com/we1pingyu/CALD/) +- [mining algorithm: ALDD](https://gitlab.com/haghdam/deep_active_learning) - [yolov4](https://github.com/AlexeyAB/darknet) - [yolov5](https://github.com/ultralytics/yolov5) - [mmdetection](https://github.com/open-mmlab/mmdetection) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index 6cbcd8c..f22015a 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -174,6 +174,7 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 参考 - [挖掘算法CALD](https://github.com/we1pingyu/CALD/) +- [挖掘算法ALDD](https://gitlab.com/haghdam/deep_active_learning) - [yolov4](https://github.com/AlexeyAB/darknet) - [yolov5](https://github.com/ultralytics/yolov5) - [mmdetection](https://github.com/open-mmlab/mmdetection) diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index 520d78c..102c198 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -7,7 +7,7 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda102-yolov5-tmi --build-arg SERV docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . ``` -## change log +## main change log - add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining @@ -34,3 +34,7 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERV - modify `requirements.txt` - other modify support onnx export, not important. + +## new features + +- 2022/09/08: add aldd active learning algorithm for mining task. 
[Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) From 2e03292d99dadd992da760dcd4ed48bd4a79e112 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 8 Sep 2022 19:03:50 +0800 Subject: [PATCH 124/150] support long scores --- det-yolov5-tmi/mining-template.yaml | 2 +- det-yolov5-tmi/mining/ymir_mining_aldd.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 0979de2..9011fe6 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -9,7 +9,7 @@ img_size: 640 mining_algorithm: aldd -class_distribution_scores: '' +class_distribution_scores: '' # 1.0,1.0,0.1,0.2 conf_thres: 0.25 iou_thres: 0.45 batch_size_per_gpu: 16 diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index c9bc4f2..52a85b8 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -47,7 +47,7 @@ def __init__(self, ymir_cfg: edict): if len(scores) < self.num_classes: warnings.warn('extend 1.0 to class_distribution_scores') scores.extend([1.0] * (self.num_classes - len(scores))) - self.class_distribution_scores = np.array(scores, dtype=np.float32) + self.class_distribution_scores = np.array(scores[0:self.num_classes], dtype=np.float32) else: self.class_distribution_scores = np.array([1.0] * self.num_classes, dtype=np.float32) From 9abe0c99d4513f736e5c53f82269481470527c6e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 14 Sep 2022 10:42:59 +0800 Subject: [PATCH 125/150] support change num_workers_per_gpu --- det-mmdetection-tmi/training-template.yaml | 2 +- det-yolov5-tmi/start.py | 4 +++- det-yolov5-tmi/training-template.yaml | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 7744172..7a265ac 100644 --- 
a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -1,7 +1,7 @@ shm_size: '32G' export_format: 'ark:raw' samples_per_gpu: 16 -workers_per_gpu: 16 +workers_per_gpu: 8 max_epochs: 300 config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 0cc29df..bd4b537 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -56,6 +56,7 @@ def _run_training(cfg: edict) -> None: # 2. training model epochs: int = int(cfg.param.epochs) batch_size_per_gpu: int = int(cfg.param.batch_size_per_gpu) + num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 8)) model: str = cfg.param.model img_size: int = int(cfg.param.img_size) save_period: int = max(1, min(epochs // 10, int(cfg.param.save_period))) @@ -87,7 +88,8 @@ def _run_training(cfg: edict) -> None: str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', project, '--cfg', f'models/{model}.yaml', '--name', name, '--weights', weights, '--img-size', str(img_size), '--save-period', - str(save_period), '--device', device + str(save_period), '--device', device, + '--workers', str(num_workers_per_gpu) ]) if gpu_count > 1 and sync_bn: diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index f3f7a20..4bd27b5 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -11,6 +11,7 @@ shm_size: '32G' export_format: 'ark:raw' model: 'yolov5s' batch_size_per_gpu: 16 +num_workers_per_gpu: 8 epochs: 300 img_size: 640 opset: 11 From a3079e5cf697828094b66cdd3cadf0971e571597 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 15 Sep 2022 17:28:13 +0800 Subject: [PATCH 126/150] assign before used --- det-yolov5-tmi/README.md | 1 + det-yolov5-tmi/mining/ymir_infer.py | 2 +- det-yolov5-tmi/mining/ymir_mining_aldd.py | 14 +++++++++++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git 
a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index 102c198..bc1d11a 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -38,3 +38,4 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERV ## new features - 2022/09/08: add aldd active learning algorithm for mining task. [Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) +- 2022/09/14: support change hyper-parameter `num_workers_per_gpu` diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index f10f210..258af64 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -67,9 +67,9 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): preprocess_image_shape = batch['image'].shape[2:] for idx, det in enumerate(pred): # per image result_per_image = [] + image_file = batch['image_file'][idx] if len(det): origin_image_shape = (batch['origin_shape'][0][idx], batch['origin_shape'][1][idx]) - image_file = batch['image_file'][idx] # Rescale boxes from img_size to img size det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() result_per_image.append(det) diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 52a85b8..5a2dd72 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -74,7 +74,7 @@ def calc_unc_val(self, heatmap: torch.Tensor) -> torch.Tensor: scores = torch.mean(unc, dim=(1, 2, 3)) return scores - def compute_aldd_score(self, net_output: torch.Tensor, net_input_shape: Any): + def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: Any): """ args: imgs: list[np.array(H, W, C)] @@ -87,27 +87,35 @@ def compute_aldd_score(self, net_output: torch.Tensor, net_input_shape: Any): # CLASS_DISTRIBUTION_SCORE = np.array([1.0] * num_of_class) scores_list = [] + for feature_map in 
net_output: + feature_map.sigmoid_() + for each_class_index in range(self.num_classes): feature_map_list: List[torch.Tensor] = [] + # each_output_feature_map: [bs, 3, h, w, 5 + num_classes] for each_output_feature_map in net_output: - each_output_feature_map.sigmoid_() net_output_conf = each_output_feature_map[:, :, :, :, 4] net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index] + # feature_map_reshape: [bs, 3, h, w] feature_map_reshape = torch.nn.functional.interpolate(net_output_cls_mult_conf, net_input_shape, mode='bilinear', align_corners=False) feature_map_list.append(feature_map_reshape) + # len(net_output) = 3 + # feature_map_concate: [bs, 9, h, w] feature_map_concate = torch.cat(feature_map_list, 1) + # scores: [bs, 1] scores = self.calc_unc_val(feature_map_concate) scores = scores.cpu().detach().numpy() scores_list.append(scores) + # total_scores: [bs, num_classes] total_scores = np.array(scores_list) total_scores = total_scores * self.class_distribution_scores - total_scores = np.sum(total_scores, axis=0) + total_scores = np.sum(total_scores, axis=1) return total_scores From f11acb40a7a70e037205ea396bb31689531df810 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 16 Sep 2022 10:05:38 +0800 Subject: [PATCH 127/150] update aldd mining algorithm --- det-yolov5-tmi/.dockerignore | 1 + det-yolov5-tmi/mining/ymir_mining_aldd.py | 60 +++++++++++------------ det-yolov5-tmi/utils/ymir_yolov5.py | 6 +++ 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/det-yolov5-tmi/.dockerignore b/det-yolov5-tmi/.dockerignore index bee6b98..9f34de6 100644 --- a/det-yolov5-tmi/.dockerignore +++ b/det-yolov5-tmi/.dockerignore @@ -14,6 +14,7 @@ data/samples/* # Neural Network weights ----------------------------------------------------------------------------------------------- #**/*.pt **/*.pth +**/*.pkl **/*.onnx **/*.engine **/*.mlmodel diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py 
b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 5a2dd72..2ae2845 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -14,6 +14,7 @@ import numpy as np import torch import torch.distributed as dist +import torch.nn.functional as F import torch.utils.data as td from easydict import EasyDict as edict from mining.util import YmirDataset, load_image_file @@ -29,17 +30,9 @@ class ALDD(object): def __init__(self, ymir_cfg: edict): - avg_pool_kernel = 9 - max_pool_kernel = 30 - pad = (avg_pool_kernel - 1) // 2 - - self.avg_pooling_layer = torch.nn.AvgPool2d(kernel_size=(avg_pool_kernel, avg_pool_kernel), - stride=(1, 1), - count_include_pad=False, - padding=(pad, pad)) - self.max_pooling_layer = torch.nn.MaxPool2d(kernel_size=(max_pool_kernel, max_pool_kernel), - stride=(30, 30), - padding=(2, 2)) + self.avg_pool_size = 9 + self.max_pool_size = 32 + self.avg_pool_pad = (self.avg_pool_size - 1) // 2 self.num_classes = len(ymir_cfg.param.class_names) if ymir_cfg.param.get('class_distribution_scores', ''): @@ -53,25 +46,32 @@ def __init__(self, ymir_cfg: edict): def calc_unc_val(self, heatmap: torch.Tensor) -> torch.Tensor: # mean of entropy - prob_pixel = heatmap - prob_pixel_m1 = 1 - heatmap - ent = -(prob_pixel * torch.log(prob_pixel + 1e-12) + prob_pixel_m1 * torch.log(prob_pixel_m1 + 1e-12) - ) # N, C, H, W - ent = torch.sum(ent, dim=1, keepdim=True) # N, 1, H, W - mean_of_entropy = self.avg_pooling_layer(ent) # N, 1, H, W + ent = F.binary_cross_entropy(heatmap, heatmap, reduction='none') + avg_ent = F.avg_pool2d(ent, + kernel_size=self.avg_pool_size, + stride=1, + padding=self.avg_pool_pad, + count_include_pad=False) # N, 1, H, W + mean_of_entropy = torch.sum(avg_ent, dim=1, keepdim=True) # N, 1, H, W # entropy of mean - prob_local = self.avg_pooling_layer(heatmap) # N, C, H, W - prob_local_m1 = 1 - prob_local - entropy_of_mean = -( - prob_local * torch.log(prob_local + 1e-12) + prob_local_m1 * 
torch.log(prob_local_m1 + 1e-12)) # N, C, H, W - entropy_of_mean = torch.sum(entropy_of_mean, dim=1, keepdim=True) # N, 1, H, W + avg_heatmap = F.avg_pool2d(heatmap, + kernel_size=self.avg_pool_size, + stride=1, + padding=self.avg_pool_pad, + count_include_pad=False) # N, C, H, W + ent_avg = F.binary_cross_entropy(avg_heatmap, avg_heatmap, reduction='none') + entropy_of_mean = torch.sum(ent_avg, dim=1, keepdim=True) # N, 1, H, W uncertainty = entropy_of_mean - mean_of_entropy - unc = self.max_pooling_layer(uncertainty) + unc = F.max_pool2d(uncertainty, + kernel_size=self.max_pool_size, + stride=self.max_pool_size, + padding=0, + ceil_mode=False) # aggregating - scores = torch.mean(unc, dim=(1, 2, 3)) + scores = torch.mean(unc, dim=(1, 2, 3)) # (N,) return scores def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: Any): @@ -98,22 +98,22 @@ def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: An net_output_conf = each_output_feature_map[:, :, :, :, 4] net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index] # feature_map_reshape: [bs, 3, h, w] - feature_map_reshape = torch.nn.functional.interpolate(net_output_cls_mult_conf, - net_input_shape, - mode='bilinear', - align_corners=False) + feature_map_reshape = F.interpolate(net_output_cls_mult_conf, + net_input_shape, + mode='bilinear', + align_corners=False) feature_map_list.append(feature_map_reshape) # len(net_output) = 3 # feature_map_concate: [bs, 9, h, w] feature_map_concate = torch.cat(feature_map_list, 1) - # scores: [bs, 1] + # scores: [bs, 1] for each class scores = self.calc_unc_val(feature_map_concate) scores = scores.cpu().detach().numpy() scores_list.append(scores) # total_scores: [bs, num_classes] - total_scores = np.array(scores_list) + total_scores = np.stack(scores_list, axis=1) total_scores = total_scores * self.class_distribution_scores total_scores = np.sum(total_scores, axis=1) diff --git 
a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 22ec372..675110c 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -85,6 +85,12 @@ def __init__(self, cfg: edict, task='infer'): self.model.warmup(imgsz=(1, 3, *imgsz), half=False) # warmup self.img_size: List[int] = imgsz + def extract_feats(self, x): + """ + return the feature maps before sigmoid for mining + """ + return self.model.model(x)[1] + def forward(self, x, nms=False): pred = self.model(x) if not nms: From 4affb2908922078544ddc77ffed51b0261160fde Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 16 Sep 2022 15:31:37 +0800 Subject: [PATCH 128/150] fix code-review conversation --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 49 +++++++++----------- det-mmdetection-tmi/training-template.yaml | 1 + det-yolov5-tmi/mining/ymir_mining_aldd.py | 7 ++- det-yolov5-tmi/mining/ymir_mining_cald.py | 8 ++-- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 12910ea..a809af4 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -26,16 +26,23 @@ def modify_mmcv_config(mmcv_cfg: Config, ymir_cfg: edict) -> None: - modify model output channel - modify epochs, checkpoint, tensorboard config """ - def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any): + def recursive_modify_attribute(mmcv_cfg: Config, attribute_key: str, attribute_value: Any): + """ + recursive modify mmcv_cfg: + 1. mmcv_cfg.attribute_key to attribute_value + 2. mmcv_cfg.xxx.xxx.xxx.attribute_key to attribute_value (recursive) + 3. mmcv_cfg.xxx[i].attribute_key to attribute_value (i=0, 1, 2 ...) + 4. 
mmcv_cfg.xxx[i].xxx.xxx[j].attribute_key to attribute_value + """ for key in mmcv_cfg: if key == attribute_key: mmcv_cfg[key] = attribute_value elif isinstance(mmcv_cfg[key], Config): - recursive_modify(mmcv_cfg[key], attribute_key, attribute_value) + recursive_modify_attribute(mmcv_cfg[key], attribute_key, attribute_value) elif isinstance(mmcv_cfg[key], Iterable): for cfg in mmcv_cfg[key]: if isinstance(cfg, Config): - recursive_modify(cfg, attribute_key, attribute_value) + recursive_modify_attribute(cfg, attribute_key, attribute_value) # modify dataset config ymir_ann_files = dict(train=ymir_cfg.ymir.input.training_index_file, @@ -49,8 +56,9 @@ def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any) mmcv_cfg.data.samples_per_gpu = samples_per_gpu mmcv_cfg.data.workers_per_gpu = workers_per_gpu + # modify model output channel num_classes = len(ymir_cfg.param.class_names) - recursive_modify(mmcv_cfg.model, 'num_classes', num_classes) + recursive_modify_attribute(mmcv_cfg.model, 'num_classes', num_classes) for split in ['train', 'val', 'test']: ymir_dataset_cfg = dict(type='YmirDataset', @@ -77,24 +85,9 @@ def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any) else: raise Exception(f'unsupported source dataset type {src_dataset_type}') - # # modify model output channel - # if mmcv_cfg.model.get('bbox_head'): # yolox, yolo, yolof, retinanet, ssd - # mmdet_model_cfg = mmcv_cfg.model.bbox_head - # elif mmcv_cfg.model.get('roi_head'): # Faster-RCNN, fast-rcnn - # mmdet_model_cfg = mmcv_cfg.model.roi_head.bbox_head - # elif mmcv_cfg.model.get('mask_head'): # SOLO - # mmdet_model_cfg = mmcv_cfg.model.mask_head - # else: - # raise Exception('unknown model structure') - - # if mmdet_model_cfg.get('num_classes'): - # mmdet_model_cfg.num_classes = len(ymir_cfg.param.class_names) - # else: - # raise Exception('unknown model structure, no attr num_classes found') - # modify epochs, checkpoint, tensorboard config if 
ymir_cfg.param.get('max_epochs', None): - mmcv_cfg.runner.max_epochs = ymir_cfg.param.max_epochs + mmcv_cfg.runner.max_epochs = int(ymir_cfg.param.max_epochs) mmcv_cfg.checkpoint_config['out_dir'] = ymir_cfg.ymir.output.models_dir tensorboard_logger = dict(type='TensorboardLoggerHook', log_dir=ymir_cfg.ymir.output.tensorboard_dir) if len(mmcv_cfg.log_config['hooks']) <= 1: @@ -102,9 +95,15 @@ def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any) else: mmcv_cfg.log_config['hooks'][1].update(tensorboard_logger) + # TODO save only the best top-k model weight files. # modify evaluation and interval - interval = max(1, mmcv_cfg.runner.max_epochs // 10) - mmcv_cfg.evaluation.interval = interval + val_interval: int = int(ymir_cfg.param.get('val_interval', 1)) + if val_interval > 0: + val_interval = min(val_interval, mmcv_cfg.runner.max_epochs) + else: + val_interval = max(1, mmcv_cfg.runner.max_epochs // 10) + + mmcv_cfg.evaluation.interval = val_interval mmcv_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') mmcv_cfg.checkpoint_config.interval = mmcv_cfg.evaluation.interval # TODO Whether to evaluating the AP for each class @@ -120,10 +119,8 @@ def recursive_modify(mmcv_cfg: Config, attribute_key: str, attribute_value: Any) cfg_options = ymir_cfg.param.get("cfg_options", '') # auto load offered weight file if not set by user! 
- if (args_options.find('--resume-from') == -1 and - args_options.find('--load-from') == -1 and - cfg_options.find('load_from') == -1 and - cfg_options.find('resume_from') == -1): # noqa: E129 + if (args_options.find('--resume-from') == -1 and args_options.find('--load-from') == -1 + and cfg_options.find('load_from') == -1 and cfg_options.find('resume_from') == -1): # noqa: E129 weight_file = get_best_weight_file(ymir_cfg) if weight_file: diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index 7a265ac..dcb0ce9 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -7,4 +7,5 @@ config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' cfg_options: '' metric: 'bbox' +val_interval: 0 # <0 means use auto interval = max(1, max_epochs//10) port: 12345 diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 2ae2845..928c6e1 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -122,7 +122,6 @@ def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: An def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 device = torch.device('cuda', gpu) ymir_yolov5.to(device) @@ -178,7 +177,6 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 torch.cuda.set_device(gpu) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") @@ -186,7 +184,7 @@ def main() -> int: run(ymir_cfg, ymir_yolov5) # wait all process to save the mining result - if WORLD_SIZE > 1: + if LOCAL_RANK != -1: dist.barrier() if RANK in [0, -1]: @@ -200,7 +198,8 @@ def main() -> int: ymir_mining_result.append((img_file, score)) rw.write_mining_result(mining_result=ymir_mining_result) - print(f'rank: {RANK}, start destroy process group') + if LOCAL_RANK != -1: + print(f'rank: {RANK}, start destroy process group') dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py index d84e2f7..06f2542 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -29,7 +29,6 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 device = torch.device('cuda', gpu) ymir_yolov5.to(device) @@ -159,7 +158,6 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - # gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 torch.cuda.set_device(gpu) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") @@ -167,7 +165,8 @@ def main() -> int: run(ymir_cfg, ymir_yolov5) # wait all process to save the mining result - dist.barrier() + if LOCAL_RANK != -1: + dist.barrier() if RANK in [0, -1]: results = [] @@ -180,7 +179,8 @@ def main() -> int: ymir_mining_result.append((img_file, score)) rw.write_mining_result(mining_result=ymir_mining_result) - print(f'rank: {RANK}, start destroy process group') + if LOCAL_RANK != -1: + print(f'rank: {RANK}, start destroy process group') dist.destroy_process_group() return 0 From 680147bb2f0a77c75ffe424b364e2143c40d2518 Mon Sep 17 00:00:00 2001 From: LuciferZap <92283801+LuciferZap@users.noreply.github.com> Date: Fri, 16 Sep 2022 15:34:02 +0800 Subject: [PATCH 129/150] use imagesize to get img w and h instead of read img --- det-yolov4-tmi/convert_label_ark2txt.py | 7 ++----- det-yolov4-tmi/cuda101.dockerfile | 2 +- det-yolov4-tmi/cuda112.dockerfile | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/det-yolov4-tmi/convert_label_ark2txt.py b/det-yolov4-tmi/convert_label_ark2txt.py index ae54b63..2e963f7 100755 --- a/det-yolov4-tmi/convert_label_ark2txt.py +++ b/det-yolov4-tmi/convert_label_ark2txt.py @@ -1,6 +1,6 @@ import os +import imagesize -import cv2 def _annotation_path_for_image(image_path: str, annotations_dir: str) -> str: @@ -30,10 +30,7 @@ def _convert_annotations(index_file_path: str, dst_annotations_dir: str) -> None # each_txtfile: annotation path each_imgpath, each_txtfile = 
each_img_anno_path.split() - img = cv2.imread(each_imgpath) - if img is None: - raise ValueError(f"can not read image: {each_imgpath}") - img_h, img_w, _ = img.shape + img_w, img_h = imagesize.get(each_imgpath) with open(each_txtfile, 'r') as f: txt_content = f.readlines() diff --git a/det-yolov4-tmi/cuda101.dockerfile b/det-yolov4-tmi/cuda101.dockerfile index 53aa01b..66273c3 100644 --- a/det-yolov4-tmi/cuda101.dockerfile +++ b/det-yolov4-tmi/cuda101.dockerfile @@ -15,7 +15,7 @@ RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_o RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py -RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu101==1.5.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm imagesize ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev diff --git a/det-yolov4-tmi/cuda112.dockerfile b/det-yolov4-tmi/cuda112.dockerfile index aac49de..bab5c7d 100644 --- a/det-yolov4-tmi/cuda112.dockerfile +++ b/det-yolov4-tmi/cuda112.dockerfile @@ -15,7 +15,7 @@ RUN wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_o RUN rm /usr/bin/python3 RUN ln -s /usr/bin/python3.7 /usr/bin/python3 RUN python3 get-pip.py -RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm +RUN pip3 install -i ${PIP_SOURCE} mxnet-cu112==1.9.1 numpy opencv-python pyyaml watchdog tensorboardX six scipy tqdm imagesize ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install -y libopencv-dev From 512194c471439544326c357484165136b3ce32fa Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Fri, 16 Sep 2022 17:14:45 +0800 Subject: [PATCH 130/150] save topk checkpoint weight files --- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 48 ++++++++++++++++++-- 
det-mmdetection-tmi/training-template.yaml | 3 +- det-yolov5-tmi/README.md | 1 + det-yolov5-tmi/models/common.py | 23 +++++++++- det-yolov5-tmi/models/experimental.py | 14 +++++- det-yolov5-tmi/start.py | 12 +++-- 6 files changed, 88 insertions(+), 13 deletions(-) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index a809af4..515c22a 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -14,6 +14,7 @@ from nptyping import NDArray, Shape, UInt8 from packaging.version import Version from ymir_exc import result_writer as rw +from ymir_exc.util import get_merged_config BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] @@ -101,11 +102,17 @@ def recursive_modify_attribute(mmcv_cfg: Config, attribute_key: str, attribute_v if val_interval > 0: val_interval = min(val_interval, mmcv_cfg.runner.max_epochs) else: - val_interval = max(1, mmcv_cfg.runner.max_epochs // 10) + val_interval = 1 mmcv_cfg.evaluation.interval = val_interval mmcv_cfg.evaluation.metric = ymir_cfg.param.get('metric', 'bbox') + + # save best top-k model weights files + # max_keep_ckpts <= 0 # save all checkpoints + max_keep_ckpts: int = int(ymir_cfg.param.get('max_keep_checkpoints', 1)) mmcv_cfg.checkpoint_config.interval = mmcv_cfg.evaluation.interval + mmcv_cfg.checkpoint_config.max_keep_ckpts = max_keep_ckpts + # TODO Whether to evaluating the AP for each class # mmdet_cfg.evaluation.classwise = True @@ -189,6 +196,30 @@ def write_ymir_training_result(last: bool = False, key_score: Optional[float] = _write_ancient_ymir_training_result(key_score) +def get_topk_checkpoints(files: List[str], k: int) -> List[str]: + """ + keep topk checkpoint files, remove other files. 
+ """ + checkpoints_files = [f for f in files if f.endswith(('.pth', '.pt'))] + + best_pth_files = [f for f in checkpoints_files if osp.basename(f).startswith('best_')] + if len(best_pth_files) > 0: + # newest first + topk_best_pth_files = sorted(best_pth_files, key=os.path.getctime, reverse=True) + else: + topk_best_pth_files = [] + + epoch_pth_files = [f for f in checkpoints_files if osp.basename(f).startswith(('epoch_', 'iter_'))] + if len(epoch_pth_files) > 0: + topk_epoch_pth_files = sorted(epoch_pth_files, key=os.path.getctime, reverse=True) + else: + topk_epoch_pth_files = [] + + # python will check the length of list + return topk_best_pth_files[0:k] + topk_epoch_pth_files[0:k] + + +# TODO save topk checkpoints, fix invalid stage due to delete checkpoint def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[float] = None): if key_score: logging.info(f'key_score is {key_score}') @@ -209,6 +240,11 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f if last: # save all output file + ymir_cfg = get_merged_config() + max_keep_checkpoints = int(ymir_cfg.param.get('max_keep_checkpoints', 1)) + if max_keep_checkpoints > 0: + topk_checkpoints = get_topk_checkpoints(result_files, max_keep_checkpoints) + result_files = [f for f in result_files if not f.endswith(('.pth', '.pt'))] + topk_checkpoints rw.write_model_stage(files=result_files, mAP=float(map), stage_name='last') else: # save newest weight file in format epoch_xxx.pth or iter_xxx.pth @@ -245,13 +281,17 @@ def _write_ancient_ymir_training_result(key_score: Optional[float] = None): # eval_result may be empty dict {}. 
map = eval_result.get('bbox_mAP_50', 0) - WORK_DIR = os.getenv('YMIR_MODELS_DIR') - if WORK_DIR is None or not osp.isdir(WORK_DIR): - raise Exception(f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') + ymir_cfg = get_merged_config() + WORK_DIR = ymir_cfg.ymir.output.models_dir # assert only one model config file in work_dir result_files = [osp.basename(f) for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + max_keep_checkpoints = int(ymir_cfg.param.get('max_keep_checkpoints', 1)) + if max_keep_checkpoints > 0: + topk_checkpoints = get_topk_checkpoints(result_files, max_keep_checkpoints) + result_files = [f for f in result_files if not f.endswith(('.pth', '.pt'))] + topk_checkpoints + training_result_file = osp.join(WORK_DIR, 'result.yaml') if osp.exists(training_result_file): with open(training_result_file, 'r') as f: diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index dcb0ce9..902f435 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -7,5 +7,6 @@ config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' cfg_options: '' metric: 'bbox' -val_interval: 0 # <0 means use auto interval = max(1, max_epochs//10) +val_interval: 1 # <0 means evaluation every interval +max_keep_checkpoints: 1 # <0 means save all weight file, 1 means save last and best weight files, k means save topk best weight files and topk epoch/step weight files port: 12345 diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index bc1d11a..c2ad3c2 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -39,3 +39,4 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERV - 2022/09/08: add aldd active learning algorithm for mining task.
[Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) - 2022/09/14: support change hyper-parameter `num_workers_per_gpu` +- 2022/09/16: support change activation, view [rknn](https://github.com/airockchip/rknn_model_zoo/tree/main/models/vision/object_detection/yolov5-pytorch) diff --git a/det-yolov5-tmi/models/common.py b/det-yolov5-tmi/models/common.py index d116aa5..b7b6d16 100644 --- a/det-yolov5-tmi/models/common.py +++ b/det-yolov5-tmi/models/common.py @@ -3,6 +3,7 @@ Common modules """ +import os import json import math import platform @@ -41,7 +42,17 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, k super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = nn.Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + + activation = os.environ.get('ACTIVATION', None) + if activation is None: + self.act = nn.Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + else: + if activation.lower() == 'relu': + custom_act = nn.ReLU() + else: + warnings.warn(f'unknown activation {activation}, use Hardswish instead') + custom_act = nn.Hardswish() + self.act = custom_act if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) @@ -115,7 +126,15 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, nu self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) self.cv4 = Conv(2 * c_, c2, 1, 1) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) - self.act = nn.SiLU() + activation = os.environ.get('ACTIVATION', None) + if activation is None: + self.act = nn.SiLU() + else: + if activation.lower() == 'relu': + self.act = nn.ReLU() + else: + warnings.warn(f'unknown activation {activation}, use SiLU instead') + self.act = nn.SiLU() self.m = 
nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) def forward(self, x): diff --git a/det-yolov5-tmi/models/experimental.py b/det-yolov5-tmi/models/experimental.py index 463e551..dbfecbf 100644 --- a/det-yolov5-tmi/models/experimental.py +++ b/det-yolov5-tmi/models/experimental.py @@ -2,6 +2,7 @@ """ Experimental modules """ +import os import math import numpy as np @@ -10,6 +11,7 @@ from models.common import Conv from utils.downloads import attempt_download +import warnings class CrossConv(nn.Module): @@ -59,14 +61,22 @@ def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kern b = [c2] + [0] * n a = np.eye(n + 1, n, k=-1) a -= np.roll(a, 1, axis=1) - a *= np.array(k) ** 2 + a *= np.array(k)**2 a[0] = 1 c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b self.m = nn.ModuleList( [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) self.bn = nn.BatchNorm2d(c2) - self.act = nn.SiLU() + activation = os.environ.get('ACTIVATION', None) + if activation is None: + self.act = nn.SiLU() + else: + if activation.lower() == 'relu': + self.act = nn.ReLU() + else: + warnings.warn(f'unknown activation {activation}, use SiLU instead') + self.act = nn.SiLU() def forward(self, x): return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index bd4b537..8cd13b4 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -14,9 +14,7 @@ write_ymir_training_result) -def start() -> int: - cfg = get_merged_config() - +def start(cfg: edict) -> int: logging.info(f'merged config: {cfg}') if cfg.ymir.run_training: @@ -187,5 +185,11 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: datefmt='%Y%m%d-%H:%M:%S', level=logging.INFO) + cfg = get_merged_config() os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') - sys.exit(start()) + + # 
activation: relu + activation: str = cfg.param.get('activation', '') + if activation: + os.environ.setdefault('ACTIVATION', activation) + sys.exit(start(cfg)) From ef09dcf7e2b91e710d5856859864cc388ffc08ad Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 21 Sep 2022 14:29:30 +0800 Subject: [PATCH 131/150] fix 30min dist.barrier() time-out errors --- det-mmdetection-tmi/ymir_mining.py | 4 ++++ det-yolov5-tmi/mining/ymir_infer.py | 4 ++++ det-yolov5-tmi/mining/ymir_mining_aldd.py | 6 +++++- det-yolov5-tmi/mining/ymir_mining_cald.py | 8 ++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining.py index 787290e..506506d 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining.py @@ -283,6 +283,10 @@ def mining(self): beta = 1.3 mining_result = [] for asset_path in tbar: + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1: + dist.barrier() + img = cv2.imread(asset_path) # xyxy,conf,cls result = self.predict(img) diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index 258af64..7ac0c4b 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -58,6 +58,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1: + dist.barrier() + with torch.no_grad(): pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 928c6e1..f013584 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -157,6 +157,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): pbar = 
tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader miner = ALDD(ymir_cfg) for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1: + dist.barrier() + with torch.no_grad(): featuremap_output = ymir_yolov5.model.model(batch['image'].float().to(device))[1] unc_scores = miner.compute_aldd_score(featuremap_output, ymir_yolov5.img_size) @@ -200,7 +204,7 @@ def main() -> int: if LOCAL_RANK != -1: print(f'rank: {RANK}, start destroy process group') - dist.destroy_process_group() + dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py index 06f2542..bd5df34 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -62,6 +62,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): dataset_size = len(images_rank) pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1: + dist.barrier() + with torch.no_grad(): pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) @@ -98,6 +102,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) pbar = tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1: + dist.barrier() + if idx % monitor_gap == 0 and RANK in [-1, 0]: ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) From 20491affaa948114424dbcbbd2dd498d82204b2d Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 22 Sep 2022 13:52:35 +0800 Subject: [PATCH 132/150] update readme --- README.MD | 6 +++--- README_zh-CN.MD | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index 
1d834f2..1e06bd9 100644 --- a/README.MD +++ b/README.MD @@ -73,10 +73,10 @@ | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | -| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? | ✔️ | ✔️ | pytorch | ❌ | online | -| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | online | +| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | local+online | - `online` pretrained weights may download through network @@ -156,7 +156,7 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## reference - [mining algorithm: CALD](https://github.com/we1pingyu/CALD/) -- [mining algorithm: ALDD](https://gitlab.com/haghdam/deep_active_learning) +- [mining algorithm: ALDD](https://gitlab.com/haghdam/deep_active_learning) - [yolov4](https://github.com/AlexeyAB/darknet) - [yolov5](https://github.com/ultralytics/yolov5) - [mmdetection](https://github.com/open-mmlab/mmdetection) diff --git a/README_zh-CN.MD b/README_zh-CN.MD index f22015a..ac6d483 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -73,10 +73,10 @@ | yolov4 | ? | ✔️ | ❌ | darknet + mxnet | ❌ | local | | yolov5 | ✔️ | ✔️ | ✔️ | pytorch | ✔️ | local+online | | yolov7 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | -| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | +| mmdetection | ✔️ | ✔️ | ✔️ | pytorch | ❌ | local+online | | detectron2 | ✔️ | ✔️ | ✔️ | pytorch | ❌ | online | | vidt | ? 
| ✔️ | ✔️ | pytorch | ❌ | online | -| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | online | +| nanodet | ✔️ | ✔️ | ❌ | pytorch_lightning | ❌ | local+online | - `online` 预训练权重可能在训练时通过网络下载 From ebc3f24e4e7d64f48e942d9126c2249a4f0ca212 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 26 Sep 2022 16:54:57 +0800 Subject: [PATCH 133/150] update readme --- README.MD | 64 +---------------- README_zh-CN.MD | 70 ++----------------- det-yolov4-tmi/training-template.yaml | 5 +- det-yolov5-tmi/mining/ymir_infer.py | 2 - det-yolov5-tmi/mining/ymir_mining_aldd.py | 3 +- det-yolov5-tmi/mining/ymir_mining_cald.py | 3 +- det-yolov5-tmi/train.py | 7 +- det-yolov5-tmi/utils/ymir_yolov5.py | 69 ++---------------- docs/ymir-executor-version.md | 19 +++++ .../img-man/training-template.yaml | 2 + official-docker-image.md | 61 ++++++++++++++++ 11 files changed, 104 insertions(+), 201 deletions(-) create mode 100644 docs/ymir-executor-version.md create mode 100644 official-docker-image.md diff --git a/README.MD b/README.MD index 1e06bd9..e326aeb 100644 --- a/README.MD +++ b/README.MD @@ -4,67 +4,7 @@ - [wiki](https://github.com/modelai/ymir-executor-fork/wiki) -## ymir-1.1.0 official image - -- [yolov4](https://github.com/modelai/ymir-executor-fork#det-yolov4-training) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi - ``` - -- [yolov5](https://github.com/modelai/ymir-executor-fork#det-yolov5-tmi) - - - [change log](./det-yolov5-tmi/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi - ``` - -- [mmdetection](https://github.com/modelai/ymir-executor-fork#det-mmdetection-tmi) - - - [change log](./det-mmdetection-tmi/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi - ``` - -- 
[detectron2](https://github.com/modelai/ymir-detectron2) - - - [change log](https://github.com/modelai/ymir-detectron2/blob/master/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi - ``` - -- [yolov7](https://github.com/modelai/ymir-yolov7) - - - [change log](https://github.com/modelai/ymir-yolov7/blob/main/ymir/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi - ``` - -- [vidt](https://github.com/modelai/ymir-vidt) - - - [change log](https://github.com/modelai/ymir-vidt/tree/main/ymir) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi - ``` - -- [nanodet](https://github.com/modelai/ymir-nanodet/tree/ymir-dev) - - - [change log](https://github.com/modelai/ymir-nanodet/tree/ymir-dev/ymir) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-nanodet-cu111-tmi - ``` +- [ymir executor](./official-docker-image.md) ## overview @@ -110,6 +50,8 @@ gpu: single GeForce GTX 1080 Ti --- +# build ymir executor + ## det-yolov4-tmi - yolov4 training, mining and infer docker image, use `mxnet` and `darknet` framework diff --git a/README_zh-CN.MD b/README_zh-CN.MD index ac6d483..3ca0c44 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -2,69 +2,9 @@ - [ymir](https://github.com/IndustryEssentials/ymir) -- [wiki](https://github.com/modelai/ymir-executor-fork/wiki) +- [说明文档](https://github.com/modelai/ymir-executor-fork/wiki) -## ymir-1.1.0 官方镜像 - -- [yolov4](https://github.com/modelai/ymir-executor-fork#det-yolov4-training) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi - ``` - -- [yolov5](https://github.com/modelai/ymir-executor-fork#det-yolov5-tmi) - - - [change log](./det-yolov5-tmi/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi - ``` - -- 
[mmdetection](https://github.com/modelai/ymir-executor-fork#det-mmdetection-tmi) - - - [change log](./det-mmdetection-tmi/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi - - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi - ``` - -- [detectron2](https://github.com/modelai/ymir-detectron2) - - - [change log](https://github.com/modelai/ymir-detectron2/blob/master/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi - ``` - -- [yolov7](https://github.com/modelai/ymir-yolov7) - - - [change log](https://github.com/modelai/ymir-yolov7/blob/main/ymir/README.md) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi - ``` - -- [vidt](https://github.com/modelai/ymir-vidt) - - - [change log](https://github.com/modelai/ymir-vidt/tree/main/ymir) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi - ``` - -- [nanodet](https://github.com/modelai/ymir-nanodet/tree/ymir-dev) - - - [change log](https://github.com/modelai/ymir-nanodet/tree/ymir-dev/ymir) - - ``` - docker pull youdaoyzbx/ymir-executor:ymir1.1.0-nanodet-cu111-tmi - ``` +- [ymir镜像](./official-docker-image.md) ## 比较 @@ -112,6 +52,8 @@ gpu: single GeForce GTX 1080 Ti --- +# 手动构建ymir镜像 + ## det-yolov4-tmi - yolov4的训练、挖掘与推理镜像,采用mxnet与darknet框架 @@ -145,7 +87,7 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## live-code-executor -- 可以通过`git_url`, `commit id` 或 `tag` 从网上clone代码到镜像并运行, 不推荐使用`branch`, 因为这样拉取的代码可能随时间变化, 实验结果不具备可重复性. +- 可以通过`git_url`, `commit id` 或 `tag` 从网上clone代码到镜像并运行, 不推荐使用`branch`, 因为这样拉取的代码可能随时间变化, 过程不具备可重复性. 
- 参考 [live-code](https://github.com/IndustryEssentials/ymir-remote-git) @@ -189,7 +131,7 @@ docker build -t ymir-executor/live-code:mxnet-tmi -f mxnet.dockerfile ## 关于cuda版本 -- 推荐主机安装11.2以上的cuda版本, 使用11.1及以上的镜像 +- 推荐主机安装高版本驱动,支持11.2以上的cuda版本, 使用11.1及以上的镜像 - GTX3080/GTX3090不支持11.1以下的cuda,只能使用cuda11.1及以上的镜像 diff --git a/det-yolov4-tmi/training-template.yaml b/det-yolov4-tmi/training-template.yaml index 5e75eaf..17810f6 100644 --- a/det-yolov4-tmi/training-template.yaml +++ b/det-yolov4-tmi/training-template.yaml @@ -4,9 +4,10 @@ image_width: 608 learning_rate: 0.0013 max_batches: 20000 warmup_iterations: 1000 -batch: 4 -subdivisions: 32 +batch: 64 +subdivisions: 64 shm_size: '16G' +export_format: 'ark:raw' # class_names: # - cat # gpu_id: '0,1,2,3' diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index 7ac0c4b..7b86684 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -88,8 +88,6 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) - torch.cuda.set_device(gpu) torch.cuda.set_device(LOCAL_RANK) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index f013584..5397372 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -181,8 +181,7 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - torch.cuda.set_device(gpu) + torch.cuda.set_device(LOCAL_RANK) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") run(ymir_cfg, ymir_yolov5) diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py 
index bd5df34..63022fc 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -166,8 +166,7 @@ def main() -> int: if LOCAL_RANK != -1: assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - torch.cuda.set_device(gpu) + torch.cuda.set_device(LOCAL_RANK) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") run(ymir_cfg, ymir_yolov5) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 0d208bf..f84f343 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -39,8 +39,6 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process, write_ymir_training_result - import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model @@ -59,6 +57,7 @@ from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process, write_ymir_training_result LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -419,7 +418,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') weight_file = str(w / f'epoch{epoch}.pt') - write_ymir_training_result(ymir_cfg, map50=results[2], id=str(epoch), files=[weight_file]) + write_ymir_training_result(ymir_cfg, map50=results[2], id=f'epoch_{epoch}', files=[weight_file]) del ckpt callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) @@ -468,7 +467,7 @@ def lf(x): return 
(1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear torch.cuda.empty_cache() # save the best and last weight file with other files in models_dir if RANK in [-1, 0]: - write_ymir_training_result(ymir_cfg, map50=best_fitness, id=str(epochs), files=[]) + write_ymir_training_result(ymir_cfg, map50=best_fitness, id=f'epoch_{epochs}', files=[]) return results diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index 675110c..e2b4c7d 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -171,79 +171,20 @@ def write_monitor_logger(self, stage: YmirStage, p: float): percent=get_ymir_process(stage=stage, p=p, task_idx=self.task_idx, task_num=self.task_num)) -def convert_ymir_to_yolov5(cfg: edict): +def convert_ymir_to_yolov5(cfg: edict, out_dir: str = None): """ convert ymir format dataset to yolov5 format generate data.yaml for training/mining/infer """ - data = dict(path=cfg.ymir.output.root_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) + out_dir = out_dir or cfg.ymir.output.root_dir + data = dict(path=out_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) for split, prefix in zip(['train', 'val', 'test'], ['training', 'val', 'candidate']): src_file = getattr(cfg.ymir.input, f'{prefix}_index_file') if osp.exists(src_file): - shutil.copy(src_file, f'{cfg.ymir.output.root_dir}/{split}.tsv') + shutil.copy(src_file, f'{out_dir}/{split}.tsv') data[split] = f'{split}.tsv' - with open(osp.join(cfg.ymir.output.root_dir, 'data.yaml'), 'w') as fw: + with open(osp.join(out_dir, 'data.yaml'), 'w') as fw: fw.write(yaml.safe_dump(data)) - - -def write_ymir_training_result(cfg: edict, map50: float = 0.0, epoch: int = 0, weight_file: str = ""): - YMIR_VERSION = os.getenv('YMIR_VERSION', '1.2.0') - if Version(YMIR_VERSION) >= Version('1.2.0'): - _write_latest_ymir_training_result(cfg, float(map50), epoch, weight_file) - else: - _write_ancient_ymir_training_result(cfg, 
float(map50)) - - -def _write_latest_ymir_training_result(cfg: edict, map50: float, epoch: int, weight_file: str) -> int: - """ - for ymir>=1.2.0 - cfg: ymir config - map50: map50 - epoch: stage - weight_file: saved weight files, empty weight_file will save all files - - 1. save weight file for each epoch. - 2. save weight file for last.pt, best.pt and other config file - 3. save weight file for best.onnx, no valid map50, attach to stage f"{model}_last_and_best" - """ - model = cfg.param.model - # use `rw.write_training_result` to save training result - if weight_file: - rw.write_model_stage(stage_name=f"{model}_{epoch}", files=[osp.basename(weight_file)], mAP=float(map50)) - else: - # save other files with - files = [ - osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*')) if not f.endswith('.pt') - ] + ['last.pt', 'best.pt'] - - training_result_file = cfg.ymir.output.training_result_file - if osp.exists(training_result_file): - with open(training_result_file, 'r') as f: - training_result = yaml.safe_load(stream=f) - - map50 = max(training_result.get('map', 0.0), map50) - rw.write_model_stage(stage_name=f"{model}_last_and_best", files=files, mAP=float(map50)) - return 0 - - -def _write_ancient_ymir_training_result(cfg: edict, map50: float) -> None: - """ - for 1.0.0 <= ymir <=1.1.0 - """ - - files = [osp.basename(f) for f in glob.glob(osp.join(cfg.ymir.output.models_dir, '*'))] - training_result_file = cfg.ymir.output.training_result_file - if osp.exists(training_result_file): - with open(training_result_file, 'r') as f: - training_result = yaml.safe_load(stream=f) - - training_result['model'] = files - training_result['map'] = max(float(training_result.get('map', 0)), map50) - else: - training_result = {'model': files, 'map': float(map50), 'stage_name': cfg.param.model} - - with open(training_result_file, 'w') as f: - yaml.safe_dump(training_result, f) diff --git a/docs/ymir-executor-version.md b/docs/ymir-executor-version.md new file mode 
100644 index 0000000..247ee13 --- /dev/null +++ b/docs/ymir-executor-version.md @@ -0,0 +1,19 @@ +# ymir1.3.0 (2022-09-30) + +- 支持分开输出模型权重,用户可以采用epoch10.pth进行推理,也可以选择epoch20.pth进行推理 + +- 训练镜像需要指定数据集标注格式, ymir1.1.0默认标注格式为`ark:raw` + +- 训练镜像可以获得系统的ymir版本,方便镜像做兼容 + +## 辅助库 + +- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) 采用ymir1.3.0分支 + +- [ymir-executor-verifier]() 镜像检查工具 + +# ymir1.1.0 + +- [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/ymir-dataset-zh-CN.md) + +- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) 采用ymir1.0.0分支 diff --git a/live-code-executor/img-man/training-template.yaml b/live-code-executor/img-man/training-template.yaml index 865b40b..0ac8798 100644 --- a/live-code-executor/img-man/training-template.yaml +++ b/live-code-executor/img-man/training-template.yaml @@ -6,3 +6,5 @@ gpu_id: '0' task_id: 'default-training-task' pretrained_model_params: [] class_names: [] +export_format: 'ark:raw' +shm_size: '32G' diff --git a/official-docker-image.md b/official-docker-image.md new file mode 100644 index 0000000..a01a91a --- /dev/null +++ b/official-docker-image.md @@ -0,0 +1,61 @@ +# official docker image + +- [yolov4](https://github.com/modelai/ymir-executor-fork#det-yolov4-training) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu112-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov4-cu101-tmi + ``` + +- [yolov5](https://github.com/modelai/ymir-executor-fork#det-yolov5-tmi) + + - [change log](./det-yolov5-tmi/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov5-cu102-tmi + ``` + +- [mmdetection](https://github.com/modelai/ymir-executor-fork#det-mmdetection-tmi) + + - [change log](./det-mmdetection-tmi/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu111-tmi + + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-mmdet-cu102-tmi + ``` + +- 
[detectron2](https://github.com/modelai/ymir-detectron2) + + - [change log](https://github.com/modelai/ymir-detectron2/blob/master/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-detectron2-cu111-tmi + ``` + +- [yolov7](https://github.com/modelai/ymir-yolov7) + + - [change log](https://github.com/modelai/ymir-yolov7/blob/main/ymir/README.md) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-yolov7-cu111-tmi + ``` + +- [vidt](https://github.com/modelai/ymir-vidt) + + - [change log](https://github.com/modelai/ymir-vidt/tree/main/ymir) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu111-tmi + ``` + +- [nanodet](https://github.com/modelai/ymir-nanodet/tree/ymir-dev) + + - [change log](https://github.com/modelai/ymir-nanodet/tree/ymir-dev/ymir) + + ``` + docker pull youdaoyzbx/ymir-executor:ymir1.1.0-nanodet-cu111-tmi + ``` From 92f4c4bdf6a65d3f2bd73a6ce3734af191ab5ed1 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 28 Sep 2022 15:34:47 +0800 Subject: [PATCH 134/150] update infer process --- det-yolov5-tmi/start.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 8cd13b4..daaaebb 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -176,7 +176,7 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: subprocess.run(command.split(), check=True) monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) + percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) if __name__ == '__main__': From 122862f537b4e66125373cbe594d79ce2a17b4bd Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 12 Oct 2022 17:10:20 +0800 Subject: [PATCH 135/150] fix barrier --- det-yolov5-tmi/README.md | 1 + det-yolov5-tmi/mining/data_augment.py | 19 ++++++--- det-yolov5-tmi/mining/mining_cald.py | 17 ++++---- 
det-yolov5-tmi/mining/util.py | 5 ++- det-yolov5-tmi/mining/ymir_infer.py | 15 +++++--- det-yolov5-tmi/mining/ymir_mining_aldd.py | 15 ++++++-- det-yolov5-tmi/mining/ymir_mining_cald.py | 17 ++++---- det-yolov5-tmi/models/common.py | 47 +++++++++++++++-------- det-yolov5-tmi/start.py | 5 ++- det-yolov5-tmi/train.py | 3 +- det-yolov5-tmi/utils/ymir_yolov5.py | 9 +++-- 11 files changed, 94 insertions(+), 59 deletions(-) diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index c2ad3c2..6bf9151 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -40,3 +40,4 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERV - 2022/09/08: add aldd active learning algorithm for mining task. [Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) - 2022/09/14: support change hyper-parameter `num_workers_per_gpu` - 2022/09/16: support change activation, view [rknn](https://github.com/airockchip/rknn_model_zoo/tree/main/models/vision/object_detection/yolov5-pytorch) +- 2022/10/09: fix dist.destroy_process_group() hang diff --git a/det-yolov5-tmi/mining/data_augment.py b/det-yolov5-tmi/mining/data_augment.py index 42af914..cfafaa7 100644 --- a/det-yolov5-tmi/mining/data_augment.py +++ b/det-yolov5-tmi/mining/data_augment.py @@ -8,6 +8,7 @@ import cv2 import numpy as np from nptyping import NDArray + from utils.ymir_yolov5 import BBOX, CV_IMAGE @@ -22,11 +23,13 @@ def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: ''' n1 = boxes1.shape[0] n2 = boxes2.shape[0] - max_xy = np.minimum(np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + max_xy = np.minimum( + np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) - min_xy = np.maximum(np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, 
axis=0)) + min_xy = np.maximum( + np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) @@ -49,8 +52,12 @@ def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ return image, bbox -def cutout(image: CV_IMAGE, bbox: BBOX, cut_num: int = 2, fill_val: int = 0, - bbox_remove_thres: float = 0.4, bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: +def cutout(image: CV_IMAGE, + bbox: BBOX, + cut_num: int = 2, + fill_val: int = 0, + bbox_remove_thres: float = 0.4, + bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: ''' Cutout augmentation image: A PIL image diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py index 1588665..ab458ff 100644 --- a/det-yolov5-tmi/mining/mining_cald.py +++ b/det-yolov5-tmi/mining/mining_cald.py @@ -8,16 +8,17 @@ import cv2 import numpy as np from easydict import EasyDict as edict -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate from nptyping import NDArray from scipy.stats import entropy from tqdm import tqdm -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 + def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: if len(result) > 0: @@ -33,6 +34,7 @@ def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: class MiningCald(YmirYolov5): + def __init__(self, cfg: edict): super().__init__(cfg) @@ -101,8 +103,10 @@ def mining(self) -> List: idx += 1 if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, 
- task_idx=self.task_idx, task_num=self.task_num) + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=self.task_idx, + task_num=self.task_num) monitor.write_monitor_logger(percent=percent) return mining_result @@ -114,10 +118,7 @@ def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], D return the predict result and augment bbox. """ - aug_dict = dict(flip=horizontal_flip, - cutout=cutout, - rotate=rotate, - resize=resize) + aug_dict = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) aug_bboxes = dict() aug_results = dict() diff --git a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py index 5c9b669..54ef5dd 100644 --- a/det-yolov5-tmi/mining/util.py +++ b/det-yolov5-tmi/mining/util.py @@ -19,10 +19,11 @@ import cv2 import numpy as np import torch.utils.data as td -from mining.data_augment import cutout, horizontal_flip, resize, rotate -from mining.mining_cald import get_ious from scipy.stats import entropy from torch.utils.data._utils.collate import default_collate + +from mining.data_augment import cutout, horizontal_flip, resize, rotate +from mining.mining_cald import get_ious from utils.augmentations import letterbox LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py index 7b86684..61d305f 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -12,13 +12,14 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file from tqdm import tqdm -from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config +from mining.util import YmirDataset, load_image_file +from utils.general import scale_coords +from 
utils.ymir_yolov5 import YmirYolov5 + LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) @@ -42,6 +43,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] + max_barrier_times = len(images) // max(1, WORLD_SIZE) // batch_size_per_gpu # origin dataset images_rank = images[RANK::WORLD_SIZE] origin_dataset = YmirDataset(images_rank, load_fn=load_fn) @@ -59,7 +61,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader for idx, batch in enumerate(pbar): # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1: + if LOCAL_RANK != -1 and idx < max_barrier_times: dist.barrier() with torch.no_grad(): @@ -123,8 +125,9 @@ def main() -> int: ymir_infer_result[img_file] = anns rw.write_infer_result(infer_result=ymir_infer_result) - print(f'rank: {RANK}, start destroy process group') - dist.destroy_process_group() + if LOCAL_RANK != -1: + print(f'rank: {RANK}, start destroy process group') + # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 5397372..8151a1b 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -17,18 +17,20 @@ import torch.nn.functional as F import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config +from mining.util import YmirDataset, load_image_file +from utils.ymir_yolov5 import YmirYolov5 + LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # 
https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) class ALDD(object): + def __init__(self, ymir_cfg: edict): self.avg_pool_size = 9 self.max_pool_size = 32 @@ -138,6 +140,8 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] + max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu + # origin dataset if RANK != -1: images_rank = images[RANK::WORLD_SIZE] @@ -158,7 +162,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): miner = ALDD(ymir_cfg) for idx, batch in enumerate(pbar): # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1: + if LOCAL_RANK != -1 and idx < max_barrier_times: dist.barrier() with torch.no_grad(): @@ -188,7 +192,9 @@ def main() -> int: # wait all process to save the mining result if LOCAL_RANK != -1: + print(f'rank: {RANK}, sync start before merge') dist.barrier() + print(f'rank: {RANK}, sync finished before merge') if RANK in [0, -1]: results = [] @@ -203,7 +209,8 @@ def main() -> int: if LOCAL_RANK != -1: print(f'rank: {RANK}, start destroy process group') - dist.destroy_process_group() + # dist.destroy_process_group() + print(f'rank: {RANK}, finished destroy process group') return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py index 63022fc..343a501 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -14,13 +14,14 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict +from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, update_consistency) -from tqdm import tqdm from utils.general 
import scale_coords from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -45,6 +46,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] + max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu # origin dataset images_rank = images[RANK::WORLD_SIZE] origin_dataset = YmirDataset(images_rank, load_fn=load_fn) @@ -63,7 +65,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader for idx, batch in enumerate(pbar): # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1: + if LOCAL_RANK != -1 and idx < max_barrier_times: dist.barrier() with torch.no_grad(): @@ -98,14 +100,11 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): pin_memory=ymir_yolov5.pin_memory, drop_last=False) + # cannot sync here!!! 
dataset_size = len(results) monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) pbar = tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader for idx, batch in enumerate(pbar): - # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1: - dist.barrier() - if idx % monitor_gap == 0 and RANK in [-1, 0]: ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) @@ -188,7 +187,7 @@ def main() -> int: if LOCAL_RANK != -1: print(f'rank: {RANK}, start destroy process group') - dist.destroy_process_group() + # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/models/common.py b/det-yolov5-tmi/models/common.py index b7b6d16..35bbc69 100644 --- a/det-yolov5-tmi/models/common.py +++ b/det-yolov5-tmi/models/common.py @@ -3,9 +3,9 @@ Common modules """ -import os import json import math +import os import platform import warnings from collections import OrderedDict, namedtuple @@ -246,11 +246,12 @@ class GhostBottleneck(nn.Module): def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride super().__init__() c_ = c2 // 2 - self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw - DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw - GhostConv(c_, c2, 1, 1, act=False)) # pw-linear - self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), - Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() + self.conv = nn.Sequential( + GhostConv(c1, c_, 1, 1), # pw + DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw + GhostConv(c_, c2, 1, 1, act=False)) # pw-linear + self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, + act=False)) if s == 2 else nn.Identity() def forward(self, x): return self.conv(x) + self.shortcut(x) @@ -279,9 +280,9 @@ def __init__(self, gain=2): def forward(self, x): b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' s = self.gain - x = x.view(b, s, s, c // s ** 2, h, w) # 
x(1,2,2,16,80,80) + x = x.view(b, s, s, c // s**2, h, w) # x(1,2,2,16,80,80) x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) - return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) + return x.view(b, c // s**2, h * s, w * s) # x(1,16,160,160) class Concat(nn.Module): @@ -334,7 +335,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): stride, names = int(d['stride']), d['names'] elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') - check_requirements(('opencv-python>=4.5.4',)) + check_requirements(('opencv-python>=4.5.4', )) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') @@ -345,7 +346,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): session = onnxruntime.InferenceSession(w, providers=providers) elif xml: # OpenVINO LOGGER.info(f'Loading {w} for OpenVINO inference...') - check_requirements(('openvino-dev',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/ + check_requirements(('openvino-dev', )) # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.inference_engine as ie core = ie.IECore() if not Path(w).is_file(): # if not *.xml @@ -400,9 +401,11 @@ def wrap_frozen_graph(gd, inputs, outputs): Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') - delegate = {'Linux': 'libedgetpu.so.1', - 'Darwin': 'libedgetpu.1.dylib', - 'Windows': 'edgetpu.dll'}[platform.system()] + delegate = { + 'Linux': 'libedgetpu.so.1', + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll' + }[platform.system()] interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) else: # Lite LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') @@ -573,8 
+576,13 @@ def forward(self, imgs, size=640, augment=False, profile=False): t.append(time_sync()) # Post-process - y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes, - agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det) # NMS + y = non_max_suppression(y if self.dmb else y[0], + self.conf, + iou_thres=self.iou, + classes=self.classes, + agnostic=self.agnostic, + multi_label=self.multi_label, + max_det=self.max_det) # NMS for i in range(n): scale_coords(shape1, y[i][:, :4], shape0[i]) @@ -615,8 +623,13 @@ def display(self, pprint=False, show=False, save=False, crop=False, render=False label = f'{self.names[int(cls)]} {conf:.2f}' if crop: file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None - crops.append({'box': box, 'conf': conf, 'cls': cls, 'label': label, - 'im': save_one_box(box, im, file=file, save=save)}) + crops.append({ + 'box': box, + 'conf': conf, + 'cls': cls, + 'label': label, + 'im': save_one_box(box, im, file=file, save=save) + }) else: # all others annotator.box_label(box, label, color=colors(cls)) im = annotator.im diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index daaaebb..c250745 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -5,14 +5,15 @@ import cv2 from easydict import EasyDict as edict -from models.experimental import attempt_download -from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process, write_ymir_training_result) +from models.experimental import attempt_download +from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file + def start(cfg: edict) -> int: logging.info(f'merged config: {cfg}') diff --git a/det-yolov5-tmi/train.py 
b/det-yolov5-tmi/train.py index f84f343..6b5e8ee 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -39,6 +39,8 @@ sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process, write_ymir_training_result + import val # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model @@ -57,7 +59,6 @@ from utils.metrics import fitness from utils.plots import plot_evolve, plot_labels from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first -from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process, write_ymir_training_result LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index e2b4c7d..e58c81d 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -11,16 +11,17 @@ import torch import yaml from easydict import EasyDict as edict -from models.common import DetectMultiBackend from nptyping import NDArray, Shape, UInt8 from packaging.version import Version -from utils.augmentations import letterbox -from utils.general import check_img_size, non_max_suppression, scale_coords -from utils.torch_utils import select_device from ymir_exc import monitor from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process +from models.common import DetectMultiBackend +from utils.augmentations import letterbox +from utils.general import check_img_size, non_max_suppression, scale_coords +from utils.torch_utils import select_device + BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] From bb0b572722c95716bd6552f4ce3a2efcbb11c4c7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E7=8E=8B=E4=BD=B3=E6=AC=A3?= Date: Thu, 13 Oct 2022 13:48:24 +0800 Subject: [PATCH 136/150] Add files via upload --- docs/ymir-docker-develop.drawio.png | Bin 0 -> 56330 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/ymir-docker-develop.drawio.png diff --git a/docs/ymir-docker-develop.drawio.png b/docs/ymir-docker-develop.drawio.png new file mode 100644 index 0000000000000000000000000000000000000000..706a95e4e3d9e5a1ac665e4d73163aa49bc002ae GIT binary patch literal 56330 zcmeFYcT`i|*DgvC5KvJB3rH1&sPq5{NDUzgp@tf&p@fKy-AB3`dy(%d4H?%4vzp%ECRI9DGThMBoyz6FePV@vcsefA7gk%gRc~NK44bnMi|0T6f6L>XorwQ5BrhoqG$3y7N_6z3_yCvsCcqCFX}~O}30wg;6oC4Nm;MT1 z5b#UG%ge*b+{qs8>MJU%Eib1iDW?FK#nDI;JzY^*7;x?3>h1*mLO40NdmdJCB;hEM zuAV?~8DJQaGJpx@Lh>X4g|ua)fxaZc5U7Hr3|J1h^Iwm4SX%ba_>6IQ9Le=Rx;uPo zCtsZN-zO%ydqNz1wY*()v`7|?3eI+DN95lpB|DLPT!Cr&Gim>JepuNzz{}~c)zOLU z>Hv&HR#XlSOtzyd&Y6TG07YE?nNM#cO>&?S21W^h`d}>m90_C-(%;sJIN%ZfG%X5O z6xDzo{sSft_%8~FifYJ+YRdm@SJ5mGZ?B*Sh8xP0P=2zGGB7tyXJcbSdwaYi-qA(T zn`EpgXNgB?V(~`KUeYpN?nFOQAVka2QwytOXlxFK;q=Y4&A`A6Xy_}F$pi!fYiOdO zh{uS^YC8LA2S7}LTmEo;mq33nu!6rLSWef#2khV);N(ZdQ}oI1?w%fQ@)R^)(@Wmd zOvBm98CW`+7`TQ4TwlW-qOI+K({tB>X}boHjJzD3-L(D9-A%R80XP!Q9AXBbTkd* z6~H=JV+t8f!XagKDIOSkQ&*@w85pe}PS#Hjj@Co_7-R8B1yeb&zde-T?(YEwZt7|S zGp9|qcf^xjWb_?8kWN@L7nBRuN5{uU*3{V$1*UjnFs8be&SYme1t<#hbJv$QawljQ zdf7WzLbP=;@-99u#sOdl2S0cqmOycnMw#lMWCEnI+Hg;tn>1Ee5m>F7IAa9f2uZS# z^)yABno^7vyirhZe*;~trIv!LfsB_vR1rpTlE%6lIs5zhA)#muPZyYs0to`vf_WPH z`}#@i0yC)%M<1?kBWWXZO;41fv!NdFmmI>L=#po!8h{&*8< zB1~3SQNayPa6I>N^5%H{D4kn;hK)_p3;`O zfiBv>pAZsW+uz~vP0Gn*u!^q!8gdp0pdE83h?gNAxbUE8>*yHE1(WWVjoBVl-7xRJfCqQ99rP}(2q;E6)Y5s}6^ zP_hx!Q%BB8fnuORax#oa7DVWFZD7C!Ivn&W8)hoaEsnZAtW_U?BnKI3H;RO+_7F894BJeme zFlJ3%X@Y^5JXs&-rb98)C!sX}n}rwfFaZQhFR(ovi!zorm$OG3E9yHCOq~J<7%c=2 zp-pguDcJjBWxSAzSOsG_2X}4Y}a$Dj54}X$5$I$pNyO-Ujx1I6c=uc@o^uQXZ^;rNA6L zJW#q$0ZuYLa2c|RhYJB`rsry6ig1BS``P0h^(Zn3ih-OD$y^cZ1*`?x0HkLiOhW^Y 
zCCb1}bpo_?9f2cO+g?r+D^2#0@o)No2+AGW{3$i^duSU!i<5baZ_+bxZ-32?PZ;Sb**XdqUWoJGr$?+ zy!DJ6@Gb$yK2W%>f+5*S3rx0<39yH_NZWfKb`RW_v#^Jn`4J9RpuL`q0Y%>tN0Eh? z%gB=Cz;ZhBihiyZit>JD9%im)W&|u&*VxV67y-jNX}Vbujpd0%H@Fr--dNWN>!*c= zYGW`Mumu@wDy^Vx?r(3ZjX?NQWDSfQwTwv4<}f#uvxTWS;CD1NG)3GpVH zxe*<3M2HU(@Il%nqKuZCjy71!#KQ|=ZtAW`qUZy|am0~P#$acXeV{=A($ztc=!$kV zbTgMF>Uqg~8)j%Qy$PDPWNVteZ355lhk}%7fu9z(gouU|KTTaEyWd;TT*j zH4M$n{uT4$y^q7?bSLIHIeow40N>v#hRXpo@{CvAYRTQxW0mp$h{;oltNcH*>&; zqJU1l@iKs$*VF-PdlO&?sHK@F(oF{CNJN1lrur6!a`-?!Y57310Ujq0SCn@)wgHZE3q;Eh~g^B@R+#Z@;Sq|cGg+fT(m+? zo@_k#fu7}#hS%+oRMtspS?P<^Ya@)3mDO;Z>h2x9z4DRm+tsNvqpgw3 zuTd@ioJ1BEKDIH72pN^%W0eSB7A9HqUxIv?n4Pqg0QUS-tW0kFmipm|vo?*?wjyKi z_2W-2alJrBh}jkismH)iM1V5b=rC-KBs$Eu3&)W%?Hpxu7&@wbirV%26%65t43GH2 zx#SRxm~X2vLJb!k?dLd$1tSu)38h2Iq%r{a_6++Zndm^|EygaOq0kdRL%DQ7ir;{{+6=1+EOPl>!e^Z<5KFtcvZ1mu_Ucr2^C66gI<}sLHf2pvg*WJzr zeV6}$@qQX4fd>rv<}AU5&Y=@zTDf?c?ZRu%q52SySwd%1eq~F5(Mw)3KA%od6!!K) zBRy#ACYM~x)AGZm_U&%I_(@)5PYs2`^qxe{m6CY`U-QH>Y#TvCcdApyZKFjVSSmCo?(7)woEOrE--V(cfJ?9_w7`I_Wjr= zJJY`l_Mw(?1s<}`?J6>IyY_Dfu1;%kqN?h4$<=uy{JmR^DXX*XEv<}q&xrK;uFU5i zUof6}gXv7Z_gv%sZ8vLs4n%Zrq8{q!^X_5qPx66`NSPf~^|^i!Ffm4U{sz~yQq_JU zZ{E3|%hxyK8aRHA$4ntwp}ry`aO0ZYTkTZx9QQYmo?TVbP6@-zPBR1j-EY9*M4I(h z!w?#WnSdFvzaZ4O6(0IK+Bp$Uo8V6aSGCbf(z~`6<)4_HyQQ<6P+K zTLjqUr{IXHvq{&FAEoxatAn{(HF*{8*}o!iQ5w-|H5Gy65$D_IBN83rl(ox}rgNl> zWG+mN_S85Vyt~bfHZyEGZF%{p_w-x8-Fqz_`0+diK~c5p?Cx*eQQS0--WM7CL0S@sMuA;>t>G5>kSQ*1m0=}$iXP27NuJHZ7YVKLF*m(*LP|a6 z(OGtSjFyn|D(t{UFo&>HzS=oo()Ny_;!A6^m_YCi=D2fN;m(~griJ_n`+jrC=y5Zq4bYdZSf6f4y}*ZqPJoK7=Y%Q4{Q#p4O$O{bW} zXkJ@aO6oMdR<>l7S`8IZslbfqgDEgPqXc6#?Yhd;9lMjoZw zuOqn4k*%ijPLFu+GK-88D&ZJ^3WrqEXIXtEzp6PxrYD*q^@WgVXRM&wI*r#jQH_PB z)e{uy6f`Rsnc{i-L~X2O)o!7dDytw9)qdNV4}tWSjF^Oi0!`jt5sQ@Xe4uaadX=@{ ztX-&)eYChP()%}U(f^z!WXNf!oUgdaz{8<2pY3ucrZ`LDLeH>y_3qjBs-(E>@WkN! 
zSk+fXf@JDQc@qnj0r};39IWIt0y(_2^gd|pmvX1!+Q8N30hLP*I>8)uJd0rC zx7PpT7XOb^|KCMC_YIH{hD&DG@u&)a4(SiAGPGYzOe3^65BYsYz!&a`=KkXhW%zh+ zGci4(;|gamq+`>vpjX$qmCn-s^dG|qEMQ2hyZcguUYzc*Skwz3R6DB7kqi6R@Cq5uh|x}w`E~F7Yj6e(-s_Sdng8iW@Es6_3C~`8c;a6}Ghi5*wYhiv zumPYS^;_I@G_9Wj(tQ6K(g4GUo|<#Ke+Q}_ar6ouGWx~Jb)|m|kVC^Oij2bFfrc}j zGNYq%;wIku{%i0)G&nEVd;A?J9h2}g!1=#xzB%=;VdT)zJlOd2{~r@V4NvSVOlb^A zJI8zK`_(CCzEYFxZE_Wk*wdX)!qeIAP04)He}A2u>jxL|M&gL-ghtxK%Z4jINa`^W zp64-vewP(o=uMx7tFT>LvOG?YnO$PJx&+K2M(`CQ)&92&;QB)I2`NClRI^L(P6uLA zV*xycF%Z+|3b^$FL06+>*c%>oowqGOblw9*^be=*^pb(-^JSC33uY>2(FZ8104xst z_S143Uh&KU-ODMqx%(BXil;wh1ECBT zw$FE!5v;-1h6z3|l=qVJgQ=o0$;DC7Zu;aYq{RmdsQ4){Jq{Cfi?e# z=x$Y>8Ke#Fv0TbJ-cMY}m?%;h3)!!-_WYDT;G*Xg$qQ5Vl3BQ*Eg5YHf-pLp9s- zTjN)2s%xfd=g+qPN@xYwdqnZH43w>$UOR zuYlB}l~($taJHj&$S}@UC4sq#qkhytNp)#o&fr?Ms@mwY%)+CvzIB4@bfxRpWSOMw zsln-sZ*j#V)2+gii*K@+I`-4nX?+)+ zAR})xr+OzxlTY-2^;sSJJ!04XZs%;_XRf$x5TIhaj#L)cGPL_vbJfy z&}?5tT&&9cxxnf8yrgtfpQ6v_^>7EzY|Ad?l=rmYh1FxC1M^eSOe6*nC%0>FZ2%`t z`ugJgTD_We))&9V`BaumDJ|yUr_Ub7ESxK%(^h{vhvnn0yjPNzWMX&y7JMw{LB)`D zQFu6gKD_C)soJqiZDI$(U*oKdU0c`K_{TyDN~G#0TTkq#rM!S#eqVyhCg#Z)&l@So z*4Ea$!L!09AQc{=nT6A5#0`s{XQXYOrzSKNWMb_!U~C)32cD0^oCR^}KLk2THc_=J|Re;>O& z_cfG1@3i_RZm*R^KvW_!K!Z(c2~6k`nHlHjPZ6-mNqh4|RXZ=!v6i@>{(W`jdD@gdkf|xG{^OgxV9~0L0@Fg%estQ^Vp|2rw1-OXDjqH2hS9R@OgHI55dlrUPy5z>wWOl7jH?Z~@Eaan`L&2qU9fhu_K8Y);!Ip6xbHgiA*7A}h}| z`=f+`Qh9pe{*n_+`cZ8{jmK}L@#tvX%D#5YR=@JP)p-*LNu)|LG}-ChEiSccB@IIl zepMpmdvQj#u;RAAGI{=quvv9j`#6o38kd{Nh|%TN+Or0oRH989#Xq-FT@4nWp=M2F zPa3hR6b=X5@o!RMQ*#C3$=aVA?r_PpK0DOdN9n#B2Ocr+YD0CHhsUwqx|nOI99CZ8G>Lv)cM*1eeNJ$Lju3gt@Z_ z{Eb`NYBcB~Na+>>W@M=@+~`(`dOmd|nQo@6R*3peIdJv)r(+6%^(+YC%|=vZ0SO}; z!PNi-^-Ce}kg%L_RxvTr#jgqaH*eo5sy!Qu(my5)8a5bv5%gJ}hU9v8MXa&qeFrP3 z4G*NRm$IaB+8B|9HwPqJrVY^m#pj{@mtVd@*@gDjILGOTC%fFTVkpqUK(-S5#%0sg zzd>cIZg73Ei5Izk-k~s!OI_z@SdNQYEZZ$MP~Zs`#JOE9jQ_^52FLU8th3EF*GjLB zmeq)zMz&Hm7A3Rm6$f=@1}%#OVRl&wuWZt4g3o<)oH*GnRqs&S`%m_!HVJrO~cy|cH{5}$Cx 
zgK(gemua`0e1FdAy#6>R2vzyiSpLckGj-#A|2MqeIjTM0=y%yl-3Mw_Bf0AbMJIqz zSKhw7@O=43V!AQ~Gy3s9K~Z3xn+gOhYieB!EjZlJjPuJ$AC0dA*5GTPw$qVY3;-;F z=@5bobCh=e)V;spzQUNAqS|j;o|%>UoY^}(QAkwrN8259G1QOeR*HVNn-qtFVdxae z92kcR*ecqPgY5_aNs3B?=7>O6{5vxr7Sl6nq)cSU0XBAbX`GbJy_UBBhx&$Nb;THfL`U71G+US3ksOZS_+5yqguYZ%jHVvJS5S22Q)*A|*gSwyigPz%ZeE;|cfUYLI8z|7vNOD*uRBH6J z++(6`8hxqJtrvuoRBPw`Wq6Y)go@G0_tRG<$xai;GFPW---}Pyude&r`6*SLE+ZTm z+wIM4h<`!7!HJFK-sIx%HZ`|_n_GJ`4S2mjj{ADM{HeMQ5W!(}PeQLmE^O1&9EF}l znnIqbc>PjuNk6`IgDW6>d>Pj?58P2jy;|ZH1J7Jf(Bpk%HN9}-E%p_@wa>Fb&(F`V z$2UUBQ1B#^Fe}Jb@l4ZH@%H`q!EDbiy!cHf>l6|`G_x~}U*#Iyd!KTI0mJa63Tvk`D4?WE97ix`vf6Stv4n9>szEOH5 z?CIcwk8$RQQ?s_3)1TT`9`V=I_ZKvC4WD*i8-S7q?^s_K-^;E36-!Xvje76eSeg`V z^sDOPzM@J{&x+t(2`SF)n!k$}{3VXA^2;D9W zJzctQ*v*tI@n&11ZhPvPwV7nys8#g&@Ks~$^a_p|Xk4G|O0T8gjBMtGCb6%!!CUZY z5ew&rBJ<})cQTf8Zj+Xu^^aWLBMJ2MHsvi&lWBd#xoe8r^QT6F$hEURye0kC&vy(P z9(;Dc7OXUX?<)_FrJ1f074lZ}o*IuDa3+RS`%Y={DtH`yf8;eQQ!wZx6zN^_3Zd`o zHwc-XXwy)OPi$NMVqz+zO-Tf0<&!Dx&f{{YD}gqEG@^bJe6ufVe-%$l9zd&BD^pm zos*|-`T~B`WE*Ywj`Qc&kG54mTaMqrj4xLXuzy&G4t1BDxQ;xvxx@&%kQDEdWE?g@ z=&Eknnvh}EL+%3s@(eGgQZIzUu^_m;m80^KsizzKs==_Hvf)3FQKUAxCY#1p?cPn)NqO+@ z>r4%LmG@?PO^{>3od*yn3c7lS0ytnt3bnol(d%bb4!pJKjKz6zendCXCcmfkdz<7V z|G5zq;FFD!D0%)=$_9`k9}HeT!hVqzRPC_Jd$;a{-IXJVT;7YE&c;_9XP_WkGv#c$ zsa>he8PpRT(RS#S z0B2(Qe(=WZCnXimx8qy7@xEx4=A(MKV%@*DlO2qM{P@xdQ<+YMHw6wVla}Q8tqjtb zXpS>}9qgEh&l)<^n^LrLAeA31r^kHL4(~Pm6~weVKd&)z6|YwhxVN!BM(DHvkPd_Y zSH^A-$ottG$^V@c{J-Mt9H6w%UtWw}cF#w(|6Vjd3e^9m#YDT1pShy>!53AF=U1N- z1UfuYaH*HgnEpy~5FH3t?H|?$Jm7tL(3kBmJpvL2E(Mau6H}wUn`qBj1Bk?zq_)XV zCJ||{n6z^>!BgzMwi|Z~3W@ZDp>-&b;l_(3yc2tOLZIL7{(TOQ2M=)LCly2Orb%Os zGa*vmM`Cyc0=Oj7)Xcdb7`tsbSL|E8zGlSto2=cpzWQr#qYcP@O)4Zj_pe|4{!l^n z3x8GLr{@IK?#QMU_sOWG=_CEtwZP0>K6{x4srWIyG56W#+IEN<>Fmjr(VLz9<)|5! 
zM(^{XQ+GZtC0n~9hAz)O`{Xy~`xD4fi^u@8qs{m0lGCRl`B?t?FrlRh>kPYo=i+j~ z)bG`cZj6|B?Pnxdiq!Rm8Uthm-qWd#N!5%zK0@;d_EZ5_8MDQAbi#Ma*7&@x=!sv^ zPvdOz+w$9wOsT9NdSxzPkTVvXn0oj2>6B4bRUieAzPK>3ePC)-aDyv!NoFg!ngAs8 zR}8B*m+cB$n&}l?%K*L~UoB*je4|Z+ErnZrxK!`4Vor69o2$vV`1lJT zaYt=a_v`eF##QYsc7*}1Lb$#*j*wJp&jC9&l$Tv$6=fs{U`-qJZWHfP#`!k0GaK_Ui0!e{jz&QU_#e<=nlTqR>5R=npo47RC2HFf4VYRk zvl%n6R-J@2W3Emuw(d0Z2BZT=Q8x!d?TC&@LOdI0OwP3kmbcF4wV8u32B^B{mY6mn zR7=>fvDJAh#8UVaX9U;O&AyrvVg|#*GZI!mI$j!^szp|t0L<549*t^@jJJvw)e(N* zA+ULzhd-n8B0@FA>D1mWi;|##-JAG}iT`Q=(zs@h9OJ`O8dPN?7VHGazZUHj>;wXH zB;`CM63y=ut6Fo0#V`D$fKa;#fhLByXrCO;h|!Nxzx5QcV}*Tob-9X5d&xs zr}XeGu87nYwLR;%EOsBo73eUS7RrB4TPE^r6FS47U*TITbF?GGM^QG!E(Od@o2=lQ z*>h$9g$gS-=ajs#If0Zht~@E!DDiG7P zd26<4I2X`?-8HwqEMO_Z6Ywl4_sm+4GT-kufBr;O@kAC>-q>{H*$DuJ3(iT_SMuG) zVg}hJ3X%FceXo@GslH7kxlO3n(|&8nm=mDD1AzG=S^s2az7o9;U=wfwLa5#Nuf?5X zpwFn6<1K=9e84O`W1Aoy1$zvY!z7Bl92*~XKb@fUTY%NyGKqDOM8;Ri1Zx~D+1vL` zPia6Cee21RaekGyi|a@eCV(eG9hl$eG%^&-qrST_iB=%i1XUip89lpSHGf-Ubva?e zr1Uc5t=a9?6u(tbu6AIJopwIR8GlevXb;Y@n#Tb-(0sfTU&)4#jL}JyjB%BrP$PlD zF>$|=pwYF0TpducrDiSE#@j&K=D+4#F%3nn2q=ivtKGhMC3Ubb5!s)V#+0NP< zq_VT>!**{|6jg711`7JJBG9Dr4;@E30jlEV32XySyNBQJXhw216hc?beZ_xW;NDBR zT+rniChq)?LgJ7`yv61GK{LfzjUq~sooEmP)(Qt5XlkuJo5$KR>tgfc@WfTi+cpeT z&hmFIYT``xtRRi((|P2>L;c7<*zHMYWY1EMejsJ-()MEd^Vg%Vq^45CLfum~UtD_n zM(7Tou3o6WNV>lz`!?#6S;yF&`y7TW0APF?*iHWh7<)%1ayQuNE!5V>>wD~7RQkR7 zyQ30&QEBYQ%{fkiE)lNH-!q=B6XiCmQ;4t_6bM4DfdbX7PX6Rs9m9VqQUcJ4JrUQi@xr;sz zR1^E3)mSn9(GyLCnyI{r{ST7yIK3{m-L;m5w)pR{hG?}Ynxlln>&pOhw+rm}zX_cr z^&P4-ZfNC#U&C+ft4lxS8}B_E&dJP)+j~@8zlTweiBC9Hcnm4ybLJl(8BwYJwWy^I zrK~!D%1aL^v|)J9swD=MQ29z#7T>n*`Z6a$IQS%_b$6Iff^IX@t zD=+3o`Bpa+)#zyFuAGl&2Hs{Y!@qCyEOKig|I>>OhwcuxZ#>v#)K?ViGe59n4uc%J z8tZD#bQMAU<2x~wdZ?gyJ-%%EYjBvOeO%G6#e8E@(UIxQP{ryFQs;BX z(|~_?qK5!26a~GC|IL80|801TiTehe^RmAlujF_&caY6FTaRFYyLI?JX05kUs^l0tM}>ZrT(1 z1x0_K__+9o`MCrH6}lZT8)KkkE?buBA;%>6kxloZbi~K|yv}n+y|31n6|;VjR@V`f 
z%{q11y815H=IpDM$;#2#?6AEVyRH)_sP@m@8Dx%8Iiv2m9QpJ7YE0#xL;4byubsXF{^->VW@DUQ{N-M4HbDmGz$vzHKNmEEJ-y^P3nDGUsmMv*7z! z=6UR61=q&`jL>KF$S+TSPO|+OoIF$D+BEzo^YN^^lGWl;x#p&(t#2Y23%o}^7Vj}) zWGU=|qCVdZ84nB7FxQRdI3Y+wdJGVAIskb60D4~?9SgFY8=xU3fV{xyq=Pn zDDpODj!Fty;B}o%BwYsa=;yswGJVV5P(16+E#tC?R>!Z>WDAQPXAe3&d3Y@sqj7g7 zhwtCd~z%cGd}OQI`@8 zo(hz?6g-)#bg&Xn_3)?1cq}q_{2^N{3g2`+x0<@aF8QRoT4{T$B}`wLGQVma$m)Ld zd#a+rthCbV57>k2;}NVaKY*>qQRF@}7YN6sQH75ahhEx5z?PoB=ZZpd-O|XyKEShj zU4PMP7YOCMp0?1mFyXsv;70lQh?P2gfs4;$|Fu1+u6p}`v-R`@B$)jqMnhc(`7R6l zXXRRR`A*-Ik$q8Jr(Pv||IY?azOvX7wr>nK(pR1X1HC6GMJl9}WXGv0(Ibtb zX_IJZwOOgjC^v4nR7nd$8M<`}72p5Jdb`8ZE}W}D=jnyR?fFEV`xmX#riR>_2HjZ^ zC2fuTlO?(b>Bq>ijLn{yFpNV7UXm#DAEzN-N3Iz9S0R>l`^%5c(21bR(~OtDt=zTt z!H9Iug zJRtL0H~qm#*^YR3J$Z(MYJc z|B4^~?7wH2C;9nNy{}ziu-nI8ewCE@m@M(xEao(0HXTUDOZYZ$B++plMMn?^s#_C5 z#`yq9iSxsCND4M0RcEeNL(kWzEF0isaNxS>5F3p^lgrxFjpn9ZEjWevf}M{hF=g+UqnrkgL=3)i?i5^r=QA zbr>O9*XR1C4)z)joa5JqQW^f^tlHrP{LmHvlm20g!v&B%ubktQY}Nrlu&l}dJI{X{ zj@sF$0N0xd_^v-LAAnv|c_W3K05Ko=JO^+>3TW`+nf|AWE1fbdx;mTnDL>erTi)e| z^tgq4o6#ih3&gTK!`0u-Zi-T!KA78yYvG6GMS0hXE) z>3E-;S~?khC-FBR>08R#i=A5o*O87Lzml4`!cr&2Xv_sX!{vt!fY4~yH7?`kq1UO+ zb&>IBdKS(V9kaNY5x7gwjoQkrzT8{qe)johJNs&LwPFF~AR_wcT`k)f#69CvXN%Pw zdiU=>2b6;)*AdsBEX5;Q-);GHIs<}1T|#eJUd+oJ-|@~pPNQ?hVee3RB7&~pi%LXh zf32_Wug`DA=-e&N8%Nt25>D{RWH58^rcajG?C6_7iI8-%wla?$^sZZxSW#&+DT;PP z?a_PUslO3k3gF~O1q$0=%_3@HG5p*;n}gdTNbE}Q6%*5n@N&T0;ccl(;7}8!TQ}n`r(2OeDAA&Y!S&TR~{0P51vFc zp-pH2S9$vSo5iOUd+Vw0rnhd=BUvu0aA;>K2Zk}ojIiu7<5d0yURgE44zUPJPWM_p|p;V~#-1xrpeD=sxl5!ttKh zz0EOrkRvly9OOD+<=&|;q~8G-5&hNUiuJ*%v#I6d|0+U@1a_0Fgma*O>lhE_C(mA3`>rx0E8~s*C%;*)GP0<30GbTrwIADnWY@SMqq~S@ ze`>BbU-Z62YK3^zFpC7^?Q3ACj^*p)+q;G73U`aA`_Oye_6AMBf~_s=ckN6#^h1|! 
zmIdonZDW&zDmj>iEAA8-JC&!})h0G7AXRK*PVtgEJk8fvAnA1PIA-EhH^p5+-`Ihq zE6?@SC9KrOV&(4SUYwT=Wc~F3=RQv9OV2$94_7HMHV%{3YBc(dY|L1D4;4xhzHrz4mMB zIuhm8pu`0c(z(N>c#?Ga>Zwz)%hLYP;Tr@e!{N{IW7V73WY*J|-O1@A8ZQGjD~G`~ zfw3|DlcRG#t8T_vOzrEXJg`@=YVVF^r|l9u96I(M`%F<`9QtZ5iBX9RL&19z5+SoK zzCY^2?7n$ft;t@V_pzlh(>v?|TDGNMqX0+^`#TrJE;NyFx$8W zxJH8s9!wh?vwKZ9y~gVtUGlXGE!MYa72>JXCqj+xsGEdEhd2RbYqoA~lS-(spEl;m zvrBrJ4`mawVB)|FSTJ&H*zQk#J=&h`+uM`J+%9POQagfTJEX1`>*^su`@993$mqVG z5$qUI0s!R7kvA|sRdYX!Y%0>#HF{2(C<8+f^*IjWj9mPRT3V6z(~NoDvwSNVkO4o8 z=ZY3tY)CVI8-g0tD-x*Cne}K($y}fB$>xj^z2yetOjjMo(>&4NBTD^D#2WxP)?SZw=U2FFfU0r~zMz~gLzLK71lX4CIfZxy5# z9usUddAr%s0`T3W|3~=lxw)|5X>4`*K^f~Lx~v9qhYR$Hyp2*nu`LCXgo2(m#SE3? zN>$9|W@hX>c;ciP6oC0v5*C^g>p?!^x95b|5rnHH*f#SIE0YhES7VmD9T%sQajLNy z9N2b);sIN`5=oFp$0XA4M{mG@l$qFvSSj8tqH(fSLKWX$RM4KGV$Z<)Pjf3fRtgQ7 z5T|7N7%9%N@=8MeK^}mtv-BSqK}^gzEsxD;RYcJq1_%Pc#4GWY-Yr}E&;9nw?vu(n zwdJ;bu3H;sjQ|v4l9t*w`!IV_tyyXD`nA3g`ke&s%^kbJJVIuuhWSuv^!A(T57_9K zNfw$6ZSiK85EpuWs3M2Q zx$EVeEqX-ksXs1)7|va&`XROGV~fd8Y3FckUs&-T4E<)rU2HD1S7BnM0#6riLL;{K z)So2}mRkksBU!(vV!_9dUo^qbbvlF^Jy!Gi6q!cOpiUhuwzVjic+yT=9Yn=K>!I?* zuA^`4m&Tl=d#VU$L$-}nLMrzK4);dBDF2?5sR%qS#$#X^$4T>|X`V1|KlGK}HAq9r z%N}kv@FBPkWB_}vtDvz5>7JX_$VH^(}JUqNv z{>6HLkRu8DXnmkFb@Dx>ye+n+p60o~_1tFU{o`8p+a52!FB(0u$=5f?qZXth^VQ4Oz0~TAWV_m zSRLgMS++TMeY+nL^}9n?&m&b^Kc-Pm4c5NQ1WH&L_WOjs{eID=e-08jl(b@PNN!$4 zl{72gdk5CrFwOH(HtJhfTkC8HjY~*7a)GQVDJc5w(Mj9B@Wg8wkfMofFOv^I-QcO>G@QWLh0HGMQs`|<21N`c@=1f3ETkY5^hMhW!E zL71Hi-22#v`;dPNIRQ7^m6RlJ*m&0 z;_5n7x8KDQ>Hv#Kc_r~lg&u=pS9SP!tQt_WOw}yZ8=oCaI>c0Teo{xSebYU$Z~4*@ zK62*z-m2-|CC9y|#c`N&x|xdA>AY7YY}IP4rzoDT&A{7lP<1v}u%hmFS=n@#lPWaX zAhB+9`ECd5MRTZmiDmW)l~MHZ@iA;wBX)aSwX`h7V7PM*SuxNXK)ly~@x#Wff6s)1 zZB>=!#?w%-_-_AA-~RJS^5ECc9{LJ;-$0+U;|@=hHaXs)cR27}Y;qny0C`7x4%R{m zedT)G*Zv|b8EVJ#Y|~@f6%*jB6OjRdr{5RX~8ZL_6f$^};|Fb$I~*=e#vy@|*^ zvb`m6;rt>ys9zBlz4CP7wN1;Z_&b$ps#h|heT(i3dwwgN@$B?`dyY1WA+^oQ-t{GR zft6l^nw?y6-BXH+$f-bFH|+r_T@hU~{$$Ml3$SJy1qzu==|JIJ?729|C286>o-P}C 
z)zDpk9z|l>cEGz^tkWBsVF$Y_EqUWjDFTA~|F@L_-;pZz)Dsy0M>=6sXbT3bo}X9F z?o??Q7FzRH1V#K*o!Hs#q(25?Pzx1|%$DVRPMTh*{cgDdJ-C49=M)r8Tj)7VndmU@ zG8_Wj1>zG|+XO;ZTTSfjO4zhP2?=ghcZM&SiZoua98vUEIF0nayA2Uooe~Dwu1}M1 z@g4H)CsC*a${o&qfU}^(?6xFFB^{yB$mb?#iAhNXV?DuRw8;SWKyXBgU2!y)GVbw1g5V_`Fz_IX|~gcA39rraaxejTUE z@PN^^*y4d|!&ljc34+%2*TYD>ENxb9Ib%LI*Pd@OSYNZWyjiH`Y` ziBdMNR+gA;c`O;niOe?-pLRsod#dC#(8?v&0_@V;8S(PeAB)$D>oCHZYv)?x6@=4}dCfa52;KasBb+7Quig|J0-G3Hv?u zjj}h129rGYw$9p)Y^!EEHjx+a?p*f*=;2#H;{Wimg0hkk;^0xlm)z0C@fJYAw;qWH zByjiX%AUU2V7pE+06Q}ZHgY0c<33LWk=^DW=aUYl=Bli!tL(d&h=@?3r&j#Y-EGGk z>@0ojW}bh}+pR1Uqy2>J0W6Iz{eOON5c-^+X2Ae_Pv!_f7=SLfA==^&mx@4RmDc;y z*8vtjI;!a(p}NQ(k#EnIKOk%&qlAlg)oxp}?Lu`8SrwWsMO@qa-L{$jcJpAkhS_ZKFjEu&Xc@15uJeb}0Zuii*+f&~tkM8C3~ z`@Pb_06<@=!vFF0!yP-|D~dDfF>>^Pt4!-#)7)N(5xtCgC&h;m;c0u?-hwoE$Ng}4 znb0!*Fsf^EK^cH6EoJLTXQrA|`fp!FL1%W-gK;Uq41$g!m+~?*TvCZUr448HQW8W= zi{Da?d}+M7ZzZ0V5T59=%|o@Pr%i5&@4ne!1c{~j#ys{u#=V)m%Q((zK=ek0VowtebH+rQnO(3J-Sf{LId^PS?X6O+S0jz*9dwfJxwc! zIqwBOzjr`!p|J&Zn*cn&?j2>hDHTxC81| zN!zZK1=Ef$I^w{`*Z5KfOntY3-Q(f$b1J2!fWK@>O9VdK&F|+#T~8Xb!p6pvp}g@limg%E+V``Znv$Ic>D_K+0m=Rmq&E??K3e5LR9|l= z7RvUh$IM;N0s=RC1=!)TtD2jS4?Y#bUiv@Sd(W__wk~Z~K%xXC2#DlZWRZ+ykRn5o zB!dJ|l$>zuXrb;tNJVQH8X9h6q_-3T`_ijx z5Y)TX#1ZMh)Xf@27mvX&w#zbTOB#puddiPI{$+Bh0ldqeyqY2sXn-PFTjL0g z9|)RFxbfYZhk9M=iZ>Y9h%6p09DBM)Y+*Ybc42;8SpY&Zxud=!^&=c1TUcDV_Qws) zL%W!cp58)ByD$atTFIh764sWc^ge-{1me}BC$9zHa5%m#FPdeJav0Il86eRykQA7* zldhUhH~I{g2Pr>dp}EVsF5f(=QB;VRZ*Pxm)5sUA^YqL4pT4n6T&LdqoKF5p;`$@0Kd5S;Q&Y)bZx?bPm3D;@tb#NGDADg|m^RxnF zrB(gpSv{-9#-gWWn-52ok|f6VDbri;?B4*h8Ve1h$i;rhjq1+>#MMJ4LV8~nB^?%V z@LzVRfaL!FDOaAcROd7YvjU2ASgJnVEqs4sN>o8sIs$94R-?vXxuY7RaN4XY4<6tp zhfa-^p9H*RP_uJcD#nr77jYrRIzM}8A=IP2Zs)T&QbLNbo0n}ohrh5mWBo(C2u~6f zM!fPSrv=G~N#I=jRd0|GhSn^^=>t>GLrpC$i}+b|8$23D6X?L6`W@Z}4N+mO!zOgfIdciITyq!ou2(gFqaY6509I(6AUz~ODr^RdBPZ+h&R!~} zp&8`rUL)Kj!_IqFP7#wAF+DR7@6Oqc>F_a!Gj4gCbMdy9uO|e(nFZ!w~CJHPIk)eI8#5L?Pg1lz`fc zIbBF7sCTgB7on@UY`?33`!r1vbwe&s*a&ZN|i^OrAL!so-466(Otwrbjf#ok{xabrT&;} 
zw2Y+q($tqE5)3Vk+BS^8)fz~1;jiTtR16&%9l-i+$qClUe;5*x@c~%9dSlyT!SNd0 zKOd7{CdT7R44??w5z(R30Y&{q9xyZhWqh0+OMDqj+$iUsrIz{)oF36Ip{qN+&YFq(rwW_zv&)flY8=ZqxJ)!SCPyfA@fZ z+gq*0^}5dYhL0Ce&X9b<_=dy1lAvE!6y~A!SpmIu|D9QuA$O;~|I1*c(bm+bS<^o; zIF5*h(Q}ChvA?RB?2VR-jm{4H5jyq^JlK)aUWuq6hB!`U9bm%%rWEqu(*}}rvu%wB zR_Qs{VwvK*APHsxmWaRDI87ydNU7aS4PlJ~I3EA|z5K<~X$0ULAZ1!onC}F~4Zwx} z0m(VnLqmRG3R@%_CXf{U?+5;u52-FcyzFqK5*bzg@}BZlhv}twlqKAv9ktXa1$Cr9 z3qFrRM6+x?{@EHF)gUFo=<<9!C(?>4yfIJ^th{cI0gz;Lg%JplU=CYFO=Z@kWUC}v1v*G|{EW(x=9IKZ<8f4~_oCE6R@Xe8HKBxk#id60d?Z(-gjrZOPE?+!|p&#-uyHgg_p26`YdKThlRZT%V% z4>sPBFIKYZh$P8rxNA5kmb?dv z#v()3jcAO))&T%Sdg}Yn&=K$ClOYH@TU=IL(sP=*V4MJd_Otc?k3Y5#6bEWcnB&MC zUf=nkWRT>-QoJ!jY;C-z31xu1o-63(pE;>x+CG|Q0?U;rU5gwzPRHdSYr*Ve`R*3f zRDXO^lWX&d%m5T!6ju)SVVc)@UYX}sswg&vHoLAQQn21B=pVRt*LQG7{-?cEnDJT7 z-L>ZamW|!lZRwLOCM-Tb`pB=1W7<@$n2~Nnb223YsT)^a+IQm`rkXCw8*tNjm0?m=->VA*F)eVj22K@I~y^M%rk0W!dYioGs>~mZ$N? zX9o_0t=slZv8jX>QMot~TW`vJ^64N88l77B3P72?^k`^>rAE}qXXNa$;WSN4LK%2hjY>XYb_IPmJS~&5OD((GT{AzHu!i zE3y!7%w8h!f`~;W1pvM*H=jFxw;{#2{F_}9$y3)#t{H}baZHFE| zePD0#;B#$z-z{DS#W)E;zz(RweP@A`aaaE98-$=Ej7;J9OGlUZxu3JX&tN>eTc~Uw z<*fs-&x3A;&9~Wkf@trLzf1rIp|n5iWt1%wela(JxUc`uxHuEgxhx-WCBq+27-eHz zM4(+P!&xY=V;l;{=)|937n#s$<{x0!(&4VaN8W z>lg3J{O-9ND)krL@6Les!961kVKBOP=k#Yro7L=j4W?J?){ZqOapg4$iEEf8@vgcY zP^mJIQ%I`-n|vn|m19!X#!6dJpPE@@$Xk8!Z!#%7ZxP$^>aw!vT0nM5;IoZ30n||S zzA5fi>=e+B%8nXgGy1WgyF+PduMEUDh!x1IlZm~k$G5yC6tLi5BjG^alpCjfZ0g-I zaxsjSICBNIdMw=TXFZk0@UQUMHwdUF$kge4c61!rmhlAA8<-?UBN@HruH+qlDwf@G-37>-RhZp(vdu3Qx5R=6;9KX7fC zz$;bFLoB4+IQ$cISbAa`cyU(960e-MK+J^>z!b;8WCh zG6{vxz~CNXS0RImdhvdp6t;g@Wpl!fE%u2faI&`oZ1o~@y8&P#)GAcQgADd5%52&J zmV7J2K%#Rrw%lqULw3JMU~xboYW2 zF(!F-DLMxjE$a54;a`7z<}&KFd(8x8XvT(|Ut5a59~G5f|C}CTb1NDA3%rYa&x|~5 zBJX6bdvv0MjS?)8&-&rP!9gJJ)_p2AItaO95YBBjIBV3)Ei_NcV5ov0d~S-B9UEaJ z1c{oY9SFlu@rk<*AvM12LKJnYEbqKT&J5uF=K6^MrFAsOgNp-3?r$d*00AL1`SK0l za(5$0dL&o}-8p3SOD6X8j_%}<09OTr)BcUdsLpd7a=s15`qKo;P~~I`0G}D+z{teh 
zg8Q1EI`1DEUuEp-e-s6oF_XxZVmi?%I_yec-3woN5nr&yTZU(FEn%g5gA^3&y`zZ+ z0p_^Z$?n{P1TIJ82aHj+z=ysmNN~~_2s_ynkG{7mUvV_nBl5#0U1`E0uJS2ntU;ii z<4(znD_^5l4&GUP8)luSlBMM3D)0xtiaD>jC0Yl8uW}0{zlQtt&QrjgZtPv8ce`G# z)E{_bm(+#xJZ)myXt{lH=Q;+_#U_(@HEoXYZR#1!Z@uB$YbYR2p6x4paNtMuq~{gL zknV~f|H;);M{+^%zIaV;oI0-&3GA0Ko^K}$y@=ycu-S~|X!JrW^>P8Csnuc4B|}~i zTxf3Oh-?@9L)X*YEGpSc5q*&K#satAc3s(3^^^X$Hsd$an`BBi7CF=Z6l?4bHO9~J z^zy$@%?aKze%Bvl`)1L;>*KLWg%f5epK#edQQ<4~5t13T!Wo_6Vrh{(-e;HPQ8GU@ zF_$;a=M+5-SC$AjK7QMB3C-C1!5?&8=xw|+Qf*MN)|G+;2$HPyzc9J#>}hpC$sCik ztMz`KWIzXT*-efTNgC{NnVODPp%np`Hn4L}PC$7CObkQ=T!{98bwJ&d)@W17LJEE0 z{amkqDlYH;{COfX05B)=s~@twzOv3(cKd1YRV$EhxoB`UrmZeJ0W1jG^3~e+T8o0( zciUygMh{7iJZSHI-hRzBLsWPCWvg`forQEpnWN}65rMFwD8nHwYC6@44)iJqdr+!(3|5RM9!DFlc}ITV_r>pD$u012D^k5HOa6f&-<#xd!H$lN zXcw&KR3|psslBUKt=cDze9Pj6!$f^(Th@5KsFN)4TCWz$%rlH^wA3ndaroR@!LT?{ z;`AmyOI8$pY|Er5_8)z-Ee-pCnAx8}L+h(Tp->ck#x5>Y$jYYc0MU~aIDFMVmkmU4 zzSbN1PAc(^R^M)PEf2DBW?iM+H=a&kcR>TGvG(0p6=|EidP|hjM)1Lduf?Ic0m&I` zi4F+4M%Ok1#D`vB^Ok4#ly96P8kpun*vWJk)vsY1mBbZw=EM{%P);+iUGD1W+?;JE zm+l0`YbNJDMdRpfRZYj%7e;Qf?Sl(hKnc)5u&YIZgzb$@-Ka;R3YsM)J99hLV>qLt zun`}>ypkaU+AdXgPeE$EbG56p)5ZcXlJInP52ssv4?ev=HSF^@^%N3lwytR-SKiJn zGlt5O%6AYWRY)Kk#Z3oEj{j-k;IJA94NKF#-N%EB%pN@9H^$rUcSzJ7lqTD{&qlZJ6LuHm(se z9-rbo&9PhhHdB>oWq+_tZ>B~PJ-o?PAmn=VL3?a^Jn6g7nr$c}i}zB*y2CS2-cU#$ z*>F^hYT2YJZ`@%SMh5U1+^Xeex8PB+&qEb_Xj%#}hrNld3WOB-jIE zurBd!3gnZ(2GON;;e!CUA>%-bx<5pZ+MDQbsA$Qtm|bz-A@gXJSpgvzHh+G*KJ|IP zqNC$+%adHY!RNIyI@JT1-gD1wh#ztxy)mHrwb!;<3-SxVlKe;B@AD>A-TCa;9T2Lw zY#D?a2!Rjj`YqW0Z_K{^?+5-%3C-*uv@IoSETqclmT^w6vuAHV>iOTC<^RufmXg@W zoaM`BI2#fuY{Xs8h#id-63 z`lpy|{f+vsZy;XLh0DH01@riAx%(MjwS@Oe!}&6|ocZL_vEx@7)gO{j{r- zG*;Bz`Dj(9&Sdw+g#C8yFf9-3&c&rtGSt0y~!6xghh`^rDPv5M8VY6 zB^MSJhRZAz?})e*vNm13A;16dqI;p%Wh2qew=0gr{cQts^_lfCTP$^!Q1~#1i3d4{ z-%;m{(&jXjFY2>WX3SKyx=cRlw7Pu!FRS($P>#R2p3Ao}fAfxm)$p;!FrdE>; z9Lkge%HMdh9eAhpMfJZi-J-gnZ3IFAO@Rxe{Hwy$-gP&o-{5gAJ5fCZmBj1YYG;lU z(KZn=vmuzi+18m6`oeZ>eFM=+uYHwoY#Ha$X*>w;Oo&}|ZHy#F(&nc3NoU;N#vc@r 
zX}u5nU^^oweE899IZrNC+eI7j4xW6_S$+6h3&0zAQSLF47&DRBZ8u31*p|U{#LbRN z_27{|bZxYV{hwsi@UCcPOQM%?b(sRnYYe$QnJaj1<Cv+C`7URKD&jR6(vFYjHT0!}T54767|3@u2NANHZh{mD@v^kF3}^#B7& z8E1_Q;*t9O&9A1{9_>yMGU9R_e3X3-9;^~Zys9q|qCP-w%#*-*1cruO{?>;?Uhl`9 z0Qc}7&`N0+3Q8JaihRj8Da;wrg&<#io>)%ap<&ctpwlJ*k7tv^e}h3F>eLqIa@@X2 zB^vd$iwBpfPyFhEp+g6w(^^sWV0oWd67!uK=naPmhUMK&s_&wjnwkhg>pk}> z90%BkDWdi0?hG&pUxew$;9p0(q^GTc60l43bT?rA5}N{+TxaN(w*-m>F@lXUakBeZ z;gcV~xrnl6#*+(~Uazk_EJBp4e}8oGaJ0^IaHTUcCV0~ms{G+U0Og4!g+A|=$M$%W zz0rYB_bv7Lgc1J%vV`$`QJG=&8tkRB1{oNtz!m+jBSLA98=2}-GmwSKgNeh)YcTx% z{?miC&}#0+omPs~CViV4#u9bnDipAbE4`>=us#EjH^n8rd#iA|NX&1~?#2k8e_qTY zsE*&rS+*=dLEZ^9E~H=HFGgue$-EN{e4N%WE#o7;e!XE(jc2?=bsf21xu;S7fe;=Q zXa&g!-)MxWXh8>5oP7?CCPn^H(`0-E>X!fJB%>-ECgRPL0%UQkcD;S)r>yEWLqUJf zeCuo_Rm6AtA=AK>BY?zVBbmYVnZyARng-E(@naxnVw67tKi94T^dP#=Od$~>T z%nhBtd0j&(mu64zo=+q4aUJ<&)gC@5BW*K0e+p_B-wG|46d{Z`}1+>;^_4^-a)XVzivihoh?iUBKB z?ET*rr1i=SfJU`E|I{Tp{f#+s@HWn{3l0fG*SJ)sT*Lw!Pftxj>Njg>96HIL{SoElh$KINU8m-So zBvbtKc;3n@)MNe6eAe*ClXQU^+KGsILTC7)Y|;(RTACLO(&DzV;#w2>(Vm65oQxf>?M9Y#2KFjB3Hi{ zS=Dv%>v>N~#GWNAMn_z|r#eLTfoP4#dk-*V+G%Tw!l)1f8>DnpEc%eM;XEG4?U-@n zo*H*<`;#ZGn~7yJKqHeqn6b7JR&d7HiH+dKRlega@;1SN09hVbT^s-DBh$#rs?2_5 zH-oEz%EdRt)X(RQsaBw&zvN)FHMaGBfwvrybgJ9nbv!TAP#cPpRaO=igio31SeG$j z`X8z4*Vy#^4)i?^hIDHR4LsVa3GZK9$@wU3OYO2p`QN9i_q{KS_;E&y8yN7E2p(ky zoy7`z@Nq$stL84@vK-bJ z!c3Ds^s&$O6$v~ZsAca0U7Gk;s-@f(WY+(c z1`0OFX}^H_+(3Js`gs){vSjQQ`lPWf&CUUqQAh*>HKjCOb@z_E$EUV=RUQg-_Hpzg zu<%U5yZ1WVUej8vb?R1F4bYt~CfV+qN)?_a9|!v*Ifg7|Q47~D4h>eIR9^L%`zp0V zOeEvW4_ALd|A=7bLh`FlE8k^h>G!cz@+H^)b><%#?EA+?su_a!=dzfSEOL!ER$|9) zU}va%t(*ww6G)hGzG0VKsbRAbf+U;KRjqZ2 zB1un-t2nP{nHkr`XQwKunGo&Lu~)H^FBg1gJ~7Y}Se7Ic%h&G1npDFvuegjGzX(JF zu9KuDCLN`d)8s<7SNyO;zRyJMiyF_fn z@$|F?|E$dfupBqLJO##pbfYo(%Tdmv)i$X%{+=z$@+SS$vs3)@27< zE;g)#7^>hzJeo1gm?2xAnxTzN=?_jf{$!4#s|x_gsKMgNGA;f?oaLSWm=d_Y z*>P!GcJmo3%mE7$$o>BL9kcwVDLZ_r#Z(*$7*Az8Ir_G~@xNy+DE#m%+yD5C6x2Kd zoQv>)5uVP?$txRLn??2H71L0(wWD`eX8kRMJ~z9$=T)n%jZ|RuCY;p8 
z2eEP`KZrQ#rK|%BuZ6cn1RWtm5UcKC_Cm1w(dvbc^4##}@<#P#+;YGcL6oDxKGs0xMTzoD?vppw#I7|zo>c6rmN1+SR;$)gYBKA6 z5Y31-;vB(^+9?LjpT@I6sRpjkkR`vDy}GYgy3rl}L=PnrjC3RQ*JpSj!o#+n)p6Kk#c-2tTLZ1i!RJ^- zyc5yO=fJtH>5GcvwYQD5cB_5n+Kh=5i|Hz%10oToSWN{X~D z)~Z&zSUp4rbCN{QspJxzrV-**Nn76{OS>R7Hsw%>$S*QuvMK{3@tz28{U0)|V0&|T zMq3DiX}_2ci`w5O(Gl0zP$_@|h02S#VK2yF$$?;|zG(o(KT6Y`@bpPxB9Rw^)Q@hF zfp{I5q`%`zM-6*4eiXuYQ@Z2~h(sc<;mR5A_Z|0Uf>v3KG@;`90P!D{E)`4{9s<>5 zneMho{qF8h`451Rkiit1_YP;)2FP)`Iv8%2a`lJtsO1B(OyGez_T{q$WgHfcn!#x zrjY=Qd~qcTY-(4|JcbDn^a7}dDZWpqCyE>}=j1ceJ@EyX)U{C{k&j$p>U9OE)qy7g zq#ruYR>HeUDb3|^A_4V$w1490S8BfW!V}Oh*Yv>Hm`Kr40+ASW-XAI!sISrq6iQa0 z4FxMCHeOK0cI@FrFV{D01am?sAl<>gUMyeiz+$KtcJwwQ4HCH>Vfzalq4~>-P@8g= zB*0#^>2aYzi}QSW;e%DchkRiQYhsUFz&O-?z74gXoX(UJ-REdOEhYhz9t8pm{^40! zc`F9=7I$H>g7HG2sp1~`s8(F`uR^>ji}=MJrv# zDmPLF?uy94Cs-vEct2r)DMrRD+tQSghVjP0V+_a_R=?3X1Q5gC0cg9w?@GWIl9jd| z1;rA;-I5PNmVpP}<>@}cM*4ceZOVAhXAPvfkJj!;bRwH2mf`^6XXaz@A(qc>Yoo%- zZsLI-VP(~$2ev|^wMgA?XzM(0B_aHAzKO#H3sWyf!8N_t(Lh6M@w2e z>_C5FB{*B?j7JASUeqB`4}iqAZh}XHM8=vQ*>$CZA_TFFa~C14z&mxFty~M)9O!vL zMyfQ5N^;9@%3JeO!P2{ml3Yj4gPCN+zs1MzSb!Qil>Q#>Us_|(=7mro;_-vu(&EQR z+XYj2Gxu+4aWm-PqA>L!Z08rQ2pkOw1wZ=2%ju?gUb%8zvEK){N zEg;r^9|pRQ7fBGJ-NIr@Mq@ztw-UsvU|leY%-7G<9xz&Fdw-jRWjE|0+i=l0sN$>J zBvnR@uVLMf!GXMqQb%{&!`i{&BR_9lt*snvuBpNMmXQ+38*=$ zztYk3+2Tpm6NnXdT8Dp0oC*{3TITL4NR(rYR?M@mE>@U%-{cGIc&=}r`W8=QO}cNM zELA+^ovd6SDCy6=mpOGEyFH}@9(Akq0V^OnLU(dSvd4hLF)1~{kC-ZVv&1@mM{B`| z;M}IuLcq2EM;aR$_A~DXP~B2DbnnBJCr*PM%mZgz4QaQy_bFkcCB~}NQx31AY^Q6T zoqC0q)RdL+CaGZGIcM6?*af_NgT2Gg_&KU8?`5E%RJlIaLKO~PF zOLIRbKk$iAS{dhKbfwD*KltxBmR=^?>ziCUHXD>>@2pta+2DCBh(mXPn zC3l7ond0}Z4K{sWQK9J%J=&ZGm4q|g`=E)2cH`dtr-KL=RoI zLX7L)>5C|F{sEW=Xw+BAczPh|5^O^*48uDn-T$Pf{ItSrJcA|(Qx5}|oHuKP=YxZf zPaMZ1g)veBjYh}%>ex5o!+T%Lx$^y^si__nENKP;$x!{xs^jqHsFUl*-Uo4?#tn64Yx3CxSOdrnFyxVsDS@$$-iemMI69h?WWyFh0s zDk{>{*3PEm#w4J6qgOrGBzzkwhj_uTCn!ME(j)W@NgkrA=w87-QBC&u$8@HY+0T{f zuLu&WY`&GqJSwh|6>Cw^XyJQZ=A#iMjuL3z!+0Aj0Y~wV<~>XXABu2U&s*564WDOQ 
z@L)suYZGZ$_*D~yLbZoWG$SeM>%Gs8)Nip(H-?TPJ|2uV*EC^4PNj5oY1oDYryi$f z-=l}%QUDU5*1&B_9wlgGV3jc96S$KVz&qK#k59vB-`FV=W_x#Ooc`9d))h}Iz<$0j zCAC1_H#i=2fHZs8cj(kFj{sB<9&peOvGc>f76H`cjF241@oak_HhL92V~iS6DE1;V zXCNoD`zcHLbZKWV17ducbx10s(drR@)|*AGvHU!NAmV}KoNwlO$|kt*sN+2=6(9)w z8c9S+^O6q234aMycuZ|Uj}Dw{te$G1)nyO%x1boANaCa~!@pYdoSgJ1V5P2xl9b8| zAg(Jv{eZ`RI4%)JW#N#rSvoNC27&dcN7sNz;1`65ac61ZHVl+@9Qm}GZaf|$m;aI& z(=68c5Jy6RU-K{>AIwukWylHwmR|JZ`|lFrbQUfm<@yXb(H+n*8Ny@cl7b(#=F>ej zGeQDol1Qv{#Bc<`QGW)$Upe@G&x>$ZCF?w+QZ@&2sLhX5vuRtVHoj_ziZ>kQx1TWb zaAe5YJ+4f8F_y{Wkq9YgdwSG6t^Oo)eWHPFVOliQX#v_vPav8hJiL|W6>dZ3EPefS zZew$RR(7y7h5rV8+QW8o%Jflm)Fyvj9J5G>xo4TRd)_=!0|;9Yc;Ai&wRhBD0{0?g z-f_tzjMn}2Mfdxy7@P@@jhqEzeRQb&`uoL2%Q=qjI+qPtXxkg|=ph!INu=hYwGR6IU>4r$W+=W~pB z@Uowk;)4asNqPI5PpfL>1}Vjc70L@b^SpL(g^$iRSDhhSiEBP8ct@lC-L)92{iTkB z7CH1y4M$1k&(Ny9buSF`D!hsulfY^!N&|?1)?zO=-R!Nk>^2{@4Lr#&c8X{FRX*6m z&vY8giqAP)9o#ZjAU$em&-b~YVidv_9J=8Eyu9n!32YSbC=94>l^dsbk9*i#@m4`KqWin4Uu2v6MPU_}=l9cv&hZTocj3 zKJ%P3XL9G1`aUQ@aM3rQI{_`V(^qWfn=`@w%vGph0u{r0-tjW4^IF3LctlAW%p>A7 z$+dGzk4GudG7(caKikx}DGRT9122h0-{@zm75{p0;Kx#52xwm+rOSKzWIY3m?e0k9s<@Lky?YQuxfSCP0EpGYn~ zML(*w6E$dilIHVJ|7weRjv!ux)R+O5lAB z0QbHHbL+pDJ`K+Mv_V`c#uU?ONVWpp~dWW&_QUW;8eN&G`(1n z<98WCsOTSMh{S4wHV69n`kx0Wf5ClDCkg=E$Fd%zCDi`bPUygaGVhOeLJSi&FH$M! 
zkFa(`MI;Yfp8ako#F8rt+6i4a>O-+d$RBSM)U{<;X+xJhSzoRNv zrGoe4t}^_9r1G_|Y}e|Nu4I@|s@O=`QT<|W<(-}HKY7H?O}G-^bcWCj7F6Y30qGXd z3tcOc7y~D~dD-ua@7jgQ(xlobFxGqhN*;E-(4onSZ_w`)CtH z=-4mtVr(p=orW`~uhY|#yy~#n=+ce>bRVLRd_(i%Wv|FR<1sGLsqJkHt-IA@`}*uq zNNpL*D#BpTa^;b=TG_pYCOFTm+>IUYn`K<05@Ga+b}y1}1nA4|k@J z_KL9mdkJ(rrjj(W**fN7(CoBu%8LiAtW68{^-og0Q(wlo^{1n?ZHn7Q$Y|{!?n)U4 zu}&M6#;y%^36OSXQ@{>w4He$LeY?L9ZydCqnc(SYo!Sr`WT=;VOWIsmqES?bHOk+% zdHI&Q>qIje#WeI{S2XqeJxuQem8b`3#boIjw>+*h8)Y7ZNYX)PpR!&ILPN>J>u z!t1gy%VB;sWoLKuXPs{l%>n67(h6z}SsFPi&7>}~*iRT3zdI2$DSv`0s8uw(%s%IV z6SA$E;qQmiK_mgX$Xqxo3K#L}9@1Fh+lYNj-X0L~^dsE;-c~aOj^(u1h8uGk=Z^Y; z2qvJRAy$|AL(Pp9YZ#ees>kybJ(EuL8#|QIY;aR?1=QwYacO%eRoj@0w?a_^;T*f?Rc@}4; z^ z^_tt-XfSq}u;}aXxom2OGh}OIF5mHAhU@Xe*#0au#796J_}9*{C&3>Z@xHUNFVyeK zaO?{74Bf1cH2Cx(j!UY0ekHeRBU&~;Iduyw>NVzzy_`gqDzc-RifUobnZtW~9ds5+ z=mS;G*OA|V^W64(Tn=iD;epCR!?yVmF58;%b1 zPOoDeW6YAKdaI*gi5pO>f{imWLPuq;4YOpBr8du^G-#aq!M?EU%uUvEC)uJFlB--8 z=+0W~mo_|9s)3p1pO+QWqxSVP!-mlxNgKY5lzF17cRBMs@0LB7gj00ZZ*-C>20Jx{ z9uvUlfO)lG3VCOXPmNmFz@3#(2E?0OOzNk$#S7eE>cpoESHg;E)~`-)Y%AjXB6m3? 
zF#GjxAqYUq9vAI!cMbTzq-Q{VOq9>u?_GJ6XBB|0Cj9(eEH59!MO9dH@V%-A1+t>eE3$yh2Yg$3DwGQ# z2238pw2a^N!XLpi2nu&-WP_<2V(i-1M9iclnUJJL9xcXadlkLOEqoOjK3N{2rgEHQ z#DXEtJnYg5O76?A-3)0ghBW%*cuhkFXES`&ub@s(y|jd;jmK`5g4=ZXO>%P|B)?G* zh!O#JZ9=f8pj~+t<96jl|YWhgatnX^`!u&cFQ`pj`) z*Y(H}%_ZXbc+ZW%N>G~Zd40ky7+-ub{4S{6GzjtmHr=%7A}5`<>szA;tLD1c00EY*r4|^aalcfO71HwIWo?fJS3RX3!my>Nms2- zI3awYu98jl@6R!g*btjIdI4f9BqTn$9u-->e9~9>0r1RdqjCUFy(^Lelo58b0Fo5o z^TAq{~46MwgjK zvSYlCxF`N1!yv#_rT$KanE_vZzAkv+FU$=s5_1Fl8|LN&39*?GeRKF5QG=BSt_uGf zqNW`QpgGwzt@sNs^MCn}uvkHB3N9DCj%>eF+sd~rpqEJIqjtMnD&^}&65zPyecq>K ztFO565S>kJ8xHf?lh}`Vpp`xw$#ESjKV5?hY2xHit-m(^3Y7}7eY}Qu9lFy_8<&lI zKjI7-y*fb3TMPsME5TDfV5;2dqOSVIiK+5tbXo!D04V8y&bgZy(1iO>@KXSFOVhyM zImh>bOu4*_jF%6?@5-ZL;9_-GJFjW$)wyttjE!lW9c~nL^*Ku+r?@e1ZnFb4*c1XR zpua{6UIL$s?_8-y)_CQ8OLh@5VBV((<8?|~ZH#k_4_O;4$pJcA+0ZJRX<=c|ktsJj zI|%=-phUc(OG236gPmrgNZxvngA6%p@0ecUwL&OE7D&$WT92{|FT`kAEp(P_LY_T) z2D^1T6j1s{VZby32wrb3yWhPZv=bf5QPpX5tj);CXkX*t;1K%58sXy|jTivEQLX;sZP*GiUeL!RRz=4O$hsZ_kkPtv*|E` zHKfL#xUoTg&j&agC0pNe#%bMLLhEFg#$jZ`S&>OV*i^A@xS?h34SqV zZI{7|eeYB$#-1bUSi7v&r_H9{ben8?Z(YU4&D6cg9#zi5rxY>&z*L<M9(dE&dBRdQBhhO=)djl}0OWFRU}en%eROLW z_jGh5*L7+f@O-+{62ZTsjr@@;nOgIoQLb5E#J_Xyh)yqA6oCn0yIqnF@X&8mgijbe zKmc?fHOLf*><^QhD}O1+lLh5YHIUQ8%9d7Y3>F9|WNK*xG4nd3AQcB$MZ@)SuKDk@ zC9SoK)gSJX4!m60Tj^&Gema*zly_2OdB?8h+QA*UdoB%C(RZjl6M->^!NcmLX|>*- z#O$fch(WB&Wts6H+*CT_PB+>N`IEF;?iUGdY3f@?ALr>vOh0}+YT@-=M78MJuYI~N zae-rzB;&R(=`-VbW_2=$nR~ERG+bcIl?b~ZI8k|KspNlPb&L7Y*vB}Jqc!DP(WoGL zs`J$1x8H5@XtX`TQ>WA~Z%lmf2nbWM7g>!P&TD)@4*{)!4b4`#8nB;nt?7tVYV-NW zqolf==IzgCszO9ai}YkHk=`2b$?%U8wfj~Q3XcyGlahcseruvqk&)`YrIJb}%kK1c zi3K8*0?rHpW6vC}G>j{uKo>aIm}Uc=o%okZTw4Fh5?9%OX^9Jzc^U{DVkshtWnc~? 
zM?6Mc;H{i{E)jMOg+Om{|J6Bj@8{0}wfJ-%nsior(rJ2gSUp5*)H#ElJQFiz;;3{6 zCHUw}GAnSx8?TfC8R@m&ZB`5z7Pr@r=3Pa_a#7WFO$uexKI{EEy zzkhGIILi%*-JAh|@05r2+>~ET^PQ7-$jEM|)Hu6^3B09mKHN=hf3R3isSH0>TrX1Y9_$Hav|BdI>hgB$6L5FIf!SHvS3|FD(r1B4+q2P z>mUn_PAenlqC^if=&c_2MCI&}>#bnMPHU6u!2^4Q`4=;oU@Oi)36EllLe_zh{Vw>$ z8+Eb2Xo!~5@vngF1q4`CTB#_I{Vs}!L_rBIk{G2x=(Y9SLzoj5c=x=iZt%GRy~Xx! zmea#CZPt2Ey}?UyuiRKmomh>Nd2*-ClWDC>%94vnqZbp&F!vqYAGJ(zZxA50JDf8^ zfEw*^0aBRZ`PR49>s-F**gxr1nf~CtC7{t%&&OL{><^Og+`FE^;VIFh!m+ppzPjHG z_g7z2jC8zqi_)Q)gwv^qr1L@e1M;vTFw5%ub+Q@U34s2>;P1Ozi&LyPI-Pu$;z%!& zw7-&LCKr*9-a_~`q|#554)W{|GR+E3EKWKAE*0O^0j#F=VZK?ud~2kKuzzP zs}W30N3T0YNCY$hz$EyIlnj^8gE9nWUjMoENj|p4f=jO0tnLrN4q?Dk{H?mN%kVTsDJho~r zlGd6t>gHn=qAivdLGf=%}URR%xuQ~%EsW=eUaLy^1dIU#~US3j-0~kFw@#CJKmIJH7 zzOrZfmx_3&55^70%d79Wsft|8tQEs^OoYd3i}er8mD zNA1Kl6(dxOi=g%HhT!&&ube+xyz6(@S~=y5cPnoFTzT&l_MYfG%+9z&eYGl1gF!!} zg25cbj=|?++dGWS>;2sur&cC-Zs2Fe1nYvK_eLL~l)|SKD#y-l_9O9AQrYLX>xe&0 zxf3B1sP%0dGyCgA`FQ8cD^AczE_yMc$fK$Ie-$i_R&d-qK>ZNeALOS~O#tTe)YFiI zW6&KW|IR!G@GsDeq{1(}&~HGz&bMt-J(J=%5-Cai;khUQG8=Qo17)Y~FX$k5A>6dG zm?+`GSRr<=0x$>f_la@w{OHuf!?-OPMRU^a!I=N`7NyUA*+C^WEEOB#Q62V#x$&sA zD!V=Lc4+tY*qq5Mxx{^_LIxj%=b+h+}?n4f0deWqmWkCc*&I01n*^x4!>=fZi!N4yOb`l59le>CMXGY@Diy` zLY+eXpZ3l=EXsCW`yiqSNJuInjRO+W-N?{7l+q}ONQrc}l;F@E;?RwB3W6X=r!(SRgeUdk!O8o?B8Gud0cPmuIME5Zz4Y^_*mK_p`-x833 zul&nBe{0aF-GZ%f=Hx$*u1)^(1`Y_fbYzTLstB>sc;MAY(mmhS2Xux^AQtTWz}N2$ zf{&k{X12kV_iSU)R})l5mE?>wNu-A9twYzugmz^f8D3<_;wo_O6?Ifa#<*C*_EsaB zKDX;fw;}<%H!y9)!}{7&9g!7#eRt%*C`8~Th1kMZ`|lHZvRBlzI^E(kc1%^3?s{6$ z2M!_Em1+zDGMbJKM|6^#ewAjuU=Bd0E%M6}c$4*0^nxp5B$*gT#)K%;tKJmC+s&oJKJk zzDvbf4%V)pK_bHX@4CcB>dYcucbMKIpj-dGkpy(Q_vUQQiB zUQoH5DwuMAb9a9Bm~;~GW5v`Nk-1RJwPBs6_ISeR=o2TVD4ut2m!@^xjoNxM>h?eN zjBnO4I|5nN>s$|N`tKUgs+%gb5;^qZ!7y@HpQJp@^skfe>=hg}cE8M`suPk*XEcz{D0cm5SSY)GWW&U}!xS_+t;-|QHCtzN zY{Rm9@N`>4?1E+JlU<83&x_+|a`Y}JM=<*vTI`U4^dY6ebt!%P;Z?YZmXpVbPW@}N 
zvaXB_%ogK%wdme(QR6!IJ?QQ0l@_=!!vHdCd0I@Lb*ztwPNC7Le^-XzXGcCjjCIDiLgv()%FZwcvx{`Rz2%ND0&&qgI;NLWS$ich?|R)xzNvc+l}?mYN!{Z4M1{ ztL?@^Z>)+B&1#;3Gx&(~BlU#4p`63JaxX^Cyz|n=UzEJ z!Yemuu6fWkWZlmwu9}?80MCq%2fH{KT-g+f5k@nlmvTo8Azk~%37rlA(O}-=e&d@l zJg8l?kO7>*zo&A{NV&w?Gn{kNvn=#c=hGR>XdIYnsN7C>g~|iV{e6ODaBra~_CW!> zZ2STbBG$>0Dur1i5civnFX8l z#NB(Z$RI7aIF>`$5d3Rm#$Jc5TZ1da|!ttZEpRV|ISa zXn4`m-wJQl;6DB72A6{~T-4gT8D6D#-Ua%^IPP+KixRZhC<)()E)X-6VsmFA31D%U zL3E~U=rv6%b}XEd$#0-@9W|NN!w)Ifh8p3KT!wtt33;hl(w;ulK zt?Q~pu>*s9&WB7%YsA=3ij2N6A)uaC)xSz&)3NK5g6lJh73PPN zxPzwEy~A~RFSd)7nF1Ma!w9$Z+3GxeLQU_@VT+gbS`dF+m=;HAzQuT>(u}N6HN6Eb z(Fr0QDjA<%IC$K%@|)D#{<`RBpj@+3CCt>*Nbapn^t}8r2*1}}M5V;&SKM@*_|-ia z-;W{6zJ#a|=C}*byvVVwI(q$3upcZv$g25jW-$%so0VUAW%e4}D(Dk=hEhlMQ@YK| zpnS5uiQ8W`^;3kBZF(LWq5#{(fDO(Th(U|i?;F^KN=RerazhC?BzAosrcfEIT&J^6cc{O4wuicILWpQcm zz}0b8ZkO|e1{O;*h zcYodlo*$r{rfO-)u9_+m3$kDpfT+EyXW3c=E!3XuMZN&h->TB!*>?5Z3@>hQzx~td z)NY@hCFv`3dC<&4nT;|+4C)--J^t`)0Fsj;Lw#>PCoeCg>9BF@p`G15>xqhx-?oHY zx2?}qwg~h@`V49R=Vz#WTtmuT9m-F zXB9cPL&V$QPHPN%-Twm134sQ*O zTVlC=c}dD)+8nU(70msrsDU>ByM(J;Vkb`)#jgafxsE&>NcxHU+1UHw zW8d47Mx~akk}yInZ2!=Pv1;%1&MmR&Pw>`#UP|U9)O?H44tk9p)(%|g=39L%SQ6zxuaw0Bv{ z{I(C=`8b!`IZ~Df0Zl&x`(K(ecaNG9?t?=R3UC$XXH6K=(~eiT>Q%?K;&)I zwr|2Haa=O^#(X+wPthilQ2f$M)HedTP2_Y!O|rcodXv42v|z!!nm0xrAlwJ=`oI;~ z6W<4SOSoTJ%~mm%JITFWed$)UkSgAz`RY&zTdc~`2z}f}gLcz{0;3v6=9&H@^P{qC zGRy-EK>A|dcLjD?z;ulT2NoI{ni(2xL3f4n%fbTRTS_!EMO+LmDu#d^iNX;+q?|j} z#Q&oHw$43)8$~#`H1|7Y#h~-Po_a$9w;ZO8`A0N#A7=*)Q?yOUgl>F)SY@q*6n{v- zo1?LZU0OCl(>=+82(}S8P~KD6`{DGg`VP?mzbSNJ19Tug4+gJ6r0Fshj6hrBFkU%i zR>9ra8QJb4j!078Cr^1hcOBGu z;F%@-1LEdS)!{iBfuy`J=Eyu3bQb&M3dh6EM|kG06D)>IXaF!IfzNRoo#PHB79!ki zX09R@?;jS9fxc3-7(Sd~h*;VZ2Jl)4(kMZA8=@ zZ3adV;vo`5eZEeMztRq}W(w5ebv`z|1w#OF9QX@R;bc1PuD)av4f$%IS~mvH#jv8g zKJIA1`FcB0=UIw`?y^pdERHH~*arbwho|x`JM$>QxQ?~itpe;OLABxOOIodsh|?Q2 z=^ULLR=Q@yIf-36Z+R08Gks%t^5W#YS^Igw2TaJLaHtY)X5c+AJ-f#;5C_5o?RHS8 zQ>z+bB+AzD5CNIq-wzUj%m%0oq?wq{8oQ!zfDa<#jvUlZ1HaZ&8zYi5y^+L<3T<}o 
zO|vxNr)!r%2r2&ADFst^c7lq2O)arDx@I5(4h;l{{?SGK^R<}Ee=puoy8jR2{gTc5 z@SP?j2WIi(agqGu$8sAJ*hXs=R@?e@-Ys`;-COGou!t7mzJ4=;MpUWt5Ba;yif4{{ z@_`cBluhzMb_)VZ3~pk6aOfyI8_#$mTjpgAi=jqJo!RYA9pjT}J>O^QUd95=Lws|a zu9VUxs0q|KWA&GEUx*yRq(O@%^~vws-tPAS;!eehV0`n^z!>~w;txkI@K=kS#JZI) z$W&8=?onh1W~xbr)+d)^Lh52A()QMhpSURn|B~;&{hW9QT%@aDRh)bNOSA6<@4Rxs z8F~52P0Z+fOR;;MYq1q3J!~k$w`mR+`LnPw8*}-Lg$I@ep?6gZ)0D$rC67c?#A!8i$udv2qc`&W#`>9TIyGY3 zc}8xlVV~uC!}m2PtuH~I^L#l!pTl?$+a-j8%&ptr0YEry+tzFO`aH#J_NYGhKxr@d z`oQj%tG&5ghl`_qa+W9BJkjjA9&<fpkBm!+66TNuG=G@CI3Et(c5@YbunuoE2+Z+1+JCHNMw~l<8T=m9 zzr!Sis3Z6G1ENjm&!8H*Y>zTU&d6dbEA+ ztz>U%LN6WaTEL^+%Ua7r*;&+uI^#QS(wFFLUe~x=Mi;1VTba0NG8cK`cT}NMypWV` zAge>r#Pf=g*Jv+iwwRi4`jZn9{yzOl!7O=C&KP;?ds*Bj!qx>lDJ5#?z|{x5Q6Igz z3euH-E93L{Z-&q@YrEc1JbrsF{{mw)Cs=4Fv*fV9MJD|ja=i9KC_*LO+32eUJ(zNO z@}yo)|Hv~!^J}4H2AV8Oh0cVN$irZPqcg&ht9xAOa&PFim5L{PG%-4TzJcl4e31yC}Aw8&*_^8WtOdeoz&P{ZsUfsB1pN5 z)|CVIXPl+4NgKs(KF5YVkyg74{kAiR_t4-?&Qlj6iq1I;+Q(J}3`DUXR)NolcE3r& znmO=G45|9S>akfDvjMzy;8uvzwq~?SGL0z8RntM?ET_(eGu8FL)p{ZxAMsm$AWgT-3qYkynq%Ee#p&RN}CFgP7mwP6ImuzW_ zg>-MQ?=U)EZj3K65dQ<;W(^Cay53BU7b}{$ zJmH?lyZDZ&vLnt+EnLdNOzp)}bQy73DpbNH^m&=n;m=l8%qwY4xk-)inW^JCT}rkg ze(#xc);b%Pd!8r5#V>xYu#!QqO}IXlfdco#p=0uX9xWziqIMLLwwG~8s(~?Qb*BKi z+b0q3N9rqR!zDYhm5wsF?6S9dm8&;ZQ$#5-Fa6?4RUe1~GLBYZlBbjBYkA>y!j&%* zNV=HZu#58riSl=`I#CxwVx0e zy^7bOtDCCGKt=Y&r7sre4Q=cfmn^ve67y-zD?kWE5tIC$a@kK?k=?w+G<&&~lZ>_Z z_0R=cZ7k$o95@1IT1S3o!u)R*kad$GOc$2tGCs(=DBB98IgHw-B~h6+HwB8|<2jJ* zV+kk5Pu3S%B#Vr(%wR#k-0xHKra>Pv$~~5q8K@wrBm3Z)>`&8Vzh~?2)PB&COwV@r zHt-kGmDJ;oYq({!0v^|=X#^~n?s|+H^aESu_WjKrbewMOy64SeAUk4t33#^mqQd#M zy4z;niiN>~ZdA0kVDYzHwxu!Ck8_x&pqyQEJSLo3Zc0w`bQ`R=Hl!&b{D4C{?1-)s zU67rzS{oAG!I}=%Q|NC^3S<-YM#idgW>pr9!oJ1uB3v2gbqbl@?~dgQeWj##dokS z2{eyTTevqkCGD@|-#$H>C}7Ar>l(pEZ4CT~e)P{_IWZyy2xTIPME!9j@o~ixK#4WE z%pmGzB&lTB?0x^DMEKi@%^v0p?`|)x4t~k{*TvKxS(k9U-1J{X-h$_hsao1sGQ z6~yt!Fg?=ginLPIiS%8%ff5K#o&B|cs&L1FEsi_!y1gSzvXU+g`(ecZsfyo0#$Vdm 
zXabd)+}5V6jzRC?T+~%0>br?j@I+pvrg%w`3sIP}{ZFiYc1^i3b8XmRbWvsRzY!H&|0XD0%S8Ka|2}ts{%o+^A4z?`>jF=XF zlzn6GmRaci&C2=1#YzVm+<11Z;my?gyjdTwy&XKRp}F-dL$*LgWy!twJmlK#<(`&h zi&7r(RxJ1ao|bR9l6Pc?w(c*xz@wBz9WRpboUpZBl_1e)Qt5_6>)=ZT`sKfLD@PN8 zsT+V!D<+o;*l~L?>i14B=PsgCF(@IjU&`! z2vgd3#7fZ~?99~j3eRzgGKg+HrWA4EP#Uu_$g3zKc##e#J79aN(Y%||m zc=k;1=Y}54&b7r7o{wRlb~E~g_M-nKLpE;DP9?4lzlSZj;C6|b6(c%hR404xVv2_e zgE@1`c9kl3Yj9x*zw=SR6+3ss!i0&v3eEz;VX06voohyDqkrE&@&fSMzhnvjn_mOE zrGLcdWz{y*N|s~AStyj-i`k||Bek+pG(ay~WtmsG1mI}0I=uYJHm+{DF=QGz=YGp( zv-K8uVot`(F5VsUE)Vt3OTL>qb2HB+KY8N(hiq45J!!-JqNL(<#*DAhhRtzxyk%w% zKl`g7^h<`OAsKpob{U}`vcLzxT=ic9^EDpj0EBwFM1{xkE5JD^iu9@@4mPKid7pg+ zRQ$>S$ec#Dk*L3sgVi}8Wj;LX`87O2yZCg?D-&e2lbL{EA8Tt4Egc;VaGsjNry$g) ze-Ak{vXxStn|o)+4G?3B0SfdgyRRO-A`Iva0q{dH@st7i$4EjDtvfu;c1rHA8?m~K zl|VK(dW$3eh$o#=u6A2t7Uy5F(VFHYL&0#`Tq9|8>W{Nl+`xE!$Ndn~`?NK0(Ag}` zF&BQD1&wbx8;&laHrB2WpK9~=7#%31R{h^<2AF>&!+PGbb39z^c>;9&vMUjqKvx{w zXH;?O)sTB>%(J^clC4qh&5W15x+c>iTJG!^I(@_ByV#&jqx4|fwGoo2@>#1Dpo-Qk zE9__!Wz9cOPOeqe@7rl7e6T*Nhx7~M%)mM!>@O&3xE1@kNU&~&KpB(;fqGvx_|S^| zpC2<`o$ZgYbKevy;o&yLz#z6&l#|wcX}sv1wrTQz5_^E_LB)RgE4?p%Zy#FZ?^c_Y zm9@9}{%!tg%3I&S`QUZ9Lv0!ZI&5!B@+>V51E{~>{O{rGif12K;jMJ7bNy3Og_%si zPmu-YQLr{F&PG0Aw054v_d#)4&i@qodkv?Cct5xBqY@sQ+>$#0*QSrmX;x zhH$4j)P!!%cTgXT!F%@vC^GKwe8hbTC2b8LHh{cN1o1#x0r#^LFH<87?TMDf%u_t? 
zQ$a(pWy%^_Kb8RjdFKSNc;+YMw+QD0K8-Ka3#32Ci#_krw~u-$b~q|eK2rBg4I$7D z(j&~niIoe(faG}}e8nyUIXeDt2yOyDDEY)1dZkYN0^8^NnPk%b`LwQ}S`jO?wVb%`Urbe)?FI_o7&yxB zksh?nK&-1u71GGsWwPuL+JqsXUH{Z|GJtfT=d)MHd2rfv?+q2DHnH1)V3Fr*9|H#y)Z7>V`ni-iP}P~3uN zn>%0l5YW_ebd8)OASl1FJTMP+x{+9)lzTXi4vk`dGb^YqfCVcw#Z^Jm41d0`sNQJn zeVhPDje5>1xa<gEBM<39R@>2#DRA*NKI{E6s zj%dSaEfw`^+vtI?$N?b|@CozaLpma8vR-3_a|*h+s(^X&{}lwo$)#O`vhz*4p4m_+`Jua3TISX@b# z(}wz+$6N~5NAu9G0LX{D?7EbyhP_LQ5+T`q66ZLoY&2PeocdL-i<=J2^Tn#hJX9w> zmzcAoZm>tS8r}DxTFsOXTMZylqm!5>R=2{&1x_Hru%4o>!YHrx(y2@8Xk(CGL9$)DnIlQP zgd;#92u#Wi=vV*^V_9HjDfu>crz;xpMPkAbo4bF5;gul>?z;)_CvhFe!fdSr*S)sF ztyLc`*?TgRX;_^9=*o5En35OnB4PK7CV&n|UYfEgv)^*>d3eio=Y1<2=B{8wCQK}J z{5_0IGuC5C*eE}c^^q?)o!WxW4K?bLf&sCE>RMz=Xc7Su8z!7rqhE+Ly zYh5^}n~mf0y<`{I9}em0Z=&JiDES1>{|v+ymE~+cedUJCivMwAMKQn7fIC;+^(Eqy zKvbssB#1^kH^_eQBsNFAx@|WW!JCt~=O)|?7FGKeu20JGw(0k8|8SQqmtY+_gBbow zZ5f`JNU|W2F#w~7VXp};uA##a_n`M1EQn}qzD`EmG_A6od^hUanFR7-aGfdBI8n;G zMrh+0o8MqfS>#0exZxQ`{I#TN;_g|Ik1s2jv= zY_#!kv=vA5L(0}vL7p~tSs6w+1%CGvleEpQJ%4_P4K4-4--DF+vwMhDOBhoRwm268 zY{=Q?K|eR*&C72F^^RGPHn_Vy#MlDZ!!ucWR zs4H>u>~MbQz}Va1YiL02!-4T>=-!_ZFP2bK%pd917a_$|hAOiXm6nStH%MSL1dSe^ zf)NsA2!ELejR4GVfd*GT%WtXpn?l>jSFX-|geBC7!EH4uS zEt|B}Z@u2hfR0tU%KGKTUbgOkiDo{TDTIzA6#VrMy$jzdA+0^tr zwysX=konH=Y4Q1=B!x^}w-8c4rr3zKevR)P7YqasQ!S}bkB1m5h{Z70-DVn}nA5|u z7xZc=j`n<@0%^{eqHgD=s8G@sc4y}0r*O&>va2dRyPZ9X1#4a8xT=QcGc)^i{J7GOTdML46M(AS}MLxi9r$Oxs9Oh`Z*{S zh7$#LIzt*y5391RBM(6k#_D!1BMmOt1&L_lOWy?lm~{yYtCu4B zA89k|AGCQ@#c%-!FTT=a8*7acW$5XiC;JV!FtbHUIjHupx~eiTQ5bq)o;mGbdEr&? 
z7}+jRwmJCPX=AcL!}d7WTe$oeh)$Fs>0!}S&p3Ip#n^OhV(<23rfuT0X4<18TVmBG z-`Jd<)y?A5iX_-jdFdgV(en6LxTX3Q8MAC>KQ?3e057N}#C|2~dl$q~fAQ;-&;-eX zyM9u*goZcN%!%fME|yy?)kdZSC`PwWP?6gxabA>n%a<9;_k=*^ zN-uSc8D@P9P|u5<#fCk4JE?Sk2o!A-713|MP=G}__P`lH>da`@MQ;Yt4+FM)NZM)1 zC&^x(EfMOsKzV8ea&no$zU}c1k+Bi*;fc|~89F{B58!!jV2zQkw~Bz&qm3nZ)RMPH z*w#CBf`NMX8LjxA;x&uXbxke_FJ5j8>*lBFsNz#@+u(R*cm;;$CMb08HVUBJGoMtg z5$bY9uhw{6DdO#wcz+$S>el8b=Sw=>_lFgEP@tN5vheA>jjyK71N7}FYsAf8V)|xm zt%k+SKsUL19ijhTBB3iOIL}c-tIh9f6v(PYc*t@S zMtNJDhv{;)632WF4<9~nJ6N-=tsDnN0V$>#sg!V(e?uG|^qwz=7W9i|vourt25-ca z7YMG{9^$g7#s!TUV3WM@WiI{F zf9DyMtVs*C<(lKP4>Z7nlkTNhuLZu{ich6OPX)O|2R~@UQvVC1dma;*YCvDocIt204m^W)L ztp+4(-3c#D55CI0tV;q4b*)qGH{2E6Qrnz26W8W(B565ipbzRew?LJYMO`7N9 Date: Thu, 13 Oct 2022 16:45:25 +0800 Subject: [PATCH 137/150] add det-demo-tmi.md and docs --- .gitignore | 1 + det-demo-tmi/Dockerfile | 25 ++ det-demo-tmi/README.md | 269 ++++++++++++++++++++ det-demo-tmi/app/start.py | 223 ++++++++++++++++ det-demo-tmi/img-man/infer-template.yaml | 12 + det-demo-tmi/img-man/mining-template.yaml | 11 + det-demo-tmi/img-man/training-template.yaml | 13 + det-demo-tmi/requirements.txt | 4 + docs/det-demo-tmi.md | 1 + docs/ymir-executor-version.md | 4 +- 10 files changed, 561 insertions(+), 2 deletions(-) create mode 100644 det-demo-tmi/Dockerfile create mode 100644 det-demo-tmi/README.md create mode 100644 det-demo-tmi/app/start.py create mode 100644 det-demo-tmi/img-man/infer-template.yaml create mode 100644 det-demo-tmi/img-man/mining-template.yaml create mode 100644 det-demo-tmi/img-man/training-template.yaml create mode 100644 det-demo-tmi/requirements.txt create mode 100644 docs/det-demo-tmi.md diff --git a/.gitignore b/.gitignore index 5563689..2c245d8 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ yolov4_training/yolov4.conv.137 yolov4_training/build_docker.sh yolov4_training/dockerfile_tmp yolov4_training/yolov4.conv.137 +det-demo-tmi/voc_dog diff --git a/det-demo-tmi/Dockerfile 
b/det-demo-tmi/Dockerfile new file mode 100644 index 0000000..9a742a9 --- /dev/null +++ b/det-demo-tmi/Dockerfile @@ -0,0 +1,25 @@ +# a docker file for an sample training / mining / infer executor + +FROM python:3.8.13-alpine + +# Add bash +RUN apk add bash +# Required to build numpy wheel +RUN apk add g++ + +COPY requirements.txt ./ +RUN pip3 install -r requirements.txt + +WORKDIR /app +# copy user code to WORKDIR +COPY ./app/start.py /app/ + +# copy user config template to /img-man +RUN mkdir -p /img-man +COPY img-man/*-template.yaml /img-man/ + +# entry point for your app +# the whole docker image will be started with `nvidia-docker run ` +# and this command will run automatically +RUN echo "python /app/start.py" > /usr/bin/start.sh +CMD bash /usr/bin/start.sh diff --git a/det-demo-tmi/README.md b/det-demo-tmi/README.md new file mode 100644 index 0000000..4d08199 --- /dev/null +++ b/det-demo-tmi/README.md @@ -0,0 +1,269 @@ +# ymir 用户自定义镜像制作指南 + +## 目的 + +此文档面向以下人员: + +* 为 ymir 开发训练,挖掘及推理镜像的算法人员及工程人员 + +* 希望将已经有的训练,挖掘及推理镜像对接到 ymir 系统的算法及工程人员 + +此文档将详细描述如何使用 ymir executor framework 开发新的镜像。 + +![](../docs/ymir-docker-develop.drawio.png) + +## 准备工作 + +1. 下载 ymir 工程 并构建自己的demo镜像: + +``` +git clone https://github.com/modelai/ymir-executor-fork -b ymir-dev +cd ymir-executor-fork/det-demo-tmi + +docker build -t ymir/executor:det-demo-tmi . +``` + +2. 下载voc dog 数据集 + +``` +sudo apt install wget unzip + +wget https://github.com/modelai/ymir-executor-fork/releases/download/dataset/voc_dog_debug_sample.zip -O voc_dog_debug_sample.zip + +unzip voc_dog_debug_sample.zip +``` +运行上述脚本将得到如下目录 +``` +voc_dog +├── in # 输入目录 +│ ├── annotations # 标注文件目录 +│ ├── assets # 图像文件目录 +│ ├── train-index.tsv # 训练集索引文件 +│ └── val-index.tsv # 验证集索引文件 +└── out # 输出目录 +``` + +3. 
配置 `/in/env.yaml` 与 `/in/config.yaml` + + * 示例 `voc_dog/in/env.yaml` + protocol_version: ymir1.3.0之后添加的字段,说明ymir接口版本 + + ``` + task_id: task0 + protocol_version: 1.0.0 + run_training: True + run_mining: False + run_infer: False + input: + root_dir: /in + assets_dir: /in/assets + annotations_dir: /in/annotations + models_dir: /in/models + training_index_file: /in/train-index.tsv + val_index_file: /in/val-index.tsv + candidate_index_file: /in/candidate-index.tsv + config_file: /in/config.yaml + output: + root_dir: /out + models_dir: /out/models + tensorboard_dir: /out/tensorboard + training_result_file: /out/models/result.yaml + mining_result_file: /out/result.tsv + infer_result_file: /out/infer-result.json + monitor_file: /out/monitor.txt + executor_log_file: /out/ymir-executor-out.log + ``` + + * 示例 `voc_dog/in/config.yaml` + ``` + class_names: + - dog + export_format: ark:raw + gpu_count: 1 + # gpu_id: '0,1,2,3' + gpu_id: '0' + pretrained_model_params: [] + shm_size: 128G + task_id: t00000020000020167c11661328921 + + # just for test, remove this key in your own docker image + expected_map: 0.983 # expected map for training task + idle_seconds: 60 # idle seconds for each task + ``` + +4. 运行测试镜像 +``` +# 交互式运行 +docker run -it --rm -v $PWD/voc_dog/in:/in -v $PWD/voc_dog/out:/out ymir/executor:det-demo-tmi bash +> bash /usr/bin/start.sh + +# 直接运行 +docker run --rm -v $PWD/voc_dog/in:/in -v $PWD/voc_dog/out:/out ymir/executor:det-demo-tmi +``` + +## ymir 对镜像的调用流程 + +ymir 通过 mir train / mir mining / mir infer 命令启动镜像,遵循以下步骤: + +1. 导出镜像需要用的图像资源以及标注资源文件 + +2. 准备镜像配置 config.yaml 及 env.yaml + +3. 
通过 nvidia-docker run 激活镜像,在启动镜像时,将提供以下目录及文件: + +| 目录或文件 | 说明 | 权限 | +| --- | --- | --- | +| `/in/env.yaml` | 任务类型,任务 id,数据集索引文件位置等信息 | 只读 | +| `/in/config.yaml` | 镜像本身所用到的超参等标注信息 | 只读 | +| `/in/*-index.tsv` | 数据集索引文件 | 只读 | +| `/in/models` | 预训练模型存放目录 | 只读 | +| `/in/assets` | 图像资源存放目录 | 只读 | +| `/in/annotations` | 标注文件存放目录 | 只读 | +| `/out/tensorboard` | tensorboard 日志写入目录 | 读写 | +| `/out/models` | 结果模型保存目录 | 读写 | + +4. 镜像启动以后,完成自己的训练、挖掘或推理任务,将相应结果写入对应文件,若成功,则返回 0,若失败,则返回非 0 错误码 + +5. ymir 将正确结果或异常结果归档,完成整个过程 + +## 训练、挖掘与推理镜像的通用部分开发 + +app/start.py 展示了一个简单的镜像执行部分,此文档也将基于这个样例工程来说明如何使用框架来开发镜像。 + +关于这个文件,有以下部分值得注意: + +1. 在 Dockerfile 中,最后一条命令说明了:当此镜像被 ymir 系统通过 nvidia-docker run 启动时,默认执行的是 `python /app/start.py` 命令,也就是此工程中的 `app/start.py` 文件 + +2. 镜像框架相关的所有内容都在 `ymir_exc` 包中,包括以下部分: + + * `env`:环境,提供任务类型,任务 id 等信息 + + * `dataset_reader`:使用数据集读取器来取得数据集信息 + + * `result_writer`:写入训练,挖掘以及推理结果 + + * `monitor`:写入进度信息 + + * `util`: 常用函数, 如`get_merged_config()` + +3. 使用 `cfg=util.get_merged_config()` 可以取得默认的 `EasyDict` 实例,这个实例的`cfg.ymir`来源于文件 `/in/env.yaml`,如果出于测试的目的想要更改这个默认文件,可以直接更改 `settings.DEFAULT_ENV_FILE_PATH`,但在实际封装成镜像的时候,应该把它的值重新指回成默认的 `/in/env.yaml`. `cfg.param`则来源于`/in/config.yaml` + +4. 在 `start()` 方法中,通过 `cfg.ymir` 中的 `run_training` / `run_mining` / `run_infer` 来判断本次需要执行的任务类型。如果任务类型是本镜像不支持的,可以直接报错 + +5. 虽然 `app/start.py` 展示的是一个训练,挖掘和推理多合一的镜像,开发者也可以分成若干个独立的镜像,例如,训练一个,挖掘和推理合成一个 + +## 训练过程 + +`app/start.py` 中的函数 `_run_training` 展示了一个训练功能的样例,有以下部分需要注意: + +1. 超参的取得 + + * 使用 `cfg.param` 取得外部传入的超参数等信息 + + * 每个训练镜像都应该准备一个超参模板 `training-template.yaml`,ymir 系统将以此模板为基础提供超参 + + * 以下 key 为保留字,将由系统指定: + +| key | 类型 | 说明 | +| --- | --- | --- | +| class_names | list | 类别 | +| gpu_id | str | 可使用的 gpu id,以英文逗号分隔,如果为空,则表示用 cpu 训练 | +| pretrained_model_params | list | 预训练模型列表,如果指定了,则表示需要基于此模型做继续训练 | + +2. 
   * 例如,如果需要保存 stage_name 为 'epoch-5000' 的模型,则需要把这些模型文件保存到 `os.path.join(cfg.ymir.output.models_dir, 'epoch-5000')` 目录下
结果的保存 + + * 使用 `result_writer.write_mining_result()` 对挖掘结果进行保存, 结果将保存到`cfg.ymir.output.mining_result_file`,ymir将依据这个文件进行新数据集生成。 + +## 推理过程 + +所谓推理过程指的是:提供一个基础模型,以及一个不带标注的候选数据集,在此候选数据集上进行模型推理,得到每张图片的 detection 结果(框,类别,得分),并保存此结果。 + +`app/start.py` 中的函数 `_run_infer` 展示了一个推理过程的样例,有以下部分需要注意: + +1. 参数的取得:同数据挖掘过程 + +2. 候选集的取得:同数据挖掘过程, 也是利用文件 `cfg.ymir.input.candidate_index_file` + +3. 结果的保存 + + * 推理结果本身是一个 dict,key 是候选集图片的路径,value 是一个由 `result_writer.Annotation` 构成的 list + + * 使用 `result_writer.write_infer_result()` 保存推理结果, 推理结果将保存到`cfg.ymir.output.infer_result_file`, ymir将依据这个文件进行结果展示与新数据集生成。 + +## 镜像打包 + +可以在 `Dockerfile` 的基础上构建自己的打包脚本 + +## 测试 + +可以使用以下几种方式进行测试: + +1. 通过 `ymir-executor-verifier` 进行测试 + +2. 通过 ymir web 系统进行测试 + +3. 通过 ymir 命令行启动 mir train / mir mining / mir infer 命令进行测试 + + diff --git a/det-demo-tmi/app/start.py b/det-demo-tmi/app/start.py new file mode 100644 index 0000000..2b8e877 --- /dev/null +++ b/det-demo-tmi/app/start.py @@ -0,0 +1,223 @@ +import logging +import os +import random +import sys +import time +from typing import List + +# view https://github.com/protocolbuffers/protobuf/issues/10051 for detail +os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') +from tensorboardX import SummaryWriter +from easydict import EasyDict as edict +from ymir_exc import monitor +from ymir_exc import result_writer as rw +from ymir_exc.util import get_merged_config + + +def start() -> int: + cfg = get_merged_config() + + if cfg.ymir.run_training: + _run_training(cfg) + if cfg.ymir.run_mining: + _run_mining(cfg) + if cfg.ymir.run_infer: + _run_infer(cfg) + + return 0 + + +def _run_training(cfg: edict) -> None: + """ + sample function of training, which shows: + 1. how to get config file + 2. how to read training and validation datasets + 3. how to write logs + 4. how to write training result + """ + #! 
        #! use `monitor.write_monitor_logger` to write task process percent to monitor.txt
model output dir: os.path.join(cfg.ymir.output.models_dir, your_stage_name) + stage_dir = os.path.join(cfg.ymir.output.models_dir, 'epoch10') + os.makedirs(stage_dir, exist_ok=True) + with open(os.path.join(stage_dir, 'epoch10.pt'), 'w') as f: + f.write('fake model weight') + with open(os.path.join(stage_dir, 'config.py'), 'w') as f: + f.write('fake model config file') + #! use `rw.write_model_stage` to save training result + rw.write_model_stage(stage_name='epoch10', files=['epoch10.pt', 'config.py'], mAP=random.random() / 2) + + _dummy_work(idle_seconds=idle_seconds, trigger_crash=trigger_crash) + + write_tensorboard_log(cfg.ymir.output.tensorboard_dir) + + stage_dir = os.path.join(cfg.ymir.output.models_dir, 'epoch20') + os.makedirs(stage_dir, exist_ok=True) + with open(os.path.join(stage_dir, 'epoch20.pt'), 'w') as f: + f.write('fake model weight') + with open(os.path.join(stage_dir, 'config.py'), 'w') as f: + f.write('fake model config file') + rw.write_model_stage(stage_name='epoch20', files=['epoch20.pt', 'config.py'], mAP=expected_mAP) + + #! if task done, write 100% percent log + logging.info('training done') + monitor.write_monitor_logger(percent=1.0) + + +def _run_mining(cfg: edict) -> None: + #! use `cfg.param` to get config file for training + # pretrained models in `cfg.ymir.input.models_dir` + gpu_id: str = cfg.param.get(key='gpu_id') + class_names: List[str] = cfg.param.get(key='class_names') + idle_seconds: float = cfg.param.get('idle_seconds', 60) + trigger_crash: bool = cfg.param.get('trigger_crash', False) + #! use `logging` or `print` to write log to console + logging.info(f"mining config: {cfg.param}") + logging.info(f'gpu device: {gpu_id}') + logging.info(f'dataset class names: {class_names}') + + #! use `cfg.input.candidate_index_file` to read candidate dataset items + # note that annotations path will be empty str if there's no annotations in that dataset + #! 
count for image files + with open(cfg.ymir.input.candidate_index_file, 'r') as fp: + lines = fp.readlines() + + valid_images = [] + valid_image_count = 0 + for line in lines: + if os.path.isfile(line.strip()): + valid_image_count += 1 + valid_images.append(line.strip()) + + #! use `monitor.write_monitor_logger` to write task process to monitor.txt + logging.info(f"assets count: {len(lines)}, valid: {valid_image_count}") + monitor.write_monitor_logger(percent=0.2) + + _dummy_work(idle_seconds=idle_seconds, trigger_crash=trigger_crash) + + #! write mining result + # here we give a fake score to each assets + total_length = len(valid_images) + mining_result = [(asset_path, index / total_length) for index, asset_path in enumerate(valid_images)] + rw.write_mining_result(mining_result=mining_result) + + #! if task done, write 100% percent log + logging.info('mining done') + monitor.write_monitor_logger(percent=1.0) + + +def _run_infer(cfg: edict) -> None: + #! use `cfg.param` to get config file for training + # models are transfered in `cfg.ymir.input.models_dir` model_params_path + class_names = cfg.param.get('class_names') + idle_seconds: float = cfg.param.get('idle_seconds', 60) + trigger_crash: bool = cfg.param.get('trigger_crash', False) + seed: int = cfg.param.get('seed', 15) + #! use `logging` or `print` to write log to console + logging.info(f"infer config: {cfg.param}") + + #! use `cfg.ymir.input.candidate_index_file` to read candidate dataset items + # note that annotations path will be empty str if there's no annotations in that dataset + with open(cfg.ymir.input.candidate_index_file, 'r') as fp: + lines = fp.readlines() + + valid_images = [] + invalid_images = [] + valid_image_count = 0 + for line in lines: + if os.path.isfile(line.strip()): + valid_image_count += 1 + valid_images.append(line.strip()) + else: + invalid_images.append(line.strip()) + + #! 
use `monitor.write_monitor_logger` to write log to console and write task process percent to monitor.txt + logging.info(f"assets count: {len(lines)}, valid: {valid_image_count}") + monitor.write_monitor_logger(percent=0.2) + + _dummy_work(idle_seconds=idle_seconds, trigger_crash=trigger_crash) + + #! write infer result + fake_anns = [] + random.seed(seed) + for class_name in class_names: + x = random.randint(0, 100) + y = random.randint(0, 100) + w = random.randint(50, 100) + h = random.randint(50, 100) + ann = rw.Annotation(class_name=class_name, score=random.random(), box=rw.Box(x=x, y=y, w=w, h=h)) + + fake_anns.append(ann) + + infer_result = {asset_path: fake_anns for asset_path in valid_images} + for asset_path in invalid_images: + infer_result[asset_path] = [] + rw.write_infer_result(infer_result=infer_result) + + #! if task done, write 100% percent log + logging.info('infer done') + monitor.write_monitor_logger(percent=1.0) + + +def _dummy_work(idle_seconds: float, trigger_crash: bool = False, gpu_memory_size: int = 0) -> None: + if idle_seconds > 0: + time.sleep(idle_seconds) + if trigger_crash: + raise RuntimeError('app crashed') + + +def write_tensorboard_log(tensorboard_dir: str) -> None: + tb_log = SummaryWriter(tensorboard_dir) + + total_epoch = 30 + for e in range(total_epoch): + tb_log.add_scalar("fake_loss", 10 / (1 + e), e) + time.sleep(1) + monitor.write_monitor_logger(percent=e / total_epoch) + + +if __name__ == '__main__': + logging.basicConfig(stream=sys.stdout, + format='%(levelname)-8s: [%(asctime)s] %(message)s', + datefmt='%Y%m%d-%H:%M:%S', + level=logging.INFO) + sys.exit(start()) diff --git a/det-demo-tmi/img-man/infer-template.yaml b/det-demo-tmi/img-man/infer-template.yaml new file mode 100644 index 0000000..b3d45dd --- /dev/null +++ b/det-demo-tmi/img-man/infer-template.yaml @@ -0,0 +1,12 @@ +# infer template for your executor app +# after build image, it should at /img-man/infer-template.yaml +# key: gpu_id, task_id, 
model_params_path, class_names should be preserved + +gpu_id: '0' +task_id: 'default-infer-task' +model_params_path: [] +class_names: [] + +# just for test, remove this key in your own docker image +idle_seconds: 3 # idle seconds for each task +seed: 15 diff --git a/det-demo-tmi/img-man/mining-template.yaml b/det-demo-tmi/img-man/mining-template.yaml new file mode 100644 index 0000000..5927eca --- /dev/null +++ b/det-demo-tmi/img-man/mining-template.yaml @@ -0,0 +1,11 @@ +# mining template for your executor app +# after build image, it should at /img-man/mining-template.yaml +# key: gpu_id, task_id, model_params_path, class_names should be preserved + +gpu_id: '0' +task_id: 'default-mining-task' +model_params_path: [] +class_names: [] + +# just for test, remove this key in your own docker image +idle_seconds: 6 # idle seconds for each task diff --git a/det-demo-tmi/img-man/training-template.yaml b/det-demo-tmi/img-man/training-template.yaml new file mode 100644 index 0000000..f114648 --- /dev/null +++ b/det-demo-tmi/img-man/training-template.yaml @@ -0,0 +1,13 @@ +# training template for your executor app +# after build image, it should at /img-man/training-template.yaml +# key: gpu_id, task_id, pretrained_model_paths, class_names should be preserved + +gpu_id: '0' +task_id: 'default-training-task' +pretrained_model_params: [] +class_names: [] +export_format: 'det-voc:raw' + +# just for test, remove this key in your own docker image +expected_map: 0.983 # expected map for training task +idle_seconds: 60 # idle seconds for each task diff --git a/det-demo-tmi/requirements.txt b/det-demo-tmi/requirements.txt new file mode 100644 index 0000000..0517cf4 --- /dev/null +++ b/det-demo-tmi/requirements.txt @@ -0,0 +1,4 @@ +pydantic>=1.8.2 +pyyaml>=5.4.1 +tensorboardX>=2.4 +-e "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.3.0" diff --git a/docs/det-demo-tmi.md b/docs/det-demo-tmi.md new file mode 100644 index 0000000..b7469a6 --- /dev/null +++ 
b/docs/det-demo-tmi.md @@ -0,0 +1 @@ +# det-demo-tmi diff --git a/docs/ymir-executor-version.md b/docs/ymir-executor-version.md index 247ee13..1c1c30f 100644 --- a/docs/ymir-executor-version.md +++ b/docs/ymir-executor-version.md @@ -4,13 +4,13 @@ - 训练镜像需要指定数据集标注格式, ymir1.1.0默认标注格式为`ark:raw` -- 训练镜像可以获得系统的ymir版本,方便镜像做兼容 +- 训练镜像可以获得系统的ymir接口版本,方便镜像兼容 ## 辅助库 - [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) 采用ymir1.3.0分支 -- [ymir-executor-verifier]() 镜像检查工具 +- [ymir-executor-verifier](https://github.com/modelai/ymir-executor-verifier) 镜像检查工具 # ymir1.1.0 From 81a20197f158a63335dc8c5ddf90f17f7357002f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 13 Oct 2022 16:53:07 +0800 Subject: [PATCH 138/150] update doc --- det-demo-tmi/README.md | 5 +++-- docs/det-demo-tmi.md | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 docs/det-demo-tmi.md diff --git a/det-demo-tmi/README.md b/det-demo-tmi/README.md index 4d08199..c299328 100644 --- a/det-demo-tmi/README.md +++ b/det-demo-tmi/README.md @@ -46,7 +46,8 @@ voc_dog 3. 配置 `/in/env.yaml` 与 `/in/config.yaml` * 示例 `voc_dog/in/env.yaml` - protocol_version: ymir1.3.0之后添加的字段,说明ymir接口版本 + + * protocol_version: ymir1.3.0之后添加的字段,说明ymir接口版本 ``` task_id: task0 @@ -260,7 +261,7 @@ for idx, line in enumerate(lines): 可以使用以下几种方式进行测试: -1. 通过 `ymir-executor-verifier` 进行测试 +1. 通过 [ymir-executor-verifier](https://github.com/modelai/ymir-executor-verifier) 进行测试 2. 
通过 ymir web 系统进行测试 diff --git a/docs/det-demo-tmi.md b/docs/det-demo-tmi.md deleted file mode 100644 index b7469a6..0000000 --- a/docs/det-demo-tmi.md +++ /dev/null @@ -1 +0,0 @@ -# det-demo-tmi From 424cd4e8e0d9d5faa48ba82fbb2f4f86f62e1778 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Mon, 17 Oct 2022 15:13:30 +0800 Subject: [PATCH 139/150] add verifier to doc, add ymir_saved_file_patterns to training-template.yaml --- README.MD | 6 +++++- README_zh-CN.MD | 4 ++++ det-demo-tmi/README.md | 14 ++++++++++---- det-mmdetection-tmi/training-template.yaml | 3 ++- det-yolov5-tmi/mining/ymir_infer.py | 2 +- det-yolov5-tmi/start.py | 16 ++++++---------- det-yolov5-tmi/train.py | 22 +++++++++++++++++----- det-yolov5-tmi/training-template.yaml | 6 ++++-- 8 files changed, 49 insertions(+), 24 deletions(-) diff --git a/README.MD b/README.MD index e326aeb..823419c 100644 --- a/README.MD +++ b/README.MD @@ -85,9 +85,13 @@ docker build -t ymir-executor/mmdet:cu111-tmi -f docker/Dockerfile.cuda111 . ## how to custom ymir-executor +- [demo ymir-executor](det-demo-tmi/README.md) from zero to one, build you ymir-executor + - [custom ymir-executor](https://github.com/IndustryEssentials/ymir/blob/dev/dev_docs/ymir-dataset-zh-CN.md) -- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) +- [ymir-executor-sdk](https://github.com/modelai/ymir-executor-sdk) ymir-executor development SDK. 
- [ymir-executor-verifier](https://github.com/modelai/ymir-executor-verifier) debug and check your ymir-executor
   * 同时运行时, 假设先进行挖掘任务, 那么挖掘的进度值在 [0, 0.5] 区间,推理的进度值在 [0.5, 1] 区间。
- gpu = int(ymir_yolov5.gpu_id.split(',')[LOCAL_RANK]) + gpu = max(0, LOCAL_RANK) device = torch.device('cuda', gpu) ymir_yolov5.to(device) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index c250745..03ce300 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -8,8 +8,7 @@ from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw -from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process, - write_ymir_training_result) +from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process) from models.experimental import attempt_download from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file @@ -58,7 +57,8 @@ def _run_training(cfg: edict) -> None: num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 8)) model: str = cfg.param.model img_size: int = int(cfg.param.img_size) - save_period: int = max(1, min(epochs // 10, int(cfg.param.save_period))) + save_period: int = int(cfg.param.save_period) + save_best_only: bool = get_bool(cfg, key='save_best_only', default_value=True) args_options: str = cfg.param.args_options gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 @@ -91,6 +91,9 @@ def _run_training(cfg: edict) -> None: '--workers', str(num_workers_per_gpu) ]) + if save_best_only: + commands.append("--nosave") + if gpu_count > 1 and sync_bn: commands.append("--sync-bn") @@ -102,13 +105,6 @@ def _run_training(cfg: edict) -> None: subprocess.run(commands, check=True) monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) - # 3. 
convert to onnx and save model weight to design directory - opset = cfg.param.opset - command = f'python3 export.py --weights {models_dir}/best.pt --opset {opset} --include onnx' - logging.info(f'export onnx weight: {command}') - subprocess.run(command.split(), check=True) - - write_ymir_training_result(cfg, map50=0, files=[], id='last') # if task done, write 100% percent log monitor.write_monitor_logger(percent=1.0) diff --git a/det-yolov5-tmi/train.py b/det-yolov5-tmi/train.py index 6b5e8ee..54fd2e8 100644 --- a/det-yolov5-tmi/train.py +++ b/det-yolov5-tmi/train.py @@ -21,6 +21,7 @@ from copy import deepcopy from datetime import datetime from pathlib import Path +import subprocess import numpy as np import torch @@ -402,7 +403,7 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Save model - if (not nosave) or (final_epoch and not evolve): # if save + if (not nosave) or (best_fitness == fi) or (final_epoch and not evolve): # if save ckpt = {'epoch': epoch, 'best_fitness': best_fitness, 'model': deepcopy(de_parallel(model)).half(), @@ -416,7 +417,8 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) - if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): + write_ymir_training_result(ymir_cfg, map50=best_fitness, id='best', files=[str(best)]) + if (not nosave) and (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0): torch.save(ckpt, w / f'epoch{epoch}.pt') weight_file = str(w / f'epoch{epoch}.pt') write_ymir_training_result(ymir_cfg, map50=results[2], id=f'epoch_{epoch}', files=[weight_file]) @@ -465,10 +467,20 @@ def lf(x): return (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear callbacks.run('on_train_end', last, best, plots, epoch, results) LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") + opset = 
ymir_cfg.param.opset + onnx_file: Path = best.with_suffix('.onnx') + command = f'python3 export.py --weights {best} --opset {opset} --include onnx' + LOGGER.info(f'export onnx weight: {command}') + subprocess.run(command.split(), check=True) + + if nosave: + # save best.pt and best.onnx + write_ymir_training_result(ymir_cfg, map50=best_fitness, id='best', files=[str(best), str(onnx_file)]) + else: + # set files = [] to save all files in /out/models + write_ymir_training_result(ymir_cfg, map50=best_fitness, id='best', files=[]) + torch.cuda.empty_cache() - # save the best and last weight file with other files in models_dir - if RANK in [-1, 0]: - write_ymir_training_result(ymir_cfg, map50=best_fitness, id=f'epoch_{epochs}', files=[]) return results diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index 4bd27b5..daaf476 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -12,9 +12,11 @@ export_format: 'ark:raw' model: 'yolov5s' batch_size_per_gpu: 16 num_workers_per_gpu: 8 -epochs: 300 +epochs: 100 img_size: 640 opset: 11 args_options: '--exist-ok' +save_best_only: True # save the best weight file only save_period: 10 -sync_bn: False # work for multi-gpu only +sync_bn: False # work for multi-gpu only +ymir_saved_file_patterns: '' # custom saved files, support python regular expression, use , to split multiple pattern From c0d64ba38a0c1eb7142b1b02adc8df27dc7dc4a1 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Tue, 18 Oct 2022 18:03:59 +0800 Subject: [PATCH 140/150] add random and aldd mining algorithm --- det-mmdetection-tmi/docker/Dockerfile.cuda102 | 2 +- det-mmdetection-tmi/docker/Dockerfile.cuda111 | 2 +- det-mmdetection-tmi/mining-template.yaml | 2 + det-mmdetection-tmi/mining_base.py | 137 ++++++++++++++++++ det-mmdetection-tmi/mmdet/utils/util_ymir.py | 10 +- det-mmdetection-tmi/start.py | 8 +- det-mmdetection-tmi/training-template.yaml | 5 +- 
det-mmdetection-tmi/ymir_mining_aldd.py | 58 ++++++++ .../{ymir_mining.py => ymir_mining_cald.py} | 26 ++-- det-mmdetection-tmi/ymir_mining_random.py | 85 +++++++++++ det-yolov5-tmi/start.py | 9 +- 11 files changed, 317 insertions(+), 27 deletions(-) create mode 100644 det-mmdetection-tmi/mining_base.py create mode 100644 det-mmdetection-tmi/ymir_mining_aldd.py rename det-mmdetection-tmi/{ymir_mining.py => ymir_mining_cald.py} (98%) create mode 100644 det-mmdetection-tmi/ymir_mining_random.py diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda102 b/det-mmdetection-tmi/docker/Dockerfile.cuda102 index 6d07aa6..2fd8643 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda102 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda102 @@ -28,7 +28,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC \ # Install ymir-exc sdk and MMCV (no cu102/torch1.8.1, use torch1.8.0 instead) RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html \ - && pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.3.0" \ && conda clean --all # Install det-mmdetection-tmi diff --git a/det-mmdetection-tmi/docker/Dockerfile.cuda111 b/det-mmdetection-tmi/docker/Dockerfile.cuda111 index c811c85..2306105 100644 --- a/det-mmdetection-tmi/docker/Dockerfile.cuda111 +++ b/det-mmdetection-tmi/docker/Dockerfile.cuda111 @@ -26,7 +26,7 @@ RUN apt-get update && apt-get install -y build-essential ffmpeg libsm6 libxext6 # Install ymir-exc sdk and MMCV RUN pip install --no-cache-dir --upgrade pip wheel setuptools \ && pip install --no-cache-dir mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html \ - && pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install 
"git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.3.0" \ && conda clean --all # Install det-mmdetection-tmi diff --git a/det-mmdetection-tmi/mining-template.yaml b/det-mmdetection-tmi/mining-template.yaml index 5649a3c..ed97f01 100644 --- a/det-mmdetection-tmi/mining-template.yaml +++ b/det-mmdetection-tmi/mining-template.yaml @@ -1,3 +1,5 @@ shm_size: '32G' export_format: 'ark:raw' cfg_options: '' +mining_algorithm: cald +class_distribution_scores: '' # 1.0,1.0,0.1,0.2 diff --git a/det-mmdetection-tmi/mining_base.py b/det-mmdetection-tmi/mining_base.py new file mode 100644 index 0000000..5922955 --- /dev/null +++ b/det-mmdetection-tmi/mining_base.py @@ -0,0 +1,137 @@ +import warnings +from typing import List + +import torch +import torch.nn.functional as F +from easydict import EasyDict as edict + + +def binary_classification_entropy(p: torch.Tensor) -> torch.Tensor: + """ + p: BCHW, the feature map after sigmoid, range in (0,1) + F.bce(x,y) = -(y * logx + (1-y) * log(1-x)) + """ + # return -(p * torch.log(p) + (1 - p) * torch.log(1 - p)) + return F.binary_cross_entropy(p, p, reduction='none') + + +def multiple_classification_entropy(p: torch.Tensor, activation: str) -> torch.Tensor: + """ + p: BCHW + + yolov5: sigmoid + nanodet: sigmoid + """ + assert activation in ['sigmoid', 'softmax'], f'classification type = {activation}, not in sigmoid, softmax' + + if activation == 'sigmoid': + entropy = F.binary_cross_entropy(p, p, reduction='none') + sum_entropy = torch.sum(entropy, dim=1, keepdim=True) + return sum_entropy + else: + # for origin aldd code, use tf.log(p + 1e-12) + entropy = -(p) * torch.log(p + 1e-7) + sum_entropy = torch.sum(entropy, dim=1, keepdim=True) + return sum_entropy + + +class FeatureMapBasedMining(object): + + def __init__(self, ymir_cfg: edict): + self.ymir_cfg = ymir_cfg + + def mining(self, feature_maps: List[torch.Tensor]) -> torch.Tensor: + raise Exception('not implement') + + +class ALDDMining(FeatureMapBasedMining): + """ + 
Active Learning for Deep Detection Neural Networks (ICCV 2019) + official code: https://gitlab.com/haghdam/deep_active_learning + + change from tensorflow code to pytorch code + 1. average pooling changed, pad or not? symmetrical pad or not? + 2. max pooling changed, ceil or not? + 3. the resize shape for aggregate feature map + + those small change cause 20%-40% difference for P@N, N=100 for total 1000 images. + P@5: 0.2 + P@10: 0.3 + P@20: 0.35 + P@50: 0.5 + P@100: 0.59 + P@200: 0.73 + P@500: 0.848 + """ + + def __init__(self, ymir_cfg: edict, resize_shape: List[int]): + super().__init__(ymir_cfg) + self.resize_shape = resize_shape + self.max_pool_size = 32 + self.avg_pool_size = 9 + self.align_corners = False + self.num_classes = len(ymir_cfg.param.class_names) + + def extract_conf(self, feature_maps: List[torch.Tensor], format='yolov5') -> List[torch.Tensor]: + """ + extract confidence feature map before sigmoid. + """ + if format == 'yolov5': + # feature_maps: [bs, 3, height, width, xywh + conf + num_classes] + return [f[:, :, :, :, 4] for f in feature_maps] + else: + warnings.warn(f'unknown feature map format {format}') + + return feature_maps + + def mining(self, feature_maps: List[torch.Tensor]) -> torch.Tensor: + """ + feature_maps: [BCHW] + 1. resizing followed by sigmoid + 2. get mining score + """ + # fmap = [Batch size, anchor number = 3, height, width, 5 + class_number] + + list_tmp = [] + for fmap in feature_maps: + resized_fmap = F.interpolate(fmap, self.resize_shape, mode='bilinear', align_corners=self.align_corners) + list_tmp.append(resized_fmap) + conf = torch.cat(list_tmp, dim=1).sigmoid() + scores = self.get_mining_score(conf) + return scores + + def get_mining_score(self, confidence_feature_map: torch.Tensor) -> torch.Tensor: + """ + confidence_feature_map: BCHW, value in (0, 1) + 1. A=sum(avg(entropy(fmap))) B,1,H,W + 2. B=sum(entropy(avg(fmap))) B,1,H,W + 3. C=max(B-A) B,1,h,w + 4. 
mean(C) B + """ + avg_entropy = F.avg_pool2d(self.get_entropy(confidence_feature_map), + kernel_size=self.avg_pool_size, + stride=1, + padding=0) + sum_avg_entropy = torch.sum(avg_entropy, dim=1, keepdim=True) + + entropy_avg = self.get_entropy( + F.avg_pool2d(confidence_feature_map, kernel_size=self.avg_pool_size, stride=1, padding=0)) + sum_entropy_avg = torch.sum(entropy_avg, dim=1, keepdim=True) + + uncertainty = sum_entropy_avg - sum_avg_entropy + + max_uncertainty = F.max_pool2d(uncertainty, + kernel_size=self.max_pool_size, + stride=self.max_pool_size, + padding=0, + ceil_mode=False) + + return torch.mean(max_uncertainty, dim=(1, 2, 3)) + + def get_entropy(self, feature_map: torch.Tensor) -> torch.Tensor: + if self.num_classes == 1: + # binary cross entropy + return binary_classification_entropy(feature_map) + else: + # multi-class cross entropy + return multiple_classification_entropy(feature_map, activation='sigmoid') diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 515c22a..24ef6e9 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -236,7 +236,7 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f raise Exception(f'please set valid environment variable YMIR_MODELS_DIR, invalid directory {WORK_DIR}') # assert only one model config file in work_dir - result_files = [osp.basename(f) for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + result_files = [f for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] if last: # save all output file @@ -245,8 +245,11 @@ def _write_latest_ymir_training_result(last: bool = False, key_score: Optional[f if max_keep_checkpoints > 0: topk_checkpoints = get_topk_checkpoints(result_files, max_keep_checkpoints) result_files = [f for f in result_files if not f.endswith(('.pth', '.pt'))] + topk_checkpoints + + result_files = 
[osp.basename(f) for f in result_files] rw.write_model_stage(files=result_files, mAP=float(map), stage_name='last') else: + result_files = [osp.basename(f) for f in result_files] # save newest weight file in format epoch_xxx.pth or iter_xxx.pth weight_files = [ osp.join(WORK_DIR, f) for f in result_files if f.startswith(('iter_', 'epoch_')) and f.endswith('.pth') @@ -285,13 +288,16 @@ def _write_ancient_ymir_training_result(key_score: Optional[float] = None): WORK_DIR = ymir_cfg.ymir.output.models_dir # assert only one model config file in work_dir - result_files = [osp.basename(f) for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] + result_files = [f for f in glob.glob(osp.join(WORK_DIR, '*')) if osp.basename(f) != 'result.yaml'] max_keep_checkpoints = int(ymir_cfg.param.get('max_keep_checkpoints', 1)) if max_keep_checkpoints > 0: topk_checkpoints = get_topk_checkpoints(result_files, max_keep_checkpoints) result_files = [f for f in result_files if not f.endswith(('.pth', '.pt'))] + topk_checkpoints + # convert to basename + result_files = [osp.basename(f) for f in result_files] + training_result_file = osp.join(WORK_DIR, 'result.yaml') if osp.exists(training_result_file): with open(training_result_file, 'r') as f: diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index b570b2d..220d373 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -37,11 +37,15 @@ def _run_training() -> None: def _run_mining(cfg: edict) -> None: gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count = len(gpu_id.split(',')) + mining_algorithm: str = cfg.param.get('mining_algorithm', 'aldd') + + supported_mining_algorithm = ['cald', 'aldd', 'random'] + assert mining_algorithm in supported_mining_algorithm, f'unknown mining_algorithm {mining_algorithm}, not in {supported_mining_algorithm}' if gpu_count <= 1: - command = 'python3 ymir_mining.py' + command = f'python3 ymir_mining_{mining_algorithm}.py' else: port 
= find_free_port() - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} ymir_mining.py' # noqa + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} ymir_mining_{mining_algorithm}.py' # noqa logging.info(f'start mining: {command}') subprocess.run(command.split(), check=True) diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index c3f3e7d..f04e51a 100644 --- a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -1,13 +1,12 @@ shm_size: '32G' export_format: 'ark:raw' -samples_per_gpu: 16 +samples_per_gpu: 16 # batch size per gpu workers_per_gpu: 8 -max_epochs: 300 +max_epochs: 100 config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' cfg_options: '' metric: 'bbox' val_interval: 1 # <0 means evaluation every interval max_keep_checkpoints: 1 # <0 means save all weight file, 1 means save last and best weight files, k means save topk best weight files and topk epoch/step weigth files -port: 12345 ymir_saved_file_patterns: '' # custom saved files, support python regular expression, use , to split multiple pattern diff --git a/det-mmdetection-tmi/ymir_mining_aldd.py b/det-mmdetection-tmi/ymir_mining_aldd.py new file mode 100644 index 0000000..4115d09 --- /dev/null +++ b/det-mmdetection-tmi/ymir_mining_aldd.py @@ -0,0 +1,58 @@ +import sys + +from easydict import EasyDict as edict +from mmcv.parallel import collate, scatter +from mmdet.datasets import replace_ImageToTensor +from mmdet.datasets.pipelines import Compose +from mmdet.models.detectors import SingleStageDetector, TwoStageDetector +from ymir_exc.util import get_merged_config + +from .mining_base import ALDDMining +from .ymir_infer import YmirModel +from .ymir_mining_random import RandomMiner + + +class ALDDMiner(RandomMiner): + + def __init__(self, cfg: edict): + super().__init__(cfg) + self.ymir_model = 
YmirModel(cfg) + mmdet_cfg = self.ymir_model.cfg + mmdet_cfg.data.test.pipeline = replace_ImageToTensor(mmdet_cfg.data.test.pipeline) + self.test_pipeline = Compose(cfg.data.test.pipeline) + self.aldd_miner = ALDDMining(cfg, [640, 640]) + + def compute_score(self, asset_path: str) -> int: + dict_data = dict(img_info=dict(filename=asset_path), img_prefix=None) + pipeline_data = self.test_pipeline(dict_data) + data = collate([pipeline_data], samples_per_gpu=1) + # just get the actual data from DataContainer + data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']] + data['img'] = [img.data[0] for img in data['img']] + # scatter to specified GPU + data = scatter(data, [self.device])[0] + + if isinstance(self.ymir_model.model, SingleStageDetector): + cls_score, bbox_pred = self.ymir_model.model.forward_dummy(data['img']) + mining_score = self.aldd_miner(bbox_pred) + + return mining_score + elif isinstance(self.ymir_model.model, TwoStageDetector): + # (rpn_outs, roi_outs) + # outs = self.ymir_model.model.forward_dummy(img) + raise NotImplementedError('aldd mining is currently not currently supported TwoStageDetector {}'.format( + self.ymir_model.model.__class__.__name__)) + else: + raise NotImplementedError('aldd mining is currently not currently supported with {}'.format( + self.ymir_model.model.__class__.__name__)) + + +def main(): + cfg = get_merged_config() + miner = ALDDMiner(cfg) + miner.mining() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/det-mmdetection-tmi/ymir_mining.py b/det-mmdetection-tmi/ymir_mining_cald.py similarity index 98% rename from det-mmdetection-tmi/ymir_mining.py rename to det-mmdetection-tmi/ymir_mining_cald.py index 506506d..fe437ff 100644 --- a/det-mmdetection-tmi/ymir_mining.py +++ b/det-mmdetection-tmi/ymir_mining_cald.py @@ -251,6 +251,7 @@ def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: class YmirMining(YmirModel): + def __init__(self, cfg: edict): super().__init__(cfg) 
if cfg.ymir.run_mining and cfg.ymir.run_infer: @@ -267,6 +268,8 @@ def __init__(self, cfg: edict): def mining(self): with open(self.cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] + + max_barrier_times = len(images) // WORLD_SIZE if RANK == -1: N = len(images) tbar = tqdm(images) @@ -282,9 +285,15 @@ def mining(self): idx = -1 beta = 1.3 mining_result = [] - for asset_path in tbar: + for idx, asset_path in enumerate(tbar): + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=self.task_idx, + task_num=self.task_num) + monitor.write_monitor_logger(percent=percent) # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1: + if WORLD_SIZE > 1 and idx < max_barrier_times: dist.barrier() img = cv2.imread(asset_path) @@ -332,16 +341,8 @@ def mining(self): consistency /= len(aug_results_dict) mining_result.append((asset_path, consistency)) - idx += 1 - - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, - p=idx / N, - task_idx=self.task_idx, - task_num=self.task_num) - monitor.write_monitor_logger(percent=percent) - if RANK != -1: + if WORLD_SIZE > 1: mining_result = collect_results_gpu(mining_result, len(images)) return mining_result @@ -393,8 +394,7 @@ def main(): cfg = get_merged_config() miner = YmirMining(cfg) - gpu_id: str = str(cfg.param.get('gpu_id', '0')) - gpu = int(gpu_id.split(',')[LOCAL_RANK]) + gpu = max(0, LOCAL_RANK) device = torch.device('cuda', gpu) miner.model.to(device) mining_result = miner.mining() diff --git a/det-mmdetection-tmi/ymir_mining_random.py b/det-mmdetection-tmi/ymir_mining_random.py new file mode 100644 index 0000000..097c000 --- /dev/null +++ b/det-mmdetection-tmi/ymir_mining_random.py @@ -0,0 +1,85 @@ +import os +import random +import sys + +import torch.distributed as dist +from easydict import EasyDict as edict +from mmcv.runner import init_dist +from mmdet.apis.test import collect_results_gpu 
+from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config, write_ymir_monitor_process + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +class RandomMiner(object): + + def __init__(self, cfg: edict): + if LOCAL_RANK != -1: + init_dist(launcher='pytorch', backend="nccl" if dist.is_nccl_available() else "gloo") + + self.cfg = cfg + gpu = max(0, LOCAL_RANK) + self.device = f'cuda:{gpu}' + + def mining(self): + with open(self.cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + max_barrier_times = len(images) // WORLD_SIZE + if RANK == -1: + N = len(images) + tbar = tqdm(images) + else: + images_rank = images[RANK::WORLD_SIZE] + N = len(images_rank) + if RANK == 0: + tbar = tqdm(images_rank) + else: + tbar = images_rank + + monitor_gap = max(1, N // 100) + + mining_result = [] + for idx, asset_path in enumerate(tbar): + if idx % monitor_gap == 0: + write_ymir_monitor_process(cfg=self.cfg, + task='mining', + naive_stage_percent=idx / N, + stage=YmirStage.TASK, + task_order='tmi') + + if WORLD_SIZE > 1 and idx < max_barrier_times: + dist.barrier() + + consistency = self.compute_score(asset_path=asset_path) + mining_result.append((asset_path, consistency)) + + if WORLD_SIZE > 1: + mining_result = collect_results_gpu(mining_result, len(images)) + + if RANK in [0, -1]: + rw.write_mining_result(mining_result=mining_result) + write_ymir_monitor_process(cfg=self.cfg, + task='mining', + naive_stage_percent=1, + stage=YmirStage.POSTPROCESS, + task_order='tmi') + return mining_result + + def compute_score(self, asset_path: str) -> float: + return random.random() + + +def main(): + cfg = get_merged_config() + miner = RandomMiner(cfg) + miner.mining() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 03ce300..3c8f483 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -5,13 +5,12 @@ import cv2 from easydict import EasyDict as edict +from models.experimental import attempt_download +from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw -from ymir_exc.util import (YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process) - -from models.experimental import attempt_download -from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file +from ymir_exc.util import YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process def start(cfg: edict) -> int: @@ -63,7 +62,7 @@ def _run_training(cfg: edict) -> None: gpu_id: str = str(cfg.param.get('gpu_id', '0')) gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 batch_size: int = batch_size_per_gpu * max(1, gpu_count) - port: int = int(cfg.param.get('port', 29500)) + port: int = find_free_port() sync_bn: bool = get_bool(cfg, key='sync_bn', default_value=False) weights = get_weight_file(cfg) From 3230af1e72088444098ca06b48285a49418bfc6c Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 19 Oct 2022 12:09:19 +0800 Subject: [PATCH 141/150] fix training class_number bug --- det-mmdetection-tmi/README.md | 6 ++- det-mmdetection-tmi/mining_base.py | 2 +- .../mmdet/core/evaluation/eval_hooks.py | 16 +++--- det-mmdetection-tmi/mmdet/utils/util_ymir.py | 22 ++++---- det-mmdetection-tmi/ymir_mining_aldd.py | 54 ++++++++++++------- det-mmdetection-tmi/ymir_mining_random.py | 4 +- 6 files changed, 63 insertions(+), 41 deletions(-) diff --git a/det-mmdetection-tmi/README.md b/det-mmdetection-tmi/README.md index 5c1934d..8795930 100644 --- a/det-mmdetection-tmi/README.md +++ b/det-mmdetection-tmi/README.md @@ -7,11 +7,12 @@ # build docker image ``` -docker build -t 
ymir-executor/mmdet:cuda102-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda102 . +docker build -t ymir-executor/mmdet:cuda102-tmi --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda102 . -docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda111 . +docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg YMIR=1.1.0 -f docker/Dockerfile.cuda111 . ``` + # changelog - modify `mmdet/datasets/coco.py`, save the evaluation result to `os.environ.get('COCO_EVAL_TMP_FILE')` with json format - modify `mmdet/core/evaluation/eval_hooks.py`, write training result file and monitor task process @@ -29,3 +30,4 @@ docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg SERVER_MODE=dev --bu --- - 2022/09/06: set `find_unused_parameters = True`, fix DDP bug +- 2022/10/18: add `random` and `aldd` mining algorithm. `aldd` algorithm support yolox only. diff --git a/det-mmdetection-tmi/mining_base.py b/det-mmdetection-tmi/mining_base.py index 5922955..27ba2f9 100644 --- a/det-mmdetection-tmi/mining_base.py +++ b/det-mmdetection-tmi/mining_base.py @@ -85,7 +85,7 @@ def extract_conf(self, feature_maps: List[torch.Tensor], format='yolov5') -> Lis return feature_maps def mining(self, feature_maps: List[torch.Tensor]) -> torch.Tensor: - """ + """mining for feature maps feature_maps: [BCHW] 1. resizing followed by sigmoid 2. 
get mining score diff --git a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py index b2e7dff..81a36bb 100644 --- a/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py +++ b/det-mmdetection-tmi/mmdet/core/evaluation/eval_hooks.py @@ -49,10 +49,10 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" if self.by_epoch: - monitor_interval = max(1, runner.max_epochs//1000) + monitor_interval = max(1, runner.max_epochs // 1000) if runner.epoch % monitor_interval == 0: percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) + stage=YmirStage.TASK, p=runner.epoch / runner.max_epochs) monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) @@ -62,10 +62,10 @@ def before_train_iter(self, runner): def after_train_iter(self, runner): if not self.by_epoch: - monitor_interval = max(1, runner.max_iters//1000) + monitor_interval = max(1, runner.max_iters // 1000) if runner.iter % monitor_interval == 0: percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.iter/runner.max_iters) + stage=YmirStage.TASK, p=runner.iter / runner.max_iters) monitor.write_monitor_logger(percent=percent) super().after_train_iter(runner) @@ -119,10 +119,10 @@ def before_train_epoch(self, runner): def after_train_epoch(self, runner): """Report the training process for ymir""" if self.by_epoch and runner.rank == 0: - monitor_interval = max(1, runner.max_epochs//1000) + monitor_interval = max(1, runner.max_epochs // 1000) if runner.epoch % monitor_interval == 0: percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.epoch/runner.max_epochs) + stage=YmirStage.TASK, p=runner.epoch / runner.max_epochs) monitor.write_monitor_logger(percent=percent) super().after_train_epoch(runner) @@ -132,10 +132,10 @@ def before_train_iter(self, runner): def after_train_iter(self, runner): if not self.by_epoch and runner.rank == 
0: - monitor_interval = max(1, runner.max_iters//1000) + monitor_interval = max(1, runner.max_iters // 1000) if runner.iter % monitor_interval == 0: percent = get_ymir_process( - stage=YmirStage.TASK, p=runner.iter/runner.max_iters) + stage=YmirStage.TASK, p=runner.iter / runner.max_iters) monitor.write_monitor_logger(percent=percent) super().after_train_iter(runner) diff --git a/det-mmdetection-tmi/mmdet/utils/util_ymir.py b/det-mmdetection-tmi/mmdet/utils/util_ymir.py index 24ef6e9..6cb9ae2 100644 --- a/det-mmdetection-tmi/mmdet/utils/util_ymir.py +++ b/det-mmdetection-tmi/mmdet/utils/util_ymir.py @@ -5,12 +5,12 @@ import logging import os import os.path as osp -from typing import Any, Iterable, List, Optional +from typing import Any, Iterable, List, Optional, Union import mmcv import yaml from easydict import EasyDict as edict -from mmcv import Config +from mmcv import Config, ConfigDict from nptyping import NDArray, Shape, UInt8 from packaging.version import Version from ymir_exc import result_writer as rw @@ -27,7 +27,8 @@ def modify_mmcv_config(mmcv_cfg: Config, ymir_cfg: edict) -> None: - modify model output channel - modify epochs, checkpoint, tensorboard config """ - def recursive_modify_attribute(mmcv_cfg: Config, attribute_key: str, attribute_value: Any): + + def recursive_modify_attribute(mmcv_cfgdict: Union[Config, ConfigDict], attribute_key: str, attribute_value: Any): """ recursive modify mmcv_cfg: 1. mmcv_cfg.attribute_key to attribute_value @@ -35,14 +36,15 @@ def recursive_modify_attribute(mmcv_cfg: Config, attribute_key: str, attribute_v 3. mmcv_cfg.xxx[i].attribute_key to attribute_value (i=0, 1, 2 ...) 4. 
mmcv_cfg.xxx[i].xxx.xxx[j].attribute_key to attribute_value """ - for key in mmcv_cfg: + for key in mmcv_cfgdict: if key == attribute_key: - mmcv_cfg[key] = attribute_value - elif isinstance(mmcv_cfg[key], Config): - recursive_modify_attribute(mmcv_cfg[key], attribute_key, attribute_value) - elif isinstance(mmcv_cfg[key], Iterable): - for cfg in mmcv_cfg[key]: - if isinstance(cfg, Config): + mmcv_cfgdict[key] = attribute_value + logging.info(f'modify {mmcv_cfgdict}, {key} = {attribute_value}') + elif isinstance(mmcv_cfgdict[key], (Config, ConfigDict)): + recursive_modify_attribute(mmcv_cfgdict[key], attribute_key, attribute_value) + elif isinstance(mmcv_cfgdict[key], Iterable): + for cfg in mmcv_cfgdict[key]: + if isinstance(cfg, (Config, ConfigDict)): recursive_modify_attribute(cfg, attribute_key, attribute_value) # modify dataset config diff --git a/det-mmdetection-tmi/ymir_mining_aldd.py b/det-mmdetection-tmi/ymir_mining_aldd.py index 4115d09..51b5c13 100644 --- a/det-mmdetection-tmi/ymir_mining_aldd.py +++ b/det-mmdetection-tmi/ymir_mining_aldd.py @@ -1,15 +1,15 @@ import sys +import torch from easydict import EasyDict as edict +from mining_base import ALDDMining from mmcv.parallel import collate, scatter from mmdet.datasets import replace_ImageToTensor from mmdet.datasets.pipelines import Compose -from mmdet.models.detectors import SingleStageDetector, TwoStageDetector +from mmdet.models.detectors import YOLOX from ymir_exc.util import get_merged_config - -from .mining_base import ALDDMining -from .ymir_infer import YmirModel -from .ymir_mining_random import RandomMiner +from ymir_infer import YmirModel +from ymir_mining_random import RandomMiner class ALDDMiner(RandomMiner): @@ -17,12 +17,12 @@ class ALDDMiner(RandomMiner): def __init__(self, cfg: edict): super().__init__(cfg) self.ymir_model = YmirModel(cfg) - mmdet_cfg = self.ymir_model.cfg + mmdet_cfg = self.ymir_model.model.cfg mmdet_cfg.data.test.pipeline = 
replace_ImageToTensor(mmdet_cfg.data.test.pipeline) - self.test_pipeline = Compose(cfg.data.test.pipeline) + self.test_pipeline = Compose(mmdet_cfg.data.test.pipeline) self.aldd_miner = ALDDMining(cfg, [640, 640]) - def compute_score(self, asset_path: str) -> int: + def compute_score(self, asset_path: str) -> float: dict_data = dict(img_info=dict(filename=asset_path), img_prefix=None) pipeline_data = self.test_pipeline(dict_data) data = collate([pipeline_data], samples_per_gpu=1) @@ -32,19 +32,35 @@ def compute_score(self, asset_path: str) -> int: # scatter to specified GPU data = scatter(data, [self.device])[0] - if isinstance(self.ymir_model.model, SingleStageDetector): - cls_score, bbox_pred = self.ymir_model.model.forward_dummy(data['img']) - mining_score = self.aldd_miner(bbox_pred) + if isinstance(self.ymir_model.model, YOLOX): + # results = (cls_maps, reg_maps, iou_maps) + # cls_maps: [BxCx52x52, BxCx26x26, BxCx13x13] + # reg_maps: [Bx4x52x52, Bx4x26x26, Bx4x13x13] + # iou_maps: [Bx1x51x52, Bx1x26x26, Bx1x13x13] + results = self.ymir_model.model.forward_dummy(data['img'][0]) + feature_maps = [] + for cls, reg, iou in zip(results[0], results[1], results[2]): + maps = [reg, iou, cls] + feature_maps.append(torch.cat(maps, dim=1)) + mining_score = self.aldd_miner.mining(feature_maps) - return mining_score - elif isinstance(self.ymir_model.model, TwoStageDetector): - # (rpn_outs, roi_outs) - # outs = self.ymir_model.model.forward_dummy(img) - raise NotImplementedError('aldd mining is currently not currently supported TwoStageDetector {}'.format( - self.ymir_model.model.__class__.__name__)) + return mining_score.item() else: - raise NotImplementedError('aldd mining is currently not currently supported with {}'.format( - self.ymir_model.model.__class__.__name__)) + raise NotImplementedError( + 'aldd mining is currently not currently supported with {}, only support YOLOX'.format( + self.ymir_model.model.__class__.__name__)) + + # TODO support other 
SingleStageDetector + # if isinstance(self.ymir_model.model, SingleStageDetector): + # pass + # elif isinstance(self.ymir_model.model, TwoStageDetector): + # # (rpn_outs, roi_outs) + # # outs = self.ymir_model.model.forward_dummy(img) + # raise NotImplementedError('aldd mining is currently not currently supported TwoStageDetector {}'.format( + # self.ymir_model.model.__class__.__name__)) + # else: + # raise NotImplementedError('aldd mining is currently not currently supported with {}'.format( + # self.ymir_model.model.__class__.__name__)) def main(): diff --git a/det-mmdetection-tmi/ymir_mining_random.py b/det-mmdetection-tmi/ymir_mining_random.py index 097c000..0bb5afb 100644 --- a/det-mmdetection-tmi/ymir_mining_random.py +++ b/det-mmdetection-tmi/ymir_mining_random.py @@ -2,6 +2,7 @@ import random import sys +import torch import torch.distributed as dist from easydict import EasyDict as edict from mmcv.runner import init_dist @@ -55,7 +56,8 @@ def mining(self): if WORLD_SIZE > 1 and idx < max_barrier_times: dist.barrier() - consistency = self.compute_score(asset_path=asset_path) + with torch.no_grad(): + consistency = self.compute_score(asset_path=asset_path) mining_result.append((asset_path, consistency)) if WORLD_SIZE > 1: From cfd2d3b61ec37d25e38ef07543246a0650f2f3de Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 19 Oct 2022 15:09:29 +0800 Subject: [PATCH 142/150] change hyper-parameter --- det-mmdetection-tmi/README.md | 1 + det-mmdetection-tmi/infer-template.yaml | 2 +- det-mmdetection-tmi/mining-template.yaml | 2 +- det-mmdetection-tmi/tools/train.py | 1 - det-mmdetection-tmi/training-template.yaml | 4 ++-- det-yolov4-tmi/mining/infer-template.yaml | 2 +- det-yolov4-tmi/mining/mining-template.yaml | 2 +- det-yolov4-tmi/training-template.yaml | 2 +- det-yolov5-tmi/mining-template.yaml | 1 + det-yolov5-tmi/mining/ymir_infer.py | 11 +++-------- det-yolov5-tmi/mining/ymir_mining_aldd.py | 10 ++-------- det-yolov5-tmi/mining/ymir_mining_cald.py | 11 
+++-------- det-yolov5-tmi/start.py | 2 +- det-yolov5-tmi/training-template.yaml | 4 ++-- det-yolov5-tmi/utils/ymir_yolov5.py | 12 ++++-------- live-code-executor/img-man/training-template.yaml | 2 +- 16 files changed, 25 insertions(+), 44 deletions(-) diff --git a/det-mmdetection-tmi/README.md b/det-mmdetection-tmi/README.md index 8795930..f1c0ab6 100644 --- a/det-mmdetection-tmi/README.md +++ b/det-mmdetection-tmi/README.md @@ -31,3 +31,4 @@ docker build -t ymir-executor/mmdet:cuda111-tmi --build-arg YMIR=1.1.0 -f docker - 2022/09/06: set `find_unused_parameters = True`, fix DDP bug - 2022/10/18: add `random` and `aldd` mining algorithm. `aldd` algorithm support yolox only. +- 2022/10/19: fix training class_number bug in `recursive_modify_attribute()` diff --git a/det-mmdetection-tmi/infer-template.yaml b/det-mmdetection-tmi/infer-template.yaml index bf61d79..de78f9c 100644 --- a/det-mmdetection-tmi/infer-template.yaml +++ b/det-mmdetection-tmi/infer-template.yaml @@ -1,4 +1,4 @@ -shm_size: '32G' +shm_size: '128G' export_format: 'ark:raw' cfg_options: '' conf_threshold: 0.2 diff --git a/det-mmdetection-tmi/mining-template.yaml b/det-mmdetection-tmi/mining-template.yaml index ed97f01..693463b 100644 --- a/det-mmdetection-tmi/mining-template.yaml +++ b/det-mmdetection-tmi/mining-template.yaml @@ -1,4 +1,4 @@ -shm_size: '32G' +shm_size: '128G' export_format: 'ark:raw' cfg_options: '' mining_algorithm: cald diff --git a/det-mmdetection-tmi/tools/train.py b/det-mmdetection-tmi/tools/train.py index df4f184..78fbe46 100644 --- a/det-mmdetection-tmi/tools/train.py +++ b/det-mmdetection-tmi/tools/train.py @@ -99,7 +99,6 @@ def main(): args = parse_args() ymir_cfg = get_merged_config() cfg = Config.fromfile(args.config) - print(cfg) # modify mmdet config from file modify_mmcv_config(mmcv_cfg=cfg, ymir_cfg=ymir_cfg) diff --git a/det-mmdetection-tmi/training-template.yaml b/det-mmdetection-tmi/training-template.yaml index f04e51a..05b11b2 100644 --- 
a/det-mmdetection-tmi/training-template.yaml +++ b/det-mmdetection-tmi/training-template.yaml @@ -1,7 +1,7 @@ -shm_size: '32G' +shm_size: '128G' export_format: 'ark:raw' samples_per_gpu: 16 # batch size per gpu -workers_per_gpu: 8 +workers_per_gpu: 4 max_epochs: 100 config_file: 'configs/yolox/yolox_tiny_8x8_300e_coco.py' args_options: '' diff --git a/det-yolov4-tmi/mining/infer-template.yaml b/det-yolov4-tmi/mining/infer-template.yaml index dce6501..11c6502 100644 --- a/det-yolov4-tmi/mining/infer-template.yaml +++ b/det-yolov4-tmi/mining/infer-template.yaml @@ -14,7 +14,7 @@ write_result: True confidence_thresh: 0.1 nms_thresh: 0.45 max_boxes: 50 -# shm_size: '16G' +shm_size: '128G' # gpu_id: '' # model_params_path: [] # class_names: diff --git a/det-yolov4-tmi/mining/mining-template.yaml b/det-yolov4-tmi/mining/mining-template.yaml index aeee009..2ff8270 100644 --- a/det-yolov4-tmi/mining/mining-template.yaml +++ b/det-yolov4-tmi/mining/mining-template.yaml @@ -18,7 +18,7 @@ anchors: '12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 4 confidence_thresh: 0.1 nms_thresh: 0.45 max_boxes: 50 -# shm_size: '16G' +shm_size: '128G' # gpu_id: '0,1,2,3' # model_params_path: [] # task_id: cycle-node-mined-0 diff --git a/det-yolov4-tmi/training-template.yaml b/det-yolov4-tmi/training-template.yaml index 17810f6..bb276dc 100644 --- a/det-yolov4-tmi/training-template.yaml +++ b/det-yolov4-tmi/training-template.yaml @@ -6,7 +6,7 @@ max_batches: 20000 warmup_iterations: 1000 batch: 64 subdivisions: 64 -shm_size: '16G' +shm_size: '128G' export_format: 'ark:raw' # class_names: # - cat diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml index 9011fe6..485c8bb 100644 --- a/det-yolov5-tmi/mining-template.yaml +++ b/det-yolov5-tmi/mining-template.yaml @@ -15,3 +15,4 @@ iou_thres: 0.45 batch_size_per_gpu: 16 num_workers_per_gpu: 4 pin_memory: False +shm_size: 128G diff --git a/det-yolov5-tmi/mining/ymir_infer.py 
b/det-yolov5-tmi/mining/ymir_infer.py index 6b94381..ad1e0d2 100644 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ b/det-yolov5-tmi/mining/ymir_infer.py @@ -12,13 +12,12 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from tqdm import tqdm -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - from mining.util import YmirDataset, load_image_file +from tqdm import tqdm from utils.general import scale_coords from utils.ymir_yolov5 import YmirYolov5 +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -124,10 +123,6 @@ def main() -> int: anns.append(ann) ymir_infer_result[img_file] = anns rw.write_infer_result(infer_result=ymir_infer_result) - - if LOCAL_RANK != -1: - print(f'rank: {RANK}, start destroy process group') - # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index 8151a1b..dc7d5af 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -17,13 +17,12 @@ import torch.nn.functional as F import torch.utils.data as td from easydict import EasyDict as edict +from mining.util import YmirDataset, load_image_file from tqdm import tqdm +from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config -from mining.util import YmirDataset, load_image_file -from utils.ymir_yolov5 import YmirYolov5 - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) @@ -206,11 +205,6 @@ def main() -> int: for img_file, score in result.items(): ymir_mining_result.append((img_file, 
score)) rw.write_mining_result(mining_result=ymir_mining_result) - - if LOCAL_RANK != -1: - print(f'rank: {RANK}, start destroy process group') - # dist.destroy_process_group() - print(f'rank: {RANK}, finished destroy process group') return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py index 343a501..6dfb01d 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -14,14 +14,13 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from tqdm import tqdm -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, update_consistency) +from tqdm import tqdm from utils.general import scale_coords from utils.ymir_yolov5 import YmirYolov5 +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) @@ -184,10 +183,6 @@ def main() -> int: for img_file, score in result.items(): ymir_mining_result.append((img_file, score)) rw.write_mining_result(mining_result=ymir_mining_result) - - if LOCAL_RANK != -1: - print(f'rank: {RANK}, start destroy process group') - # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 3c8f483..4ec6cc0 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -53,7 +53,7 @@ def _run_training(cfg: edict) -> None: # 2. 
training model epochs: int = int(cfg.param.epochs) batch_size_per_gpu: int = int(cfg.param.batch_size_per_gpu) - num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 8)) + num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4)) model: str = cfg.param.model img_size: int = int(cfg.param.img_size) save_period: int = int(cfg.param.save_period) diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml index daaf476..1cc4752 100644 --- a/det-yolov5-tmi/training-template.yaml +++ b/det-yolov5-tmi/training-template.yaml @@ -7,11 +7,11 @@ # pretrained_model_params: [] # class_names: [] -shm_size: '32G' +shm_size: '128G' export_format: 'ark:raw' model: 'yolov5s' batch_size_per_gpu: 16 -num_workers_per_gpu: 8 +num_workers_per_gpu: 4 epochs: 100 img_size: 640 opset: 11 diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py index e58c81d..c463ded 100644 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ b/det-yolov5-tmi/utils/ymir_yolov5.py @@ -1,8 +1,6 @@ """ utils function for ymir and yolov5 """ -import glob -import os import os.path as osp import shutil from typing import Any, List @@ -11,16 +9,14 @@ import torch import yaml from easydict import EasyDict as edict -from nptyping import NDArray, Shape, UInt8 -from packaging.version import Version -from ymir_exc import monitor -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process - from models.common import DetectMultiBackend +from nptyping import NDArray, Shape, UInt8 from utils.augmentations import letterbox from utils.general import check_img_size, non_max_suppression, scale_coords from utils.torch_utils import select_device +from ymir_exc import monitor +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process BBOX = NDArray[Shape['*,4'], Any] CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] diff --git 
a/live-code-executor/img-man/training-template.yaml b/live-code-executor/img-man/training-template.yaml index 0ac8798..df87016 100644 --- a/live-code-executor/img-man/training-template.yaml +++ b/live-code-executor/img-man/training-template.yaml @@ -7,4 +7,4 @@ task_id: 'default-training-task' pretrained_model_params: [] class_names: [] export_format: 'ark:raw' -shm_size: '32G' +shm_size: '128G' From 59420c2abbafa15a07946c91b9b09eed42f8ce11 Mon Sep 17 00:00:00 2001 From: wxjf Date: Wed, 19 Oct 2022 15:38:50 +0800 Subject: [PATCH 143/150] add entropy,random for yolov5 --- det-yolov5-tmi/mining/mining_entropy.py | 82 +++++++++++++ det-yolov5-tmi/mining/ymir_mining_entropy.py | 123 +++++++++++++++++++ det-yolov5-tmi/mining/ymir_mining_random.py | 82 +++++++++++++ 3 files changed, 287 insertions(+) create mode 100644 det-yolov5-tmi/mining/mining_entropy.py create mode 100644 det-yolov5-tmi/mining/ymir_mining_entropy.py create mode 100644 det-yolov5-tmi/mining/ymir_mining_random.py diff --git a/det-yolov5-tmi/mining/mining_entropy.py b/det-yolov5-tmi/mining/mining_entropy.py new file mode 100644 index 0000000..ecf9262 --- /dev/null +++ b/det-yolov5-tmi/mining/mining_entropy.py @@ -0,0 +1,82 @@ +""" +Consistency-based Active Learning for Object Detection CVPR 2022 workshop +official code: https://github.com/we1pingyu/CALD/blob/master/cald_train.py +""" +import sys +from typing import Dict, List, Tuple + +import cv2 +import numpy as np +from easydict import EasyDict as edict +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from nptyping import NDArray +from scipy.stats import entropy +from tqdm import tqdm +from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 +from ymir_exc import dataset_reader as dr +from ymir_exc import env, monitor +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process + +def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: + if 
len(result) > 0: + bboxes = result[:, :4].astype(np.int32) + conf = result[:, 4] + class_id = result[:, 5] + else: + bboxes = np.zeros(shape=(0, 4), dtype=np.int32) + conf = np.zeros(shape=(0, 1), dtype=np.float32) + class_id = np.zeros(shape=(0, 1), dtype=np.int32) + + return bboxes, conf, class_id + +class MiningEntropy(YmirYolov5): + def __init__(self, cfg: edict): + super().__init__(cfg) + + if cfg.ymir.run_mining and cfg.ymir.run_infer: + # multiple task, run mining first, infer later + mining_task_idx = 0 + task_num = 2 + else: + mining_task_idx = 0 + task_num = 1 + + self.task_idx = mining_task_idx + self.task_num = task_num + + def mining(self) -> List: + N = dr.items_count(env.DatasetType.CANDIDATE) + monitor_gap = max(1, N // 1000) + idx = -1 + beta = 1.3 + mining_result = [] + for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): + img = cv2.imread(asset_path) + # xyxy,conf,cls + result = self.predict(img,nms=False) + bboxes, conf, _ = split_result(result) + if len(result) == 0: + # no result for the image without augmentation + mining_result.append((asset_path, -10)) + continue + mining_result.append((asset_path,-np.sum(conf*np.log2(conf)))) + idx += 1 + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, + task_idx=self.task_idx, task_num=self.task_num) + monitor.write_monitor_logger(percent=percent) + + return mining_result + +def main(): + cfg = get_merged_config() + miner = MiningEntropy(cfg) + mining_result = miner.mining() + rw.write_mining_result(mining_result=mining_result) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/det-yolov5-tmi/mining/ymir_mining_entropy.py b/det-yolov5-tmi/mining/ymir_mining_entropy.py new file mode 100644 index 0000000..9ccc01e --- /dev/null +++ b/det-yolov5-tmi/mining/ymir_mining_entropy.py @@ -0,0 +1,123 @@ +"""use fake DDP to infer +1. split data with `images_rank = images[RANK::WORLD_SIZE]` +2. 
infer on the origin dataset +3. infer on the augmentation dataset +4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` +5. merge mining result +""" +import os +import sys +from functools import partial + +import numpy as np +import torch +import torch.distributed as dist +import torch.utils.data as td +from easydict import EasyDict as edict +from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + +from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, + update_consistency) +from utils.general import scale_coords +from utils.ymir_yolov5 import YmirYolov5 + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): + # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
+ gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 + device = torch.device('cuda', gpu) + ymir_yolov5.to(device) + + load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) + batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu + gpu_count: int = ymir_yolov5.gpu_count + cpu_count: int = os.cpu_count() or 1 + num_workers_per_gpu = min([ + cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + ymir_yolov5.num_workers_per_gpu + ]) + + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu + # origin dataset + images_rank = images[RANK::WORLD_SIZE] + origin_dataset = YmirDataset(images_rank, load_fn=load_fn) + origin_dataset_loader = td.DataLoader(origin_dataset, + batch_size=batch_size_per_gpu, + shuffle=False, + sampler=None, + num_workers=num_workers_per_gpu, + pin_memory=ymir_yolov5.pin_memory, + drop_last=False) + + results = [] + mining_results = dict() + beta = 1.3 + dataset_size = len(images_rank) + pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader + for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1 and idx < max_barrier_times: + dist.barrier() + + with torch.no_grad(): + pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=False) + + if RANK in [-1, 0]: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) + preprocess_image_shape = batch['image'].shape[2:] + for inner_idx, det in enumerate(pred): # per image + result_per_image = [] + image_file = batch['image_file'][inner_idx] + if len(det): + conf = det[:, 4].data.cpu().numpy() + mining_results[image_file] = -np.sum(conf*np.log2(conf)) + else: + mining_results[image_file] = -10 + continue + + torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + + +def 
main() -> int: + ymir_cfg = get_merged_config() + ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') + + if LOCAL_RANK != -1: + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + run(ymir_cfg, ymir_yolov5) + + # wait all process to save the mining result + if LOCAL_RANK != -1: + dist.barrier() + + if RANK in [0, -1]: + results = [] + for rank in range(WORLD_SIZE): + results.append(torch.load(f'/out/mining_results_{rank}.pt')) + + ymir_mining_result = [] + for result in results: + for img_file, score in result.items(): + ymir_mining_result.append((img_file, score)) + rw.write_mining_result(mining_result=ymir_mining_result) + + if LOCAL_RANK != -1: + print(f'rank: {RANK}, start destroy process group') + # dist.destroy_process_group() + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining_random.py b/det-yolov5-tmi/mining/ymir_mining_random.py new file mode 100644 index 0000000..418e7f2 --- /dev/null +++ b/det-yolov5-tmi/mining/ymir_mining_random.py @@ -0,0 +1,82 @@ +"""use fake DDP to infer +1. split data with `images_rank = images[RANK::WORLD_SIZE]` +2. infer on the origin dataset +3. infer on the augmentation dataset +4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` +5. 
merge mining result +""" +import os +import random +import sys +from functools import partial + +import numpy as np +import torch +import torch.distributed as dist +import torch.utils.data as td +from easydict import EasyDict as edict +from tqdm import tqdm +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + +from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, + update_consistency) +from utils.general import scale_coords +from utils.ymir_yolov5 import YmirYolov5 + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): + # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. + gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 + device = torch.device('cuda', gpu) + ymir_yolov5.to(device) + + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + images_rank = images[RANK::WORLD_SIZE] + mining_results=dict() + for image in images_rank: + mining_results[image] = random.random() + + torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + + +def main() -> int: + ymir_cfg = get_merged_config() + ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') + + if LOCAL_RANK != -1: + assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' + torch.cuda.set_device(LOCAL_RANK) + dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") + + run(ymir_cfg, ymir_yolov5) + + # wait all process to save the mining result + if LOCAL_RANK != -1: + dist.barrier() + + if RANK in [0, -1]: + results = [] + for rank in range(WORLD_SIZE): + results.append(torch.load(f'/out/mining_results_{rank}.pt')) + + ymir_mining_result = [] + for result in results: + for img_file, score in result.items(): + 
ymir_mining_result.append((img_file, score)) + rw.write_mining_result(mining_result=ymir_mining_result) + + if LOCAL_RANK != -1: + print(f'rank: {RANK}, start destroy process group') + # dist.destroy_process_group() + return 0 + + +if __name__ == '__main__': + sys.exit(main()) From d760d8dd3bf1b89ead99a906d95ed513de8a186e Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Wed, 19 Oct 2022 17:19:15 +0800 Subject: [PATCH 144/150] update readme --- README.MD | 4 +++- README_zh-CN.MD | 4 +++- det-demo-tmi/README.md | 6 ++---- docs/mining-images-overview.md | 13 +++++++++++++ .../official-docker-image.md | 0 5 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 docs/mining-images-overview.md rename official-docker-image.md => docs/official-docker-image.md (100%) diff --git a/README.MD b/README.MD index 823419c..50ace8d 100644 --- a/README.MD +++ b/README.MD @@ -4,7 +4,9 @@ - [wiki](https://github.com/modelai/ymir-executor-fork/wiki) -- [ymir executor](./official-docker-image.md) +- [ymir executor](./docs/official-docker-image.md) + +- [ymir mining algorithm](./docs/mining-images-overview.md) ## overview diff --git a/README_zh-CN.MD b/README_zh-CN.MD index e1d9960..3579823 100644 --- a/README_zh-CN.MD +++ b/README_zh-CN.MD @@ -4,7 +4,9 @@ - [说明文档](https://github.com/modelai/ymir-executor-fork/wiki) -- [ymir镜像](./official-docker-image.md) +- [ymir镜像](./docs/official-docker-image.md) + +- [ymir 挖掘算法](./docs/mining-images-overview.md) ## 比较 diff --git a/det-demo-tmi/README.md b/det-demo-tmi/README.md index 715eb47..abccece 100644 --- a/det-demo-tmi/README.md +++ b/det-demo-tmi/README.md @@ -137,7 +137,7 @@ ymir 通过 mir train / mir mining / mir infer 命令启动镜像,遵循以下 2. 
镜像框架相关的所有内容都在 `ymir_exc` 包中,包括以下部分: - 安装方式 `pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.3.0"`, 注意通过 `pip install ymir_exc` 的方式安装的版本不具有 `ymir_exc.util` 包。 + 安装方式 `pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.3.0"`, 注意通过 ~~`pip install ymir_exc`~~ 的方式安装的版本不具有 `ymir_exc.util` 包。前者在后者的代码基础上进行了扩展,提供了更多的功能(如 `ymir_exc.util`)。 * `env`:环境,提供任务类型,任务 id 等信息 @@ -198,9 +198,7 @@ for idx, line in enumerate(lines): * 例如,如果需要保存 stage_name 为 'epoch-5000' 的模型,则需要把这些模型文件保存到 `os.path.join(cfg.ymir.output.model_dir, 'epoch-5000')` 目录下 - * 之后,可以使用 `result_writer.write_model_stage()` 方法保存训练结果的摘要,这些内容包括:不带目录的模型名称列表,mAP. - - * 也可以使用 `util.write_ymir_training_result()` 方法保存训练结果,它的兼容性与容错性更好。 + * 推荐使用 `util.write_ymir_training_result()` 方法保存训练结果 (不带目录的模型名称列表,mAP等) ,它对 `result_writer.write_model_stage()` 进行了封装,兼容性与容错性更好。 * 需要保存的模型实际记录在`cfg.ymir.output.training_result_file`中,ymir将依据此文件进行文件打包,供用户下载、迭代训练及推理挖掘。 diff --git a/docs/mining-images-overview.md b/docs/mining-images-overview.md new file mode 100644 index 0000000..e9557c1 --- /dev/null +++ b/docs/mining-images-overview.md @@ -0,0 +1,13 @@ +# ymir mining images overview + +| docker images | random | cald | aldd | entropy | +| - | - | - | - | - | +| yolov5 | ✔️ | ✔️ | ✔️ | ✔️ | +| mmdetection | ✔️ | ✔️ | ✔️ | ❌ | +| yolov4 | ❌ | ✔️ | ✔️ | ❌ | +| yolov7 | ❌ | ❌ | ✔️ | ❌ | +| nanodet | ❌ | ❌ | ✔️ | ❌ | +| vidt |❌ | ✔️ | ❌ | ❌ | +| detectron2 | ❌ | ✔️ | ❌ | ❌ | + +view [ALBench: Active Learning Benchmark](https://github.com/modelai/ALBench) for detail diff --git a/official-docker-image.md b/docs/official-docker-image.md similarity index 100% rename from official-docker-image.md rename to docs/official-docker-image.md From 61910668e3736f9c38ac72d33b2246d8383f7d7b Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 20 Oct 2022 09:35:04 +0800 Subject: [PATCH 145/150] add mining overview --- docs/mining-images-overview.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/docs/mining-images-overview.md b/docs/mining-images-overview.md index e9557c1..cec5f86 100644 --- a/docs/mining-images-overview.md +++ b/docs/mining-images-overview.md @@ -11,3 +11,11 @@ | detectron2 | ❌ | ✔️ | ❌ | ❌ | view [ALBench: Active Learning Benchmark](https://github.com/modelai/ALBench) for detail + +## reference + +- entropy: `Multi-class active learning for image classification. CVPR 2009` + +- cald: `Consistency-based Active Learning for Object Detection. CVPR 2022 workshop` + +- aldd: `Active Learning for Deep Detection Neural Networks. ICCV 2019` From 5e605ec8d02eaac4533140dc4a8ca4c35832ed9f Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 20 Oct 2022 11:13:38 +0800 Subject: [PATCH 146/150] merge multi-gpu and single gpu mining mode --- det-yolov5-tmi/mining/mining_cald.py | 162 ------------------- det-yolov5-tmi/mining/mining_entropy.py | 82 ---------- det-yolov5-tmi/mining/util.py | 23 ++- det-yolov5-tmi/mining/ymir_mining_aldd.py | 4 +- det-yolov5-tmi/mining/ymir_mining_cald.py | 2 +- det-yolov5-tmi/mining/ymir_mining_entropy.py | 21 +-- det-yolov5-tmi/mining/ymir_mining_random.py | 27 ++-- det-yolov5-tmi/start.py | 6 +- 8 files changed, 40 insertions(+), 287 deletions(-) delete mode 100644 det-yolov5-tmi/mining/mining_cald.py delete mode 100644 det-yolov5-tmi/mining/mining_entropy.py diff --git a/det-yolov5-tmi/mining/mining_cald.py b/det-yolov5-tmi/mining/mining_cald.py deleted file mode 100644 index ab458ff..0000000 --- a/det-yolov5-tmi/mining/mining_cald.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -Consistency-based Active Learning for Object Detection CVPR 2022 workshop -official code: https://github.com/we1pingyu/CALD/blob/master/cald_train.py -""" -import sys -from typing import Dict, List, Tuple - -import cv2 -import numpy as np -from easydict import EasyDict as edict -from nptyping import NDArray -from scipy.stats import entropy -from tqdm import tqdm -from ymir_exc import dataset_reader as dr -from ymir_exc import env, monitor -from 
ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process - -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 - - -def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: - if len(result) > 0: - bboxes = result[:, :4].astype(np.int32) - conf = result[:, 4] - class_id = result[:, 5] - else: - bboxes = np.zeros(shape=(0, 4), dtype=np.int32) - conf = np.zeros(shape=(0, 1), dtype=np.float32) - class_id = np.zeros(shape=(0, 1), dtype=np.int32) - - return bboxes, conf, class_id - - -class MiningCald(YmirYolov5): - - def __init__(self, cfg: edict): - super().__init__(cfg) - - if cfg.ymir.run_mining and cfg.ymir.run_infer: - # multiple task, run mining first, infer later - mining_task_idx = 0 - task_num = 2 - else: - mining_task_idx = 0 - task_num = 1 - - self.task_idx = mining_task_idx - self.task_num = task_num - - def mining(self) -> List: - N = dr.items_count(env.DatasetType.CANDIDATE) - monitor_gap = max(1, N // 1000) - idx = -1 - beta = 1.3 - mining_result = [] - for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): - img = cv2.imread(asset_path) - # xyxy,conf,cls - result = self.predict(img) - bboxes, conf, _ = split_result(result) - if len(result) == 0: - # no result for the image without augmentation - mining_result.append((asset_path, -beta)) - continue - - consistency = 0.0 - aug_bboxes_dict, aug_results_dict = self.aug_predict(img, bboxes) - for key in aug_results_dict: - # no result for the image with augmentation f'{key}' - if len(aug_results_dict[key]) == 0: - consistency += beta - continue - - bboxes_key, conf_key, _ = split_result(aug_results_dict[key]) - cls_scores_aug = 1 - conf_key - cls_scores = 1 - conf - - consistency_per_aug = 2.0 - ious = get_ious(bboxes_key, aug_bboxes_dict[key]) - aug_idxs = np.argmax(ious, axis=0) - for origin_idx, aug_idx in enumerate(aug_idxs): - 
max_iou = ious[aug_idx, origin_idx] - if max_iou == 0: - consistency_per_aug = min(consistency_per_aug, beta) - p = cls_scores_aug[aug_idx] - q = cls_scores[origin_idx] - m = (p + q) / 2. - js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) - if js < 0: - js = 0 - consistency_box = max_iou - consistency_cls = 0.5 * (conf[origin_idx] + conf_key[aug_idx]) * (1 - js) - consistency_per_inst = abs(consistency_box + consistency_cls - beta) - consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) - - consistency += consistency_per_aug - - consistency /= len(aug_results_dict) - - mining_result.append((asset_path, consistency)) - idx += 1 - - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, - p=idx / N, - task_idx=self.task_idx, - task_num=self.task_num) - monitor.write_monitor_logger(percent=percent) - - return mining_result - - def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> Tuple[Dict[str, BBOX], Dict[str, NDArray]]: - """ - for different augmentation methods: flip, cutout, rotate and resize - augment the image and bbox and use model to predict them. - - return the predict result and augment bbox. 
- """ - aug_dict = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) - - aug_bboxes = dict() - aug_results = dict() - for key in aug_dict: - aug_img, aug_bbox = aug_dict[key](image, bboxes) - - aug_result = self.predict(aug_img) - aug_bboxes[key] = aug_bbox - aug_results[key] = aug_result - - return aug_bboxes, aug_results - - -def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: - """ - args: - boxes1: np.array, (N, 4), xyxy - boxes2: np.array, (M, 4), xyxy - return: - iou: np.array, (N, M) - """ - area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) - area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) - iner_area = intersect(boxes1, boxes2) - area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) - area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0) - iou = iner_area / (area1 + area2 - iner_area + 1e-14) - return iou - - -def main(): - cfg = get_merged_config() - miner = MiningCald(cfg) - mining_result = miner.mining() - rw.write_mining_result(mining_result=mining_result) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/det-yolov5-tmi/mining/mining_entropy.py b/det-yolov5-tmi/mining/mining_entropy.py deleted file mode 100644 index ecf9262..0000000 --- a/det-yolov5-tmi/mining/mining_entropy.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Consistency-based Active Learning for Object Detection CVPR 2022 workshop -official code: https://github.com/we1pingyu/CALD/blob/master/cald_train.py -""" -import sys -from typing import Dict, List, Tuple - -import cv2 -import numpy as np -from easydict import EasyDict as edict -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from nptyping import NDArray -from scipy.stats import entropy -from tqdm import tqdm -from utils.ymir_yolov5 import BBOX, CV_IMAGE, YmirYolov5 -from ymir_exc import dataset_reader as dr -from ymir_exc import env, monitor -from ymir_exc import result_writer as rw -from ymir_exc.util 
import YmirStage, get_merged_config, get_ymir_process - -def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]: - if len(result) > 0: - bboxes = result[:, :4].astype(np.int32) - conf = result[:, 4] - class_id = result[:, 5] - else: - bboxes = np.zeros(shape=(0, 4), dtype=np.int32) - conf = np.zeros(shape=(0, 1), dtype=np.float32) - class_id = np.zeros(shape=(0, 1), dtype=np.int32) - - return bboxes, conf, class_id - -class MiningEntropy(YmirYolov5): - def __init__(self, cfg: edict): - super().__init__(cfg) - - if cfg.ymir.run_mining and cfg.ymir.run_infer: - # multiple task, run mining first, infer later - mining_task_idx = 0 - task_num = 2 - else: - mining_task_idx = 0 - task_num = 1 - - self.task_idx = mining_task_idx - self.task_num = task_num - - def mining(self) -> List: - N = dr.items_count(env.DatasetType.CANDIDATE) - monitor_gap = max(1, N // 1000) - idx = -1 - beta = 1.3 - mining_result = [] - for asset_path, _ in tqdm(dr.item_paths(dataset_type=env.DatasetType.CANDIDATE)): - img = cv2.imread(asset_path) - # xyxy,conf,cls - result = self.predict(img,nms=False) - bboxes, conf, _ = split_result(result) - if len(result) == 0: - # no result for the image without augmentation - mining_result.append((asset_path, -10)) - continue - mining_result.append((asset_path,-np.sum(conf*np.log2(conf)))) - idx += 1 - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, - task_idx=self.task_idx, task_num=self.task_num) - monitor.write_monitor_logger(percent=percent) - - return mining_result - -def main(): - cfg = get_merged_config() - miner = MiningEntropy(cfg) - mining_result = miner.mining() - rw.write_mining_result(mining_result=mining_result) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file diff --git a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py index 54ef5dd..c69343c 100644 --- a/det-yolov5-tmi/mining/util.py +++ b/det-yolov5-tmi/mining/util.py @@ -19,18 +19,35 
@@ import cv2 import numpy as np import torch.utils.data as td +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from nptyping import NDArray from scipy.stats import entropy from torch.utils.data._utils.collate import default_collate - -from mining.data_augment import cutout, horizontal_flip, resize, rotate -from mining.mining_cald import get_ious from utils.augmentations import letterbox +from utils.ymir_yolov5 import BBOX LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) +def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: + """ + args: + boxes1: np.array, (N, 4), xyxy + boxes2: np.array, (M, 4), xyxy + return: + iou: np.array, (N, M) + """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + iner_area = intersect(boxes1, boxes2) + area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) + area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0) + iou = iner_area / (area1 + area2 - iner_area + 1e-14) + return iou + + def preprocess(img, img_size, stride): img1 = letterbox(img, img_size, stride=stride, auto=False)[0] diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py index dc7d5af..8d6a27c 100644 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/mining/ymir_mining_aldd.py @@ -174,7 +174,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): if RANK in [-1, 0]: ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') def main() -> int: @@ -191,9 +191,7 @@ def main() -> int: # wait all process to save the mining result if LOCAL_RANK != -1: - print(f'rank: {RANK}, sync 
start before merge') dist.barrier() - print(f'rank: {RANK}, sync finished before merge') if RANK in [0, -1]: results = [] diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py index 6dfb01d..b4c6147 100644 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/mining/ymir_mining_cald.py @@ -155,7 +155,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): image_file = batch['image_file'][inner_idx] mining_results[image_file] = batch_consistency[inner_idx] - torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') def main() -> int: diff --git a/det-yolov5-tmi/mining/ymir_mining_entropy.py b/det-yolov5-tmi/mining/ymir_mining_entropy.py index 9ccc01e..df5a1ff 100644 --- a/det-yolov5-tmi/mining/ymir_mining_entropy.py +++ b/det-yolov5-tmi/mining/ymir_mining_entropy.py @@ -14,15 +14,12 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict +from mining.util import YmirDataset, load_image_file from tqdm import tqdm +from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config -from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, - update_consistency) -from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 - LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) @@ -58,9 +55,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): pin_memory=ymir_yolov5.pin_memory, drop_last=False) - results = [] mining_results = dict() - beta = 1.3 dataset_size = len(images_rank) pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader for idx, batch in enumerate(pbar): @@ -73,18 +68,16 @@ def run(ymir_cfg: edict, 
ymir_yolov5: YmirYolov5): if RANK in [-1, 0]: ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - preprocess_image_shape = batch['image'].shape[2:] for inner_idx, det in enumerate(pred): # per image - result_per_image = [] image_file = batch['image_file'][inner_idx] if len(det): conf = det[:, 4].data.cpu().numpy() - mining_results[image_file] = -np.sum(conf*np.log2(conf)) + mining_results[image_file] = -np.sum(conf * np.log2(conf)) else: mining_results[image_file] = -10 continue - torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') def main() -> int: @@ -99,7 +92,7 @@ def main() -> int: run(ymir_cfg, ymir_yolov5) # wait all process to save the mining result - if LOCAL_RANK != -1: + if WORLD_SIZE > 1: dist.barrier() if RANK in [0, -1]: @@ -112,10 +105,6 @@ def main() -> int: for img_file, score in result.items(): ymir_mining_result.append((img_file, score)) rw.write_mining_result(mining_result=ymir_mining_result) - - if LOCAL_RANK != -1: - print(f'rank: {RANK}, start destroy process group') - # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/mining/ymir_mining_random.py b/det-yolov5-tmi/mining/ymir_mining_random.py index 418e7f2..30fb099 100644 --- a/det-yolov5-tmi/mining/ymir_mining_random.py +++ b/det-yolov5-tmi/mining/ymir_mining_random.py @@ -8,22 +8,15 @@ import os import random import sys -from functools import partial -import numpy as np import torch import torch.distributed as dist -import torch.utils.data as td from easydict import EasyDict as edict from tqdm import tqdm +from utils.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config -from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, - update_consistency) -from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 
- LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) @@ -34,16 +27,20 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 device = torch.device('cuda', gpu) ymir_yolov5.to(device) - + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] images_rank = images[RANK::WORLD_SIZE] - mining_results=dict() - for image in images_rank: + mining_results = dict() + dataset_size = len(images_rank) + pbar = tqdm(images_rank) if RANK == 0 else images_rank + for idx, image in enumerate(pbar): + if RANK in [-1, 0]: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx / dataset_size) mining_results[image] = random.random() - torch.save(mining_results, f'/out/mining_results_{RANK}.pt') + torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') def main() -> int: @@ -58,7 +55,7 @@ def main() -> int: run(ymir_cfg, ymir_yolov5) # wait all process to save the mining result - if LOCAL_RANK != -1: + if WORLD_SIZE > 1: dist.barrier() if RANK in [0, -1]: @@ -71,10 +68,6 @@ def main() -> int: for img_file, score in result.items(): ymir_mining_result.append((img_file, score)) rw.write_mining_result(mining_result=ymir_mining_result) - - if LOCAL_RANK != -1: - print(f'rank: {RANK}, start destroy process group') - # dist.destroy_process_group() return 0 diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py index 4ec6cc0..6c82844 100644 --- a/det-yolov5-tmi/start.py +++ b/det-yolov5-tmi/start.py @@ -119,12 +119,12 @@ def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 mining_algorithm = cfg.param.get('mining_algorithm', 'aldd') - support_mining_algorithms = ['aldd', 'cald'] + support_mining_algorithms = ['aldd', 'cald', 'random', 'entropy'] if mining_algorithm not 
in support_mining_algorithms: raise Exception(f'unknown mining algorithm {mining_algorithm}, not in {support_mining_algorithms}') - if gpu_count <= 1 and mining_algorithm in ['cald']: - command = 'python3 mining/mining_cald.py' + if gpu_count <= 1: + command = f'python3 mining/ymir_mining_{mining_algorithm}.py' else: port = find_free_port() command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining_{mining_algorithm}.py' # noqa From 35a000eec3133669445ba7ce6911d86fc61dc7fe Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 20 Oct 2022 11:22:27 +0800 Subject: [PATCH 147/150] mv files --- det-yolov5-tmi/ymir/README.md | 43 ++++ det-yolov5-tmi/ymir/docker/cuda102.dockerfile | 40 ++++ det-yolov5-tmi/ymir/docker/cuda111.dockerfile | 43 ++++ .../ymir/img-man/infer-template.yaml | 15 ++ .../ymir/img-man/mining-template.yaml | 18 ++ .../ymir/img-man/training-template.yaml | 22 ++ det-yolov5-tmi/ymir/mining/data_augment.py | 204 +++++++++++++++++ det-yolov5-tmi/ymir/mining/util.py | 149 +++++++++++++ det-yolov5-tmi/ymir/mining/ymir_infer.py | 130 +++++++++++ .../ymir/mining/ymir_mining_aldd.py | 210 ++++++++++++++++++ .../ymir/mining/ymir_mining_cald.py | 190 ++++++++++++++++ .../ymir/mining/ymir_mining_entropy.py | 112 ++++++++++ .../ymir/mining/ymir_mining_random.py | 75 +++++++ det-yolov5-tmi/ymir/start.py | 191 ++++++++++++++++ det-yolov5-tmi/ymir/ymir_yolov5.py | 187 ++++++++++++++++ 15 files changed, 1629 insertions(+) create mode 100644 det-yolov5-tmi/ymir/README.md create mode 100644 det-yolov5-tmi/ymir/docker/cuda102.dockerfile create mode 100644 det-yolov5-tmi/ymir/docker/cuda111.dockerfile create mode 100644 det-yolov5-tmi/ymir/img-man/infer-template.yaml create mode 100644 det-yolov5-tmi/ymir/img-man/mining-template.yaml create mode 100644 det-yolov5-tmi/ymir/img-man/training-template.yaml create mode 100644 det-yolov5-tmi/ymir/mining/data_augment.py create mode 100644 det-yolov5-tmi/ymir/mining/util.py 
create mode 100644 det-yolov5-tmi/ymir/mining/ymir_infer.py create mode 100644 det-yolov5-tmi/ymir/mining/ymir_mining_aldd.py create mode 100644 det-yolov5-tmi/ymir/mining/ymir_mining_cald.py create mode 100644 det-yolov5-tmi/ymir/mining/ymir_mining_entropy.py create mode 100644 det-yolov5-tmi/ymir/mining/ymir_mining_random.py create mode 100644 det-yolov5-tmi/ymir/start.py create mode 100644 det-yolov5-tmi/ymir/ymir_yolov5.py diff --git a/det-yolov5-tmi/ymir/README.md b/det-yolov5-tmi/ymir/README.md new file mode 100644 index 0000000..6bf9151 --- /dev/null +++ b/det-yolov5-tmi/ymir/README.md @@ -0,0 +1,43 @@ +# yolov5-ymir readme +- [yolov5 readme](./README_yolov5.md) + +``` +docker build -t ymir/ymir-executor:ymir1.1.0-cuda102-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda102.dockerfile . + +docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . +``` + +## main change log + +- add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining + +- add `utils/ymir_yolov5.py` for useful functions + + - `get_merged_config()` add ymir path config `cfg.yaml` and hyper-parameter `cfg.param` + + - `convert_ymir_to_yolov5()` generate yolov5 dataset config file `data.yaml` + + - `write_ymir_training_result()` save model weight, map and other files. + + - `get_weight_file()` get pretrained weight or init weight file from ymir system + +- modify `utils/datasets.py` for ymir dataset format + +- modify `train.py` for training process monitor + +- add `mining/data_augment.py` and `mining/mining_cald.py` for mining + +- add `training/infer/mining-template.yaml` for `/img-man/training/infer/mining-template.yaml` + +- add `cuda102/111.dockerfile`, remove origin `Dockerfile` + +- modify `requirements.txt` + +- other modify support onnx export, not important. + +## new features + +- 2022/09/08: add aldd active learning algorithm for mining task. 
[Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) +- 2022/09/14: support change hyper-parameter `num_workers_per_gpu` +- 2022/09/16: support change activation, view [rknn](https://github.com/airockchip/rknn_model_zoo/tree/main/models/vision/object_detection/yolov5-pytorch) +- 2022/10/09: fix dist.destroy_process_group() hang diff --git a/det-yolov5-tmi/ymir/docker/cuda102.dockerfile b/det-yolov5-tmi/ymir/docker/cuda102.dockerfile new file mode 100644 index 0000000..0014b60 --- /dev/null +++ b/det-yolov5-tmi/ymir/docker/cuda102.dockerfile @@ -0,0 +1,40 @@ +ARG PYTORCH="1.8.1" +ARG CUDA="10.2" +ARG CUDNN="7" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +# support YMIR=1.0.0, 1.1.0 or 1.2.0 +ARG YMIR="1.1.0" + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 +ENV YMIR_VERSION=${YMIR} + +# Install linux package +RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ + libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ + build-essential ninja-build \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# install ymir-exc sdk +RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" + +# Copy file from host to docker and install requirements +COPY . /app +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ + && pip install -r /app/requirements.txt + +# Download pretrained weight and font file +RUN cd /app && bash data/scripts/download_weights.sh \ + && mkdir -p /root/.config/Ultralytics \ + && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf + +# Make PYTHONPATH find local package +ENV PYTHONPATH=. 
+ +WORKDIR /app +RUN echo "python3 /app/start.py" > /usr/bin/start.sh +CMD bash /usr/bin/start.sh diff --git a/det-yolov5-tmi/ymir/docker/cuda111.dockerfile b/det-yolov5-tmi/ymir/docker/cuda111.dockerfile new file mode 100644 index 0000000..84427a8 --- /dev/null +++ b/det-yolov5-tmi/ymir/docker/cuda111.dockerfile @@ -0,0 +1,43 @@ +ARG PYTORCH="1.8.0" +ARG CUDA="11.1" +ARG CUDNN="8" + +# cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime +# support YMIR=1.0.0, 1.1.0 or 1.2.0 +ARG YMIR="1.1.0" + + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" +ENV LANG=C.UTF-8 +ENV YMIR_VERSION=$YMIR + +# Install linux package +RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ + libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ + build-essential ninja-build \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY ./requirements.txt /workspace/ +# install ymir-exc sdk and requirements +RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ + && pip install -r /workspace/requirements.txt + +# Copy file from host to docker and install requirements +COPY . /app +RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ + +# Download pretrained weight and font file +RUN cd /app && bash data/scripts/download_weights.sh \ + && mkdir -p /root/.config/Ultralytics \ + && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf + +# Make PYTHONPATH find local package +ENV PYTHONPATH=. 
+ +WORKDIR /app +RUN echo "python3 /app/start.py" > /usr/bin/start.sh +CMD bash /usr/bin/start.sh diff --git a/det-yolov5-tmi/ymir/img-man/infer-template.yaml b/det-yolov5-tmi/ymir/img-man/infer-template.yaml new file mode 100644 index 0000000..329887a --- /dev/null +++ b/det-yolov5-tmi/ymir/img-man/infer-template.yaml @@ -0,0 +1,15 @@ +# infer template for your executor app +# after build image, it should at /img-man/infer-template.yaml +# key: gpu_id, task_id, model_params_path, class_names should be preserved + +# gpu_id: '0' +# task_id: 'default-infer-task' +# model_params_path: [] +# class_names: [] + +img_size: 640 +conf_thres: 0.25 +iou_thres: 0.45 +batch_size_per_gpu: 16 +num_workers_per_gpu: 4 +pin_memory: False diff --git a/det-yolov5-tmi/ymir/img-man/mining-template.yaml b/det-yolov5-tmi/ymir/img-man/mining-template.yaml new file mode 100644 index 0000000..485c8bb --- /dev/null +++ b/det-yolov5-tmi/ymir/img-man/mining-template.yaml @@ -0,0 +1,18 @@ +# mining template for your executor app +# after build image, it should at /img-man/mining-template.yaml +# key: gpu_id, task_id, model_params_path, class_names should be preserved + +# gpu_id: '0' +# task_id: 'default-training-task' +# model_params_path: [] +# class_names: [] + +img_size: 640 +mining_algorithm: aldd +class_distribution_scores: '' # 1.0,1.0,0.1,0.2 +conf_thres: 0.25 +iou_thres: 0.45 +batch_size_per_gpu: 16 +num_workers_per_gpu: 4 +pin_memory: False +shm_size: 128G diff --git a/det-yolov5-tmi/ymir/img-man/training-template.yaml b/det-yolov5-tmi/ymir/img-man/training-template.yaml new file mode 100644 index 0000000..1cc4752 --- /dev/null +++ b/det-yolov5-tmi/ymir/img-man/training-template.yaml @@ -0,0 +1,22 @@ +# training template for your executor app +# after build image, it should at /img-man/training-template.yaml +# key: gpu_id, task_id, pretrained_model_params, class_names should be preserved + +# gpu_id: '0' +# task_id: 'default-training-task' +# pretrained_model_params: [] +# 
class_names: [] + +shm_size: '128G' +export_format: 'ark:raw' +model: 'yolov5s' +batch_size_per_gpu: 16 +num_workers_per_gpu: 4 +epochs: 100 +img_size: 640 +opset: 11 +args_options: '--exist-ok' +save_best_only: True # save the best weight file only +save_period: 10 +sync_bn: False # work for multi-gpu only +ymir_saved_file_patterns: '' # custom saved files, support python regular expression, use , to split multiple pattern diff --git a/det-yolov5-tmi/ymir/mining/data_augment.py b/det-yolov5-tmi/ymir/mining/data_augment.py new file mode 100644 index 0000000..cfafaa7 --- /dev/null +++ b/det-yolov5-tmi/ymir/mining/data_augment.py @@ -0,0 +1,204 @@ +""" +data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout +official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py +""" +import random +from typing import Any, List, Tuple + +import cv2 +import numpy as np +from nptyping import NDArray + +from utils.ymir_yolov5 import BBOX, CV_IMAGE + + +def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: + ''' + Find intersection of every box combination between two sets of box + boxes1: bounding boxes 1, a tensor of dimensions (n1, 4) + boxes2: bounding boxes 2, a tensor of dimensions (n2, 4) + + Out: Intersection each of boxes1 with respect to each of boxes2, + a tensor of dimensions (n1, n2) + ''' + n1 = boxes1.shape[0] + n2 = boxes2.shape[0] + max_xy = np.minimum( + np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + + min_xy = np.maximum( + np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) + inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) + return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) + + +def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ + -> Tuple[CV_IMAGE, BBOX]: + """ + image: opencv image, [height,width,channels] + bbox: numpy.ndarray, [N,4] --> [x1,y1,x2,y2] + """ 
+ image = image.copy() + + width = image.shape[1] + # Flip image horizontally + image = image[:, ::-1, :] + if len(bbox) > 0: + bbox = bbox.copy() + # Flip bbox horizontally + bbox[:, [0, 2]] = width - bbox[:, [2, 0]] + return image, bbox + + +def cutout(image: CV_IMAGE, + bbox: BBOX, + cut_num: int = 2, + fill_val: int = 0, + bbox_remove_thres: float = 0.4, + bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: + ''' + Cutout augmentation + image: an opencv image (numpy array) + boxes: bounding boxes, a tensor of dimensions (#objects, 4) + labels: labels of object, a tensor of dimensions (#objects) + fill_val: Value filled in cut out + bbox_remove_thres: Threshold to remove bbox cut by cutout + + Out: new image, new_boxes, new_labels + ''' + image = image.copy() + bbox = bbox.copy() + + if len(bbox) == 0: + return image, bbox + + original_h, original_w, original_channel = image.shape + count = 0 + for _ in range(50): + # Random cutout size: [0.05, 0.2] of original dimension + cutout_size_h = random.uniform(0.05 * original_h, 0.2 * original_h) + cutout_size_w = random.uniform(0.05 * original_w, 0.2 * original_w) + + # Random position for cutout + left = random.uniform(0, original_w - cutout_size_w) + right = left + cutout_size_w + top = random.uniform(0, original_h - cutout_size_h) + bottom = top + cutout_size_h + cutout = np.array([[float(left), float(top), float(right), float(bottom)]]) + + # Calculate intersect between cutout and bounding boxes + overlap_size = intersect(cutout, bbox) + area_boxes = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) + ratio = overlap_size / (area_boxes + 1e-14) + # If max overlap ratio is above bbox_remove_thres or below bbox_min_thres, try again + if ratio.max() > bbox_remove_thres or ratio.max() < bbox_min_thres: + continue + + image[int(top):int(bottom), int(left):int(right), :] = fill_val + count += 1 + if count >= cut_num: + break + return image, bbox + + +def rotate(image: CV_IMAGE, bbox: BBOX, rot: float = 5) -> Tuple[CV_IMAGE, BBOX]: + image =
image.copy() + bbox = bbox.copy() + h, w, c = image.shape + center = np.array([w / 2.0, h / 2.0]) + s = max(h, w) * 1.0 + trans = get_affine_transform(center, s, rot, [w, h]) + if len(bbox) > 0: + for i in range(bbox.shape[0]): + x1, y1 = affine_transform(bbox[i, :2], trans) + x2, y2 = affine_transform(bbox[i, 2:], trans) + x3, y3 = affine_transform(bbox[i, [2, 1]], trans) + x4, y4 = affine_transform(bbox[i, [0, 3]], trans) + bbox[i, :2] = [min(x1, x2, x3, x4), min(y1, y2, y3, y4)] + bbox[i, 2:] = [max(x1, x2, x3, x4), max(y1, y2, y3, y4)] + image = cv2.warpAffine(image, trans, (w, h), flags=cv2.INTER_LINEAR) + return image, bbox + + +def get_3rd_point(a: NDArray, b: NDArray) -> NDArray: + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_dir(src_point: NDArray, rot_rad: float) -> List: + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result + + +def transform_preds(coords: NDArray, center: NDArray, scale: Any, rot: float, output_size: List) -> NDArray: + trans = get_affine_transform(center, scale, rot, output_size, inv=True) + target_coords = affine_transform(coords, trans) + return target_coords + + +def get_affine_transform(center: NDArray, + scale: Any, + rot: float, + output_size: List, + shift: NDArray = np.array([0, 0], dtype=np.float32), + inv: bool = False) -> NDArray: + if not isinstance(scale, np.ndarray) and not isinstance(scale, list): + scale = np.array([scale, scale], dtype=np.float32) + + scale_tmp = scale + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = get_dir([0, src_w * -0.5], rot_rad) + dst_dir = np.array([0, dst_w * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir 
+ scale_tmp * shift + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def affine_transform(pt: NDArray, t: NDArray) -> NDArray: + new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def resize(img: CV_IMAGE, boxes: BBOX, ratio: float = 0.8) -> Tuple[CV_IMAGE, BBOX]: + """ + ratio: <= 1.0 + """ + assert ratio <= 1.0, f'resize ratio {ratio} must <= 1.0' + + h, w, _ = img.shape + ow = int(w * ratio) + oh = int(h * ratio) + resize_img = cv2.resize(img, (ow, oh)) + new_img = np.zeros_like(img) + new_img[:oh, :ow] = resize_img + + if len(boxes) == 0: + return new_img, boxes + else: + return new_img, boxes * ratio diff --git a/det-yolov5-tmi/ymir/mining/util.py b/det-yolov5-tmi/ymir/mining/util.py new file mode 100644 index 0000000..c69343c --- /dev/null +++ b/det-yolov5-tmi/ymir/mining/util.py @@ -0,0 +1,149 @@ +"""run.py: +img --(model)--> pred --(augmentation)--> (aug1_pred, aug2_pred, ..., augN_pred) +img --(augmentation)--> aug1_img --(model)--> pred1 +img --(augmentation)--> aug2_img --(model)--> pred2 +... +img --(augmentation)--> augN_img --(model)--> predN + +dataload(img) --(model)--> pred +dataload(img, pred) --(augmentation1)--> (aug1_img, aug1_pred) --(model)--> pred1 + +1. split dataset with DDP sampler +2. use DDP model to infer sampled dataloader +3. 
gather infer result + +""" +import os +from typing import Any, List + +import cv2 +import numpy as np +import torch.utils.data as td +from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from nptyping import NDArray +from scipy.stats import entropy +from torch.utils.data._utils.collate import default_collate +from utils.augmentations import letterbox +from utils.ymir_yolov5 import BBOX + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: + """ + args: + boxes1: np.array, (N, 4), xyxy + boxes2: np.array, (M, 4), xyxy + return: + iou: np.array, (N, M) + """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) + iner_area = intersect(boxes1, boxes2) + area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) + area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0) + iou = iner_area / (area1 + area2 - iner_area + 1e-14) + return iou + + +def preprocess(img, img_size, stride): + img1 = letterbox(img, img_size, stride=stride, auto=False)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + # img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + return img1 + + +def load_image_file(img_file: str, img_size, stride): + img = cv2.imread(img_file) + img1 = letterbox(img, img_size, stride=stride, auto=False)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + # img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + # img1.unsqueeze_(dim=0) # expand for batch dim + return dict(image=img1, 
origin_shape=img.shape[0:2], image_file=img_file) + # return img1 + + +def load_image_file_with_ann(image_info: dict, img_size, stride): + img_file = image_info['image_file'] + # xyxy(int) conf(float) class_index(int) + bboxes = image_info['results'][:, :4].astype(np.int32) + img = cv2.imread(img_file) + aug_dict = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) + + data = dict(image_file=img_file, origin_shape=img.shape[0:2]) + for key in aug_dict: + aug_img, aug_bbox = aug_dict[key](img, bboxes) + preprocess_aug_img = preprocess(aug_img, img_size, stride) + data[f'image_{key}'] = preprocess_aug_img + data[f'bboxes_{key}'] = aug_bbox + data[f'origin_shape_{key}'] = aug_img.shape[0:2] + + data.update(image_info) + return data + + +def collate_fn_with_fake_ann(batch): + new_batch = dict() + for key in ['flip', 'cutout', 'rotate', 'resize']: + new_batch[f'bboxes_{key}_list'] = [data[f'bboxes_{key}'] for data in batch] + + new_batch[f'image_{key}'] = default_collate([data[f'image_{key}'] for data in batch]) + + new_batch[f'origin_shape_{key}'] = default_collate([data[f'origin_shape_{key}'] for data in batch]) + + new_batch['results_list'] = [data['results'] for data in batch] + new_batch['image_file'] = [data['image_file'] for data in batch] + + return new_batch + + +def update_consistency(consistency, consistency_per_aug, beta, pred_bboxes_key, pred_conf_key, aug_bboxes_key, + aug_conf): + cls_scores_aug = 1 - pred_conf_key + cls_scores = 1 - aug_conf + + consistency_per_aug = 2.0 + ious = get_ious(pred_bboxes_key, aug_bboxes_key) + aug_idxs = np.argmax(ious, axis=0) + for origin_idx, aug_idx in enumerate(aug_idxs): + max_iou = ious[aug_idx, origin_idx] + if max_iou == 0: + consistency_per_aug = min(consistency_per_aug, beta) + p = cls_scores_aug[aug_idx] + q = cls_scores[origin_idx] + m = (p + q) / 2. 
+ js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) + if js < 0: + js = 0 + consistency_box = max_iou + consistency_cls = 0.5 * (aug_conf[origin_idx] + pred_conf_key[aug_idx]) * (1 - js) + consistency_per_inst = abs(consistency_box + consistency_cls - beta) + consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) + + consistency += consistency_per_aug + return consistency + + +class YmirDataset(td.Dataset): + def __init__(self, images: List[Any], load_fn=None): + super().__init__() + self.images = images + self.load_fn = load_fn + + def __getitem__(self, index): + return self.load_fn(self.images[index]) + + def __len__(self): + return len(self.images) diff --git a/det-yolov5-tmi/ymir/mining/ymir_infer.py b/det-yolov5-tmi/ymir/mining/ymir_infer.py new file mode 100644 index 0000000..ad1e0d2 --- /dev/null +++ b/det-yolov5-tmi/ymir/mining/ymir_infer.py @@ -0,0 +1,130 @@ +"""use fake DDP to infer +1. split data with `images_rank = images[RANK::WORLD_SIZE]` +2. save splited result with `torch.save(results, f'results_{RANK}.pt')` +3. merge result +""" +import os +import sys +import warnings +from functools import partial + +import torch +import torch.distributed as dist +import torch.utils.data as td +from easydict import EasyDict as edict +from mining.util import YmirDataset, load_image_file +from tqdm import tqdm +from utils.general import scale_coords +from utils.ymir_yolov5 import YmirYolov5 +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): + # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
+ gpu = max(0, LOCAL_RANK) + device = torch.device('cuda', gpu) + ymir_yolov5.to(device) + + load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) + batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu + gpu_count = ymir_yolov5.gpu_count + cpu_count: int = os.cpu_count() or 1 + num_workers_per_gpu = min([ + cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, + ymir_yolov5.num_workers_per_gpu + ]) + + with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + max_barrier_times = len(images) // max(1, WORLD_SIZE) // batch_size_per_gpu + # origin dataset + images_rank = images[RANK::WORLD_SIZE] + origin_dataset = YmirDataset(images_rank, load_fn=load_fn) + origin_dataset_loader = td.DataLoader(origin_dataset, + batch_size=batch_size_per_gpu, + shuffle=False, + sampler=None, + num_workers=num_workers_per_gpu, + pin_memory=ymir_yolov5.pin_memory, + drop_last=False) + + results = [] + dataset_size = len(images_rank) + monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) + pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader + for idx, batch in enumerate(pbar): + # batch-level sync, avoid 30min time-out error + if LOCAL_RANK != -1 and idx < max_barrier_times: + dist.barrier() + + with torch.no_grad(): + pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) + + if idx % monitor_gap == 0: + ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) + + preprocess_image_shape = batch['image'].shape[2:] + for idx, det in enumerate(pred): # per image + result_per_image = [] + image_file = batch['image_file'][idx] + if len(det): + origin_image_shape = (batch['origin_shape'][0][idx], batch['origin_shape'][1][idx]) + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() + 
def main() -> int:
    """Entry point: optionally set up DDP, infer the shard, merge per-rank results."""
    ymir_cfg = get_merged_config()
    ymir_yolov5 = YmirYolov5(ymir_cfg, task='infer')

    if LOCAL_RANK != -1:
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    run(ymir_cfg, ymir_yolov5)

    # wait all process to save the infer result
    # BUGFIX: only barrier when the process group was initialized above;
    # an unconditional dist.barrier() raises in single-process mode
    # (every sibling mining script already guards this call).
    if LOCAL_RANK != -1:
        dist.barrier()

    if RANK in [0, -1]:
        results = []
        for rank in range(WORLD_SIZE):
            results.append(torch.load(f'/out/infer_results_{rank}.pt'))

        # flatten per-rank detection tensors into ymir annotation objects
        ymir_infer_result = dict()
        for result in results:
            for img_data in result:
                img_file = img_data['image_file']
                anns = []
                for each_det in img_data['result']:
                    each_det_np = each_det.data.cpu().numpy()
                    for i in range(each_det_np.shape[0]):
                        xmin, ymin, xmax, ymax, conf, cls = each_det_np[i, :6].tolist()
                        if conf < ymir_yolov5.conf_thres:
                            continue
                        if int(cls) >= len(ymir_yolov5.class_names):
                            warnings.warn(f'class index {int(cls)} out of range for {ymir_yolov5.class_names}')
                            continue
                        ann = rw.Annotation(class_name=ymir_yolov5.class_names[int(cls)],
                                            score=conf,
                                            box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin),
                                                       h=int(ymax - ymin)))
                        anns.append(ann)
                ymir_infer_result[img_file] = anns
        rw.write_infer_result(infer_result=ymir_infer_result)
    return 0
class ALDD(object):
    """Uncertainty scorer for the 'aldd' mining algorithm.

    Builds per-class (objectness * class-prob) heatmaps from the detector's
    raw feature-map outputs and scores each image by a local entropy-gap
    uncertainty measure (entropy-of-mean minus mean-of-entropy).
    """

    def __init__(self, ymir_cfg: edict):
        # pooling hyper-parameters for the uncertainty maps
        self.avg_pool_size = 9
        self.max_pool_size = 32
        self.avg_pool_pad = (self.avg_pool_size - 1) // 2

        self.num_classes = len(ymir_cfg.param.class_names)
        # optional per-class weighting: comma-separated floats in the config,
        # padded with 1.0 when fewer values than classes are supplied
        if ymir_cfg.param.get('class_distribution_scores', ''):
            scores = [float(x.strip()) for x in ymir_cfg.param.class_distribution_scores.split(',')]
            if len(scores) < self.num_classes:
                warnings.warn('extend 1.0 to class_distribution_scores')
                scores.extend([1.0] * (self.num_classes - len(scores)))
            self.class_distribution_scores = np.array(scores[0:self.num_classes], dtype=np.float32)
        else:
            # default: every class weighted equally
            self.class_distribution_scores = np.array([1.0] * self.num_classes, dtype=np.float32)

    def calc_unc_val(self, heatmap: torch.Tensor) -> torch.Tensor:
        """Return one uncertainty score per image for an [N, C, H, W] heatmap.

        Values of ``heatmap`` must lie in [0, 1] (they are post-sigmoid);
        ``binary_cross_entropy(x, x)`` is then the binary entropy of x.
        """
        # mean of entropy
        ent = F.binary_cross_entropy(heatmap, heatmap, reduction='none')
        avg_ent = F.avg_pool2d(ent,
                               kernel_size=self.avg_pool_size,
                               stride=1,
                               padding=self.avg_pool_pad,
                               count_include_pad=False)  # N, 1, H, W
        mean_of_entropy = torch.sum(avg_ent, dim=1, keepdim=True)  # N, 1, H, W

        # entropy of mean
        avg_heatmap = F.avg_pool2d(heatmap,
                                   kernel_size=self.avg_pool_size,
                                   stride=1,
                                   padding=self.avg_pool_pad,
                                   count_include_pad=False)  # N, C, H, W
        ent_avg = F.binary_cross_entropy(avg_heatmap, avg_heatmap, reduction='none')
        entropy_of_mean = torch.sum(ent_avg, dim=1, keepdim=True)  # N, 1, H, W

        # the gap is large where neighbouring predictions disagree
        uncertainty = entropy_of_mean - mean_of_entropy
        unc = F.max_pool2d(uncertainty,
                           kernel_size=self.max_pool_size,
                           stride=self.max_pool_size,
                           padding=0,
                           ceil_mode=False)

        # aggregating
        scores = torch.mean(unc, dim=(1, 2, 3))  # (N,)
        return scores

    def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: Any):
        """Score a batch from raw detector feature maps.

        args:
            net_output: per-scale raw head outputs, each
                [bs, 3, h, w, 5 + num_classes]
            net_input_shape: int or (h, w) size the heatmaps are resized to
        returns:
            np.ndarray of shape (bs,): weighted per-image uncertainty scores
        """
        if not isinstance(net_input_shape, (list, tuple)):
            net_input_shape = (net_input_shape, net_input_shape)

        # CLASS_DISTRIBUTION_SCORE = np.array([1.0] * num_of_class)
        scores_list = []

        # NOTE(review): in-place sigmoid mutates the caller's tensors --
        # acceptable here since the outputs are not reused; confirm if shared.
        for feature_map in net_output:
            feature_map.sigmoid_()

        for each_class_index in range(self.num_classes):
            feature_map_list: List[torch.Tensor] = []

            # each_output_feature_map: [bs, 3, h, w, 5 + num_classes]
            for each_output_feature_map in net_output:
                net_output_conf = each_output_feature_map[:, :, :, :, 4]
                net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index]
                # feature_map_reshape: [bs, 3, h, w]
                feature_map_reshape = F.interpolate(net_output_cls_mult_conf,
                                                    net_input_shape,
                                                    mode='bilinear',
                                                    align_corners=False)
                feature_map_list.append(feature_map_reshape)

            # len(net_output) = 3
            # feature_map_concate: [bs, 9, h, w]
            feature_map_concate = torch.cat(feature_map_list, 1)
            # scores: [bs, 1] for each class
            scores = self.calc_unc_val(feature_map_concate)
            scores = scores.cpu().detach().numpy()
            scores_list.append(scores)

        # total_scores: [bs, num_classes]
        total_scores = np.stack(scores_list, axis=1)
        total_scores = total_scores * self.class_distribution_scores
        total_scores = np.sum(total_scores, axis=1)

        return total_scores
def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5):
    """ALDD mining: score this process's shard and save it to
    /out/mining_results_{rank}.pt for the rank-0 merge in main()."""
    # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5.
    gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0
    device = torch.device('cuda', gpu)
    ymir_yolov5.to(device)

    load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride)
    batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu
    gpu_count: int = ymir_yolov5.gpu_count
    cpu_count: int = os.cpu_count() or 1
    # cap workers by cpu budget per gpu; disable workers when batch size is 1
    num_workers_per_gpu = min([
        cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0,
        ymir_yolov5.num_workers_per_gpu
    ])

    with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f:
        images = [line.strip() for line in f.readlines()]

    # number of batches every rank is guaranteed to have (used to bound barriers)
    max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu

    # origin dataset
    # RANK == -1 means single-process mode: this process sees every image
    if RANK != -1:
        images_rank = images[RANK::WORLD_SIZE]
    else:
        images_rank = images
    origin_dataset = YmirDataset(images_rank, load_fn=load_fn)
    origin_dataset_loader = td.DataLoader(origin_dataset,
                                          batch_size=batch_size_per_gpu,
                                          shuffle=False,
                                          sampler=None,
                                          num_workers=num_workers_per_gpu,
                                          pin_memory=ymir_yolov5.pin_memory,
                                          drop_last=False)

    mining_results = dict()
    dataset_size = len(images_rank)
    pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader
    miner = ALDD(ymir_cfg)
    for idx, batch in enumerate(pbar):
        # batch-level sync, avoid 30min time-out error
        if LOCAL_RANK != -1 and idx < max_barrier_times:
            dist.barrier()

        with torch.no_grad():
            # [1] selects the raw per-scale feature maps from the yolov5 head
            featuremap_output = ymir_yolov5.model.model(batch['image'].float().to(device))[1]
            unc_scores = miner.compute_aldd_score(featuremap_output, ymir_yolov5.img_size)

        for each_imgname, each_score in zip(batch["image_file"], unc_scores):
            mining_results[each_imgname] = each_score

        if RANK in [-1, 0]:
            ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size)

    # max(0, RANK) keeps the filename aligned with main()'s range(WORLD_SIZE) merge
    torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt')
def main() -> int:
    """Set up (optional) DDP, mine this shard, then merge per-rank scores."""
    cfg = get_merged_config()
    # note select_device(gpu_id) will set os.environ['CUDA_VISIBLE_DEVICES'] to gpu_id
    detector = YmirYolov5(cfg, task='mining')

    ddp_mode = LOCAL_RANK != -1
    if ddp_mode:
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        backend = "nccl" if dist.is_nccl_available() else "gloo"
        dist.init_process_group(backend=backend)

    run(cfg, detector)

    # make sure every rank has finished writing its result file
    if ddp_mode:
        dist.barrier()

    if RANK in (0, -1):
        merged = []
        for rank in range(WORLD_SIZE):
            shard = torch.load(f'/out/mining_results_{rank}.pt')
            merged.extend(shard.items())
        rw.write_mining_result(mining_result=merged)
    return 0
def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5):
    """CALD mining: score each image by the consistency between predictions
    on the original image and on augmented versions (flip/cutout/rotate/resize).

    Saves this rank's scores to /out/mining_results_{rank}.pt.
    """
    # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5.
    gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0
    device = torch.device('cuda', gpu)
    ymir_yolov5.to(device)

    load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride)
    batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu
    gpu_count: int = ymir_yolov5.gpu_count
    cpu_count: int = os.cpu_count() or 1
    # cap workers by cpu budget per gpu; disable workers when batch size is 1
    num_workers_per_gpu = min([
        cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0,
        ymir_yolov5.num_workers_per_gpu
    ])

    with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f:
        images = [line.strip() for line in f.readlines()]

    max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu
    # origin dataset.
    # BUGFIX: RANK == -1 (single-process mode) must map to shard 0;
    # images[-1::WORLD_SIZE] would silently keep only the last image
    # (same handling as ymir_mining_aldd.py).
    images_rank = images[max(0, RANK)::WORLD_SIZE]
    origin_dataset = YmirDataset(images_rank, load_fn=load_fn)
    origin_dataset_loader = td.DataLoader(origin_dataset,
                                          batch_size=batch_size_per_gpu,
                                          shuffle=False,
                                          sampler=None,
                                          num_workers=num_workers_per_gpu,
                                          pin_memory=ymir_yolov5.pin_memory,
                                          drop_last=False)

    results = []
    mining_results = dict()
    beta = 1.3  # penalty / baseline weight for images with missing detections
    dataset_size = len(images_rank)
    pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader
    for idx, batch in enumerate(pbar):
        # batch-level sync, avoid 30min time-out error
        if LOCAL_RANK != -1 and idx < max_barrier_times:
            dist.barrier()

        with torch.no_grad():
            pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True)

        if RANK in [-1, 0]:
            ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size)
        preprocess_image_shape = batch['image'].shape[2:]
        for inner_idx, det in enumerate(pred):  # per image
            result_per_image = []
            image_file = batch['image_file'][inner_idx]
            if len(det):
                origin_image_shape = (batch['origin_shape'][0][inner_idx], batch['origin_shape'][1][inner_idx])
                # Rescale boxes from img_size to img size
                det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round()
                result_per_image.append(det)
            else:
                # no detections on the origin image: lowest consistency score
                mining_results[image_file] = -beta
                continue

            results_per_image = torch.cat(result_per_image, dim=0).data.cpu().numpy()
            results.append(dict(image_file=image_file, origin_shape=origin_image_shape, results=results_per_image))

    aug_load_fn = partial(load_image_file_with_ann, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride)
    aug_dataset = YmirDataset(results, load_fn=aug_load_fn)
    aug_dataset_loader = td.DataLoader(aug_dataset,
                                       batch_size=batch_size_per_gpu,
                                       shuffle=False,
                                       sampler=None,
                                       collate_fn=collate_fn_with_fake_ann,
                                       num_workers=num_workers_per_gpu,
                                       pin_memory=ymir_yolov5.pin_memory,
                                       drop_last=False)

    # cannot sync here!!! ranks may have a different number of aug batches
    dataset_size = len(results)
    monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu)
    pbar = tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader
    for idx, batch in enumerate(pbar):
        if idx % monitor_gap == 0 and RANK in [-1, 0]:
            ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size)

        batch_consistency = [0.0 for _ in range(len(batch['image_file']))]
        aug_keys = ['flip', 'cutout', 'rotate', 'resize']

        pred_result = dict()
        for key in aug_keys:
            with torch.no_grad():
                pred_result[key] = ymir_yolov5.forward(batch[f'image_{key}'].float().to(device), nms=True)

        for inner_idx in range(len(batch['image_file'])):
            for key in aug_keys:
                preprocess_image_shape = batch[f'image_{key}'].shape[2:]
                result_per_image = []
                det = pred_result[key][inner_idx]
                if len(det) == 0:
                    # no result for the image with augmentation f'{key}'
                    batch_consistency[inner_idx] += beta
                    continue

                # prediction result from origin image
                fake_ann = batch['results_list'][inner_idx]
                conf = fake_ann[:, 4]

                # augmentated bbox from bboxes, aug_conf = conf
                aug_bboxes_key = batch[f'bboxes_{key}_list'][inner_idx].astype(np.int32)

                origin_image_shape = (batch[f'origin_shape_{key}'][0][inner_idx],
                                      batch[f'origin_shape_{key}'][1][inner_idx])

                # Rescale boxes from img_size to img size
                det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round()
                result_per_image.append(det)

                pred_bboxes_key = det[:, :4].data.cpu().numpy().astype(np.int32)
                pred_conf_key = det[:, 4].data.cpu().numpy()
                batch_consistency[inner_idx] = update_consistency(consistency=batch_consistency[inner_idx],
                                                                  consistency_per_aug=2.0,
                                                                  beta=beta,
                                                                  pred_bboxes_key=pred_bboxes_key,
                                                                  pred_conf_key=pred_conf_key,
                                                                  aug_bboxes_key=aug_bboxes_key,
                                                                  aug_conf=conf)

        for inner_idx in range(len(batch['image_file'])):
            # average the accumulated consistency over the four augmentations
            batch_consistency[inner_idx] /= len(aug_keys)
            image_file = batch['image_file'][inner_idx]
            mining_results[image_file] = batch_consistency[inner_idx]

    torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt')
def main() -> int:
    """Set up (optional) DDP, run CALD mining, then merge per-rank scores."""
    cfg = get_merged_config()
    detector = YmirYolov5(cfg, task='mining')

    ddp_mode = LOCAL_RANK != -1
    if ddp_mode:
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        backend = "nccl" if dist.is_nccl_available() else "gloo"
        dist.init_process_group(backend=backend)

    run(cfg, detector)

    # make sure every rank has finished writing its result file
    if ddp_mode:
        dist.barrier()

    if RANK in (0, -1):
        merged = []
        for rank in range(WORLD_SIZE):
            shard = torch.load(f'/out/mining_results_{rank}.pt')
            merged.extend(shard.items())
        rw.write_mining_result(mining_result=merged)
    return 0
def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5):
    """Entropy mining: score each image in this rank's shard by the Shannon
    entropy of its raw (pre-NMS) prediction confidences.

    Saves this rank's scores to /out/mining_results_{rank}.pt.
    """
    # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5.
    gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0
    device = torch.device('cuda', gpu)
    ymir_yolov5.to(device)

    load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride)
    batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu
    gpu_count: int = ymir_yolov5.gpu_count
    cpu_count: int = os.cpu_count() or 1
    # cap workers by cpu budget per gpu; disable workers when batch size is 1
    num_workers_per_gpu = min([
        cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0,
        ymir_yolov5.num_workers_per_gpu
    ])

    with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f:
        images = [line.strip() for line in f.readlines()]

    max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu
    # origin dataset.
    # BUGFIX: RANK == -1 (single-process mode) must map to shard 0;
    # images[-1::WORLD_SIZE] would silently keep only the last image
    # (same handling as ymir_mining_aldd.py).
    images_rank = images[max(0, RANK)::WORLD_SIZE]
    origin_dataset = YmirDataset(images_rank, load_fn=load_fn)
    origin_dataset_loader = td.DataLoader(origin_dataset,
                                          batch_size=batch_size_per_gpu,
                                          shuffle=False,
                                          sampler=None,
                                          num_workers=num_workers_per_gpu,
                                          pin_memory=ymir_yolov5.pin_memory,
                                          drop_last=False)

    mining_results = dict()
    dataset_size = len(images_rank)
    pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader
    for idx, batch in enumerate(pbar):
        # batch-level sync, avoid 30min time-out error
        if LOCAL_RANK != -1 and idx < max_barrier_times:
            dist.barrier()

        with torch.no_grad():
            # nms=False: entropy is computed over the raw confidences
            pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=False)

        if RANK in [-1, 0]:
            ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size)
        for inner_idx, det in enumerate(pred):  # per image
            image_file = batch['image_file'][inner_idx]
            if len(det):
                conf = det[:, 4].data.cpu().numpy()
                # base-2 Shannon entropy over the confidence column
                mining_results[image_file] = -np.sum(conf * np.log2(conf))
            else:
                # no predictions at all: lowest mining priority
                mining_results[image_file] = -10

    torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt')
def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5):
    """Baseline mining: assign a uniform random score to every image in this
    process's shard and save it to /out/mining_results_{rank}.pt.
    """
    # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5.
    gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0
    device = torch.device('cuda', gpu)
    ymir_yolov5.to(device)

    with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f:
        images = [line.strip() for line in f.readlines()]

    # BUGFIX: RANK == -1 (single-process mode) must map to shard 0;
    # images[-1::WORLD_SIZE] would silently score only the last image
    # (same handling as ymir_mining_aldd.py).
    images_rank = images[max(0, RANK)::WORLD_SIZE]
    mining_results = dict()
    dataset_size = len(images_rank)
    pbar = tqdm(images_rank) if RANK == 0 else images_rank
    for idx, image in enumerate(pbar):
        if RANK in [-1, 0]:
            ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx / dataset_size)
        mining_results[image] = random.random()

    torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt')
def start(cfg: edict) -> int:
    """Dispatch to training, or to mining and/or infer, based on the config."""
    logging.info(f'merged config: {cfg}')

    if cfg.ymir.run_training:
        _run_training(cfg)
        return 0

    # mining and infer may both be requested; mining always runs first
    run_both = cfg.ymir.run_mining and cfg.ymir.run_infer
    task_num = 2 if run_both else 1
    mining_task_idx = 0
    infer_task_idx = 1 if run_both else 0

    if cfg.ymir.run_mining:
        _run_mining(cfg, mining_task_idx, task_num)
    if cfg.ymir.run_infer:
        _run_infer(cfg, infer_task_idx, task_num)

    return 0
def _run_training(cfg: edict) -> None:
    """
    function for training task
    1. convert dataset
    2. training model
    3. save model weight/hyperparameter/... to design directory
    """
    # 1. convert dataset
    out_dir = cfg.ymir.output.root_dir
    convert_ymir_to_yolov5(cfg)
    logging.info(f'generate {out_dir}/data.yaml')
    monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0))

    # 2. training model
    epochs: int = int(cfg.param.epochs)
    batch_size_per_gpu: int = int(cfg.param.batch_size_per_gpu)
    num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4))
    model: str = cfg.param.model  # yolov5 variant name, e.g. 'yolov5s'
    img_size: int = int(cfg.param.img_size)
    save_period: int = int(cfg.param.save_period)
    save_best_only: bool = get_bool(cfg, key='save_best_only', default_value=True)
    args_options: str = cfg.param.args_options  # extra raw CLI args for train.py
    gpu_id: str = str(cfg.param.get('gpu_id', '0'))
    gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0
    batch_size: int = batch_size_per_gpu * max(1, gpu_count)
    port: int = find_free_port()  # master port for torch.distributed.launch
    sync_bn: bool = get_bool(cfg, key='sync_bn', default_value=False)

    weights = get_weight_file(cfg)
    if not weights:
        # download pretrained weight
        weights = attempt_download(f'{model}.pt')

    # yolov5's train.py writes to {project}/{name}; split models_dir accordingly
    models_dir = cfg.ymir.output.models_dir
    project = os.path.dirname(models_dir)
    name = os.path.basename(models_dir)
    assert os.path.join(project, name) == models_dir

    commands = ['python3']
    device = gpu_id or 'cpu'
    # multi-gpu: launch train.py through torch.distributed.launch
    if gpu_count > 1:
        commands.extend(f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split())

    commands.extend([
        'train.py', '--epochs',
        str(epochs), '--batch-size',
        str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', project, '--cfg', f'models/{model}.yaml',
        '--name', name, '--weights', weights, '--img-size',
        str(img_size), '--save-period',
        str(save_period), '--device', device,
        '--workers', str(num_workers_per_gpu)
    ])

    # --nosave: keep only best.pt/last.pt instead of periodic checkpoints
    if save_best_only:
        commands.append("--nosave")

    if gpu_count > 1 and sync_bn:
        commands.append("--sync-bn")

    if args_options:
        commands.extend(args_options.split())

    logging.info(f'start training: {commands}')

    subprocess.run(commands, check=True)
    monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0))

    # if task done, write 100% percent log
    monitor.write_monitor_logger(percent=1.0)
def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None:
    """Run the configured mining algorithm as a subprocess.

    task_idx/task_num position this task's progress inside the overall
    monitor percentage when mining and infer run back to back.
    """
    # generate data.yaml for mining
    out_dir = cfg.ymir.output.root_dir
    convert_ymir_to_yolov5(cfg)
    logging.info(f'generate {out_dir}/data.yaml')
    monitor.write_monitor_logger(
        percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num))
    gpu_id: str = str(cfg.param.get('gpu_id', '0'))
    gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0

    mining_algorithm = cfg.param.get('mining_algorithm', 'aldd')
    support_mining_algorithms = ['aldd', 'cald', 'random', 'entropy']
    if mining_algorithm not in support_mining_algorithms:
        raise Exception(f'unknown mining algorithm {mining_algorithm}, not in {support_mining_algorithms}')

    # single gpu/cpu: run the script directly; multi-gpu: fake-DDP launch
    if gpu_count <= 1:
        command = f'python3 mining/ymir_mining_{mining_algorithm}.py'
    else:
        port = find_free_port()
        command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining_{mining_algorithm}.py'  # noqa

    logging.info(f'mining: {command}')
    subprocess.run(command.split(), check=True)
    monitor.write_monitor_logger(
        percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num))
def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None:
    """Run inference: in-process for a single gpu, subprocess fake-DDP otherwise.

    task_idx/task_num position this task's progress inside the overall
    monitor percentage when mining and infer run back to back.
    """
    # generate data.yaml for infer
    out_dir = cfg.ymir.output.root_dir
    convert_ymir_to_yolov5(cfg)
    logging.info(f'generate {out_dir}/data.yaml')
    monitor.write_monitor_logger(
        percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num))

    gpu_id: str = str(cfg.param.get('gpu_id', '0'))
    gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0

    if gpu_count <= 1:
        # in-process loop over all candidate images
        N = dr.items_count(env.DatasetType.CANDIDATE)
        infer_result = dict()
        model = YmirYolov5(cfg)
        idx = -1

        monitor_gap = max(1, N // 100)  # report progress ~100 times
        for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE):
            img = cv2.imread(asset_path)
            result = model.infer(img)
            infer_result[asset_path] = result
            idx += 1

            if idx % monitor_gap == 0:
                percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num)
                monitor.write_monitor_logger(percent=percent)

        rw.write_infer_result(infer_result=infer_result)
    else:
        # multi-gpu: ymir_infer.py shards the work and writes the result itself
        port = find_free_port()
        command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_infer.py'  # noqa

        logging.info(f'infer: {command}')
        subprocess.run(command.split(), check=True)

    monitor.write_monitor_logger(
        percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num))
def get_weight_file(cfg: edict) -> str:
    """
    return the weight file path by priority:
    best.pt first, otherwise the most recently created *.pt,
    or "" when no weight file is found.
    """
    # BUGFIX: ('.pt') is just the string '.pt'; use a real one-element tuple
    # so the intent (a collection of allowed suffixes) is explicit.
    weight_files = get_weight_files(cfg, suffix=('.pt',))
    # choose weight file by priority, best.pt > xxx.pt
    for p in weight_files:
        if p.endswith('best.pt'):
            return p

    if len(weight_files) > 0:
        # fall back to the newest checkpoint by creation time
        return max(weight_files, key=osp.getctime)

    return ""
x): + """ + return the feature maps before sigmoid for mining + """ + return self.model.model(x)[1] + + def forward(self, x, nms=False): + pred = self.model(x) + if not nms: + return pred + + pred = non_max_suppression(pred, + conf_thres=self.conf_thres, + iou_thres=self.iou_thres, + classes=None, # not filter class_idx + agnostic=False, + max_det=100) + return pred + + def init_detector(self, device: torch.device) -> DetectMultiBackend: + weights = get_weight_file(self.cfg) + + if not weights: + raise Exception("no weights file specified!") + + data_yaml = osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') + model = DetectMultiBackend( + weights=weights, + device=device, + dnn=False, # not use opencv dnn for onnx inference + data=data_yaml) # dataset.yaml path + + return model + + def predict(self, img: CV_IMAGE) -> NDArray: + """ + predict single image and return bbox information + img: opencv BGR, uint8 format + """ + # preprocess: padded resize + img1 = letterbox(img, self.img_size, stride=self.stride, auto=True)[0] + + # preprocess: convert data format + img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img1 = np.ascontiguousarray(img1) + img1 = torch.from_numpy(img1).to(self.device) + + img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 + img1.unsqueeze_(dim=0) # expand for batch dim + pred = self.forward(img1, nms=True) + + result = [] + for det in pred: + if len(det): + # Rescale boxes from img_size to img size + det[:, :4] = scale_coords(img1.shape[2:], det[:, :4], img.shape).round() + result.append(det) + + # xyxy, conf, cls + if len(result) > 0: + tensor_result = torch.cat(result, dim=0) + numpy_result = tensor_result.data.cpu().numpy() + else: + numpy_result = np.zeros(shape=(0, 6), dtype=np.float32) + + return numpy_result + + def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: + anns = [] + result = self.predict(img) + + for i in range(result.shape[0]): + xmin, ymin, xmax, ymax, conf, cls = result[i, :6].tolist() + ann = 
rw.Annotation(class_name=self.class_names[int(cls)], + score=conf, + box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin), h=int(ymax - ymin))) + + anns.append(ann) + + return anns + + def write_monitor_logger(self, stage: YmirStage, p: float): + monitor.write_monitor_logger( + percent=get_ymir_process(stage=stage, p=p, task_idx=self.task_idx, task_num=self.task_num)) + + +def convert_ymir_to_yolov5(cfg: edict, out_dir: str = None): + """ + convert ymir format dataset to yolov5 format + generate data.yaml for training/mining/infer + """ + + out_dir = out_dir or cfg.ymir.output.root_dir + data = dict(path=out_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) + for split, prefix in zip(['train', 'val', 'test'], ['training', 'val', 'candidate']): + src_file = getattr(cfg.ymir.input, f'{prefix}_index_file') + if osp.exists(src_file): + shutil.copy(src_file, f'{out_dir}/{split}.tsv') + + data[split] = f'{split}.tsv' + + with open(osp.join(out_dir, 'data.yaml'), 'w') as fw: + fw.write(yaml.safe_dump(data)) From 2f36c1c3533ec062a8628e02c45f9d9c93258151 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 20 Oct 2022 11:24:47 +0800 Subject: [PATCH 148/150] mv folder --- det-yolov5-tmi/README.md | 307 +++++++++++++++++-- det-yolov5-tmi/README_yolov5.md | 304 ------------------ det-yolov5-tmi/cuda102.dockerfile | 40 --- det-yolov5-tmi/cuda111.dockerfile | 43 --- det-yolov5-tmi/infer-template.yaml | 15 - det-yolov5-tmi/mining-template.yaml | 18 -- det-yolov5-tmi/mining/data_augment.py | 204 ------------ det-yolov5-tmi/mining/util.py | 149 --------- det-yolov5-tmi/mining/ymir_infer.py | 130 -------- det-yolov5-tmi/mining/ymir_mining_aldd.py | 210 ------------- det-yolov5-tmi/mining/ymir_mining_cald.py | 190 ------------ det-yolov5-tmi/mining/ymir_mining_entropy.py | 112 ------- det-yolov5-tmi/mining/ymir_mining_random.py | 75 ----- det-yolov5-tmi/start.py | 191 ------------ det-yolov5-tmi/training-template.yaml | 22 -- det-yolov5-tmi/utils/ymir_yolov5.py | 
187 ----------- 16 files changed, 284 insertions(+), 1913 deletions(-) delete mode 100644 det-yolov5-tmi/README_yolov5.md delete mode 100644 det-yolov5-tmi/cuda102.dockerfile delete mode 100644 det-yolov5-tmi/cuda111.dockerfile delete mode 100644 det-yolov5-tmi/infer-template.yaml delete mode 100644 det-yolov5-tmi/mining-template.yaml delete mode 100644 det-yolov5-tmi/mining/data_augment.py delete mode 100644 det-yolov5-tmi/mining/util.py delete mode 100644 det-yolov5-tmi/mining/ymir_infer.py delete mode 100644 det-yolov5-tmi/mining/ymir_mining_aldd.py delete mode 100644 det-yolov5-tmi/mining/ymir_mining_cald.py delete mode 100644 det-yolov5-tmi/mining/ymir_mining_entropy.py delete mode 100644 det-yolov5-tmi/mining/ymir_mining_random.py delete mode 100644 det-yolov5-tmi/start.py delete mode 100644 det-yolov5-tmi/training-template.yaml delete mode 100644 det-yolov5-tmi/utils/ymir_yolov5.py diff --git a/det-yolov5-tmi/README.md b/det-yolov5-tmi/README.md index 6bf9151..b03a7c5 100644 --- a/det-yolov5-tmi/README.md +++ b/det-yolov5-tmi/README.md @@ -1,43 +1,304 @@ -# yolov5-ymir readme -- [yolov5 readme](./README_yolov5.md) +
    +

    + + +

    +
    +
    + CI CPU testing + YOLOv5 Citation + Docker Pulls +
    + Open In Colab + Open In Kaggle + Join Forum +
    +
    +

    +YOLOv5 🚀 is a family of object detection architectures and models pretrained on the COCO dataset, and represents Ultralytics + open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. +

    + + + + + +
    + +##
    Documentation
    + +See the [YOLOv5 Docs](https://docs.ultralytics.com) for full documentation on training, testing and deployment. + +##
    Quick Start Examples
    + +
    +Install + +Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a +[**Python>=3.7.0**](https://www.python.org/) environment, including +[**PyTorch>=1.7**](https://pytorch.org/get-started/locally/). + +```bash +git clone https://github.com/ultralytics/yolov5 # clone +cd yolov5 +pip install -r requirements.txt # install ``` -docker build -t ymir/ymir-executor:ymir1.1.0-cuda102-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda102.dockerfile . -docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERVER_MODE=dev --build-arg YMIR=1.1.0 -f cuda111.dockerfile . +
    + +
    +Inference + +Inference with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36) +. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest +YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). + +```python +import torch + +# Model +model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # or yolov5m, yolov5l, yolov5x, custom + +# Images +img = 'https://ultralytics.com/images/zidane.jpg' # or file, Path, PIL, OpenCV, numpy, list + +# Inference +results = model(img) + +# Results +results.print() # or .show(), .save(), .crop(), .pandas(), etc. +``` + +
    + + + +
    +Inference with detect.py + +`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from +the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. + +```bash +python detect.py --source 0 # webcam + img.jpg # image + vid.mp4 # video + path/ # directory + path/*.jpg # glob + 'https://youtu.be/Zgi9g1ksQHc' # YouTube + 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream ``` -## main change log +
    + +
    +Training + +The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) +results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) +and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest +YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are +1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://github.com/ultralytics/yolov5/issues/475) times faster). Use the +largest `--batch-size` possible, or pass `--batch-size -1` for +YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown for V100-16GB. + +```bash +python train.py --data coco.yaml --cfg yolov5n.yaml --weights '' --batch-size 128 + yolov5s 64 + yolov5m 40 + yolov5l 24 + yolov5x 16 +``` + + + +
    + +
    +Tutorials + +* [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED +* [Tips for Best Training Results](https://github.com/ultralytics/yolov5/wiki/Tips-for-Best-Training-Results)  ☘️ + RECOMMENDED +* [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW +* [Roboflow for Datasets, Labeling, and Active Learning](https://github.com/ultralytics/yolov5/issues/4975)  🌟 NEW +* [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) +* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW +* [TFLite, ONNX, CoreML, TensorRT Export](https://github.com/ultralytics/yolov5/issues/251) 🚀 +* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) +* [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) +* [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) +* [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) +* [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW +* [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) + +
    + +##
    Environments
    + +Get started in seconds with our verified environments. Click each icon below for details. + + + +##
    Integrations
    + + + +|Weights and Biases|Roboflow ⭐ NEW| +|:-:|:-:| +|Automatically track and visualize all your YOLOv5 training runs in the cloud with [Weights & Biases](https://wandb.ai/site?utm_campaign=repo_yolo_readme)|Label and export your custom datasets directly to YOLOv5 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | + + + + +##
    Why YOLOv5
    + +

    +
    + YOLOv5-P5 640 Figure (click to expand) + +

    +
    +
    + Figure Notes (click to expand) -- add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining +* **COCO AP val** denotes mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536. +* **GPU Speed** measures average inference time per image on [COCO val2017](http://cocodataset.org) dataset using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) V100 instance at batch-size 32. +* **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 8. +* **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt` +
    -- add `utils/ymir_yolov5.py` for useful functions +### Pretrained Checkpoints - - `get_merged_config()` add ymir path config `cfg.yaml` and hyper-parameter `cfg.param` +[assets]: https://github.com/ultralytics/yolov5/releases - - `convert_ymir_to_yolov5()` generate yolov5 dataset config file `data.yaml` +[TTA]: https://github.com/ultralytics/yolov5/issues/303 - - `write_ymir_training_result()` save model weight, map and other files. +|Model |size
    (pixels) |mAPval
    0.5:0.95 |mAPval
    0.5 |Speed
    CPU b1
    (ms) |Speed
    V100 b1
    (ms) |Speed
    V100 b32
    (ms) |params
    (M) |FLOPs
    @640 (B) +|--- |--- |--- |--- |--- |--- |--- |--- |--- +|[YOLOv5n][assets] |640 |28.0 |45.7 |**45** |**6.3**|**0.6**|**1.9**|**4.5** +|[YOLOv5s][assets] |640 |37.4 |56.8 |98 |6.4 |0.9 |7.2 |16.5 +|[YOLOv5m][assets] |640 |45.4 |64.1 |224 |8.2 |1.7 |21.2 |49.0 +|[YOLOv5l][assets] |640 |49.0 |67.3 |430 |10.1 |2.7 |46.5 |109.1 +|[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7 +| | | | | | | | | +|[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6 +|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6 +|[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0 +|[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4 +|[YOLOv5x6][assets]
    + [TTA][TTA]|1280
    1536 |55.0
    **55.8** |72.7
    **72.7** |3136
    - |26.2
    - |19.4
    - |140.7
    - |209.8
    - - - `get_weight_file()` get pretrained weight or init weight file from ymir system +
    + Table Notes (click to expand) -- modify `utils/datasets.py` for ymir dataset format +* All checkpoints are trained to 300 epochs with default settings. Nano and Small models use [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) hyps, all others use [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml). +* **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
    Reproduce by `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` +* **Speed** averaged over COCO val images using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) instance. NMS times (~1 ms/img) not included.
    Reproduce by `python val.py --data coco.yaml --img 640 --task speed --batch 1` +* **TTA** [Test Time Augmentation](https://github.com/ultralytics/yolov5/issues/303) includes reflection and scale augmentations.
    Reproduce by `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment` -- modify `train.py` for training process monitor +
    -- add `mining/data_augment.py` and `mining/mining_cald.py` for mining +##
    Contribute
    -- add `training/infer/mining-template.yaml` for `/img-man/training/infer/mining-template.yaml` +We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible. Please see our [Contributing Guide](CONTRIBUTING.md) to get started, and fill out the [YOLOv5 Survey](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experiences. Thank you to all our contributors! -- add `cuda102/111.dockerfile`, remove origin `Dockerfile` + -- modify `requirements.txt` +##
    Contact
    -- other modify support onnx export, not important. +For YOLOv5 bugs and feature requests please visit [GitHub Issues](https://github.com/ultralytics/yolov5/issues). For business inquiries or +professional support requests please visit [https://ultralytics.com/contact](https://ultralytics.com/contact). -## new features +
    -- 2022/09/08: add aldd active learning algorithm for mining task. [Active Learning for Deep Detection Neural Networks (ICCV 2019)](https://gitlab.com/haghdam/deep_active_learning) -- 2022/09/14: support change hyper-parameter `num_workers_per_gpu` -- 2022/09/16: support change activation, view [rknn](https://github.com/airockchip/rknn_model_zoo/tree/main/models/vision/object_detection/yolov5-pytorch) -- 2022/10/09: fix dist.destroy_process_group() hang + diff --git a/det-yolov5-tmi/README_yolov5.md b/det-yolov5-tmi/README_yolov5.md deleted file mode 100644 index b03a7c5..0000000 --- a/det-yolov5-tmi/README_yolov5.md +++ /dev/null @@ -1,304 +0,0 @@ -
    -

    - - -

    -
    -
    - CI CPU testing - YOLOv5 Citation - Docker Pulls -
    - Open In Colab - Open In Kaggle - Join Forum -
    - -
    -

    -YOLOv5 🚀 is a family of object detection architectures and models pretrained on the COCO dataset, and represents Ultralytics - open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development. -

    - - - - - -
    - -##
    Documentation
    - -See the [YOLOv5 Docs](https://docs.ultralytics.com) for full documentation on training, testing and deployment. - -##
    Quick Start Examples
    - -
    -Install - -Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a -[**Python>=3.7.0**](https://www.python.org/) environment, including -[**PyTorch>=1.7**](https://pytorch.org/get-started/locally/). - -```bash -git clone https://github.com/ultralytics/yolov5 # clone -cd yolov5 -pip install -r requirements.txt # install -``` - -
    - -
    -Inference - -Inference with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36) -. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest -YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). - -```python -import torch - -# Model -model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # or yolov5m, yolov5l, yolov5x, custom - -# Images -img = 'https://ultralytics.com/images/zidane.jpg' # or file, Path, PIL, OpenCV, numpy, list - -# Inference -results = model(img) - -# Results -results.print() # or .show(), .save(), .crop(), .pandas(), etc. -``` - -
    - - - -
    -Inference with detect.py - -`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from -the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. - -```bash -python detect.py --source 0 # webcam - img.jpg # image - vid.mp4 # video - path/ # directory - path/*.jpg # glob - 'https://youtu.be/Zgi9g1ksQHc' # YouTube - 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream -``` - -
    - -
    -Training - -The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) -results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) -and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest -YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are -1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://github.com/ultralytics/yolov5/issues/475) times faster). Use the -largest `--batch-size` possible, or pass `--batch-size -1` for -YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown for V100-16GB. - -```bash -python train.py --data coco.yaml --cfg yolov5n.yaml --weights '' --batch-size 128 - yolov5s 64 - yolov5m 40 - yolov5l 24 - yolov5x 16 -``` - - - -
    - -
    -Tutorials - -* [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED -* [Tips for Best Training Results](https://github.com/ultralytics/yolov5/wiki/Tips-for-Best-Training-Results)  ☘️ - RECOMMENDED -* [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW -* [Roboflow for Datasets, Labeling, and Active Learning](https://github.com/ultralytics/yolov5/issues/4975)  🌟 NEW -* [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) -* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW -* [TFLite, ONNX, CoreML, TensorRT Export](https://github.com/ultralytics/yolov5/issues/251) 🚀 -* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) -* [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) -* [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) -* [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) -* [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW -* [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) - -
    - -##
    Environments
    - -Get started in seconds with our verified environments. Click each icon below for details. - - - -##
    Integrations
    - - - -|Weights and Biases|Roboflow ⭐ NEW| -|:-:|:-:| -|Automatically track and visualize all your YOLOv5 training runs in the cloud with [Weights & Biases](https://wandb.ai/site?utm_campaign=repo_yolo_readme)|Label and export your custom datasets directly to YOLOv5 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | - - - - -##
    Why YOLOv5
    - -

    -
    - YOLOv5-P5 640 Figure (click to expand) - -

    -
    -
    - Figure Notes (click to expand) - -* **COCO AP val** denotes mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536. -* **GPU Speed** measures average inference time per image on [COCO val2017](http://cocodataset.org) dataset using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) V100 instance at batch-size 32. -* **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 8. -* **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt` -
    - -### Pretrained Checkpoints - -[assets]: https://github.com/ultralytics/yolov5/releases - -[TTA]: https://github.com/ultralytics/yolov5/issues/303 - -|Model |size
    (pixels) |mAPval
    0.5:0.95 |mAPval
    0.5 |Speed
    CPU b1
    (ms) |Speed
    V100 b1
    (ms) |Speed
    V100 b32
    (ms) |params
    (M) |FLOPs
    @640 (B) -|--- |--- |--- |--- |--- |--- |--- |--- |--- -|[YOLOv5n][assets] |640 |28.0 |45.7 |**45** |**6.3**|**0.6**|**1.9**|**4.5** -|[YOLOv5s][assets] |640 |37.4 |56.8 |98 |6.4 |0.9 |7.2 |16.5 -|[YOLOv5m][assets] |640 |45.4 |64.1 |224 |8.2 |1.7 |21.2 |49.0 -|[YOLOv5l][assets] |640 |49.0 |67.3 |430 |10.1 |2.7 |46.5 |109.1 -|[YOLOv5x][assets] |640 |50.7 |68.9 |766 |12.1 |4.8 |86.7 |205.7 -| | | | | | | | | -|[YOLOv5n6][assets] |1280 |36.0 |54.4 |153 |8.1 |2.1 |3.2 |4.6 -|[YOLOv5s6][assets] |1280 |44.8 |63.7 |385 |8.2 |3.6 |16.8 |12.6 -|[YOLOv5m6][assets] |1280 |51.3 |69.3 |887 |11.1 |6.8 |35.7 |50.0 -|[YOLOv5l6][assets] |1280 |53.7 |71.3 |1784 |15.8 |10.5 |76.8 |111.4 -|[YOLOv5x6][assets]
    + [TTA][TTA]|1280
    1536 |55.0
    **55.8** |72.7
    **72.7** |3136
    - |26.2
    - |19.4
    - |140.7
    - |209.8
    - - -
    - Table Notes (click to expand) - -* All checkpoints are trained to 300 epochs with default settings. Nano and Small models use [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) hyps, all others use [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml). -* **mAPval** values are for single-model single-scale on [COCO val2017](http://cocodataset.org) dataset.
    Reproduce by `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` -* **Speed** averaged over COCO val images using a [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p3/) instance. NMS times (~1 ms/img) not included.
    Reproduce by `python val.py --data coco.yaml --img 640 --task speed --batch 1` -* **TTA** [Test Time Augmentation](https://github.com/ultralytics/yolov5/issues/303) includes reflection and scale augmentations.
    Reproduce by `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment` - -
    - -##
    Contribute
    - -We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible. Please see our [Contributing Guide](CONTRIBUTING.md) to get started, and fill out the [YOLOv5 Survey](https://ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experiences. Thank you to all our contributors! - - - -##
    Contact
    - -For YOLOv5 bugs and feature requests please visit [GitHub Issues](https://github.com/ultralytics/yolov5/issues). For business inquiries or -professional support requests please visit [https://ultralytics.com/contact](https://ultralytics.com/contact). - -
    - - diff --git a/det-yolov5-tmi/cuda102.dockerfile b/det-yolov5-tmi/cuda102.dockerfile deleted file mode 100644 index 0014b60..0000000 --- a/det-yolov5-tmi/cuda102.dockerfile +++ /dev/null @@ -1,40 +0,0 @@ -ARG PYTORCH="1.8.1" -ARG CUDA="10.2" -ARG CUDNN="7" - -FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime -# support YMIR=1.0.0, 1.1.0 or 1.2.0 -ARG YMIR="1.1.0" - -ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" -ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" -ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" -ENV LANG=C.UTF-8 -ENV YMIR_VERSION=${YMIR} - -# Install linux package -RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ - libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ - build-essential ninja-build \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# install ymir-exc sdk -RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" - -# Copy file from host to docker and install requirements -COPY . /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ \ - && pip install -r /app/requirements.txt - -# Download pretrained weight and font file -RUN cd /app && bash data/scripts/download_weights.sh \ - && mkdir -p /root/.config/Ultralytics \ - && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf - -# Make PYTHONPATH find local package -ENV PYTHONPATH=. - -WORKDIR /app -RUN echo "python3 /app/start.py" > /usr/bin/start.sh -CMD bash /usr/bin/start.sh diff --git a/det-yolov5-tmi/cuda111.dockerfile b/det-yolov5-tmi/cuda111.dockerfile deleted file mode 100644 index 84427a8..0000000 --- a/det-yolov5-tmi/cuda111.dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -ARG PYTORCH="1.8.0" -ARG CUDA="11.1" -ARG CUDNN="8" - -# cuda11.1 + pytorch 1.9.0 + cudnn8 not work!!! 
-FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-runtime -# support YMIR=1.0.0, 1.1.0 or 1.2.0 -ARG YMIR="1.1.0" - - -ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" -ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" -ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" -ENV LANG=C.UTF-8 -ENV YMIR_VERSION=$YMIR - -# Install linux package -RUN apt-get update && apt-get install -y gnupg2 git libglib2.0-0 \ - libgl1-mesa-glx libsm6 libxext6 libxrender-dev curl wget zip vim \ - build-essential ninja-build \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -COPY ./requirements.txt /workspace/ -# install ymir-exc sdk and requirements -RUN pip install "git+https://github.com/modelai/ymir-executor-sdk.git@ymir1.0.0" \ - && pip install -r /workspace/requirements.txt - -# Copy file from host to docker and install requirements -COPY . /app -RUN mkdir /img-man && mv /app/*-template.yaml /img-man/ - -# Download pretrained weight and font file -RUN cd /app && bash data/scripts/download_weights.sh \ - && mkdir -p /root/.config/Ultralytics \ - && wget https://ultralytics.com/assets/Arial.ttf -O /root/.config/Ultralytics/Arial.ttf - -# Make PYTHONPATH find local package -ENV PYTHONPATH=. 
- -WORKDIR /app -RUN echo "python3 /app/start.py" > /usr/bin/start.sh -CMD bash /usr/bin/start.sh diff --git a/det-yolov5-tmi/infer-template.yaml b/det-yolov5-tmi/infer-template.yaml deleted file mode 100644 index 329887a..0000000 --- a/det-yolov5-tmi/infer-template.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# infer template for your executor app -# after build image, it should at /img-man/infer-template.yaml -# key: gpu_id, task_id, model_params_path, class_names should be preserved - -# gpu_id: '0' -# task_id: 'default-infer-task' -# model_params_path: [] -# class_names: [] - -img_size: 640 -conf_thres: 0.25 -iou_thres: 0.45 -batch_size_per_gpu: 16 -num_workers_per_gpu: 4 -pin_memory: False diff --git a/det-yolov5-tmi/mining-template.yaml b/det-yolov5-tmi/mining-template.yaml deleted file mode 100644 index 485c8bb..0000000 --- a/det-yolov5-tmi/mining-template.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# mining template for your executor app -# after build image, it should at /img-man/mining-template.yaml -# key: gpu_id, task_id, model_params_path, class_names should be preserved - -# gpu_id: '0' -# task_id: 'default-training-task' -# model_params_path: [] -# class_names: [] - -img_size: 640 -mining_algorithm: aldd -class_distribution_scores: '' # 1.0,1.0,0.1,0.2 -conf_thres: 0.25 -iou_thres: 0.45 -batch_size_per_gpu: 16 -num_workers_per_gpu: 4 -pin_memory: False -shm_size: 128G diff --git a/det-yolov5-tmi/mining/data_augment.py b/det-yolov5-tmi/mining/data_augment.py deleted file mode 100644 index cfafaa7..0000000 --- a/det-yolov5-tmi/mining/data_augment.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout -official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py -""" -import random -from typing import Any, List, Tuple - -import cv2 -import numpy as np -from nptyping import NDArray - -from utils.ymir_yolov5 import BBOX, CV_IMAGE - - -def intersect(boxes1: BBOX, boxes2: BBOX) -> 
NDArray: - ''' - Find intersection of every box combination between two sets of box - boxes1: bounding boxes 1, a tensor of dimensions (n1, 4) - boxes2: bounding boxes 2, a tensor of dimensions (n2, 4) - - Out: Intersection each of boxes1 with respect to each of boxes2, - a tensor of dimensions (n1, n2) - ''' - n1 = boxes1.shape[0] - n2 = boxes2.shape[0] - max_xy = np.minimum( - np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) - - min_xy = np.maximum( - np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), - np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) - inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) - return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) - - -def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ - -> Tuple[CV_IMAGE, BBOX]: - """ - image: opencv image, [height,width,channels] - bbox: numpy.ndarray, [N,4] --> [x1,y1,x2,y2] - """ - image = image.copy() - - width = image.shape[1] - # Flip image horizontally - image = image[:, ::-1, :] - if len(bbox) > 0: - bbox = bbox.copy() - # Flip bbox horizontally - bbox[:, [0, 2]] = width - bbox[:, [2, 0]] - return image, bbox - - -def cutout(image: CV_IMAGE, - bbox: BBOX, - cut_num: int = 2, - fill_val: int = 0, - bbox_remove_thres: float = 0.4, - bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: - ''' - Cutout augmentation - image: A PIL image - boxes: bounding boxes, a tensor of dimensions (#objects, 4) - labels: labels of object, a tensor of dimensions (#objects) - fill_val: Value filled in cut out - bbox_remove_thres: Theshold to remove bbox cut by cutout - - Out: new image, new_boxes, new_labels - ''' - image = image.copy() - bbox = bbox.copy() - - if len(bbox) == 0: - return image, bbox - - original_h, original_w, original_channel = image.shape - count = 0 - for _ in range(50): - # Random cutout size: [0.15, 0.5] of original dimension - cutout_size_h = random.uniform(0.05 * original_h, 0.2 * original_h) 
- cutout_size_w = random.uniform(0.05 * original_w, 0.2 * original_w) - - # Random position for cutout - left = random.uniform(0, original_w - cutout_size_w) - right = left + cutout_size_w - top = random.uniform(0, original_h - cutout_size_h) - bottom = top + cutout_size_h - cutout = np.array([[float(left), float(top), float(right), float(bottom)]]) - - # Calculate intersect between cutout and bounding boxes - overlap_size = intersect(cutout, bbox) - area_boxes = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) - ratio = overlap_size / (area_boxes + 1e-14) - # If all boxes have Iou greater than bbox_remove_thres, try again - if ratio.max() > bbox_remove_thres or ratio.max() < bbox_min_thres: - continue - - image[int(top):int(bottom), int(left):int(right), :] = fill_val - count += 1 - if count >= cut_num: - break - return image, bbox - - -def rotate(image: CV_IMAGE, bbox: BBOX, rot: float = 5) -> Tuple[CV_IMAGE, BBOX]: - image = image.copy() - bbox = bbox.copy() - h, w, c = image.shape - center = np.array([w / 2.0, h / 2.0]) - s = max(h, w) * 1.0 - trans = get_affine_transform(center, s, rot, [w, h]) - if len(bbox) > 0: - for i in range(bbox.shape[0]): - x1, y1 = affine_transform(bbox[i, :2], trans) - x2, y2 = affine_transform(bbox[i, 2:], trans) - x3, y3 = affine_transform(bbox[i, [2, 1]], trans) - x4, y4 = affine_transform(bbox[i, [0, 3]], trans) - bbox[i, :2] = [min(x1, x2, x3, x4), min(y1, y2, y3, y4)] - bbox[i, 2:] = [max(x1, x2, x3, x4), max(y1, y2, y3, y4)] - image = cv2.warpAffine(image, trans, (w, h), flags=cv2.INTER_LINEAR) - return image, bbox - - -def get_3rd_point(a: NDArray, b: NDArray) -> NDArray: - direct = a - b - return b + np.array([-direct[1], direct[0]], dtype=np.float32) - - -def get_dir(src_point: NDArray, rot_rad: float) -> List: - sn, cs = np.sin(rot_rad), np.cos(rot_rad) - - src_result = [0, 0] - src_result[0] = src_point[0] * cs - src_point[1] * sn - src_result[1] = src_point[0] * sn + src_point[1] * cs - - return src_result - - -def 
transform_preds(coords: NDArray, center: NDArray, scale: Any, rot: float, output_size: List) -> NDArray: - trans = get_affine_transform(center, scale, rot, output_size, inv=True) - target_coords = affine_transform(coords, trans) - return target_coords - - -def get_affine_transform(center: NDArray, - scale: Any, - rot: float, - output_size: List, - shift: NDArray = np.array([0, 0], dtype=np.float32), - inv: bool = False) -> NDArray: - if not isinstance(scale, np.ndarray) and not isinstance(scale, list): - scale = np.array([scale, scale], dtype=np.float32) - - scale_tmp = scale - src_w = scale_tmp[0] - dst_w = output_size[0] - dst_h = output_size[1] - - rot_rad = np.pi * rot / 180 - src_dir = get_dir([0, src_w * -0.5], rot_rad) - dst_dir = np.array([0, dst_w * -0.5], np.float32) - - src = np.zeros((3, 2), dtype=np.float32) - dst = np.zeros((3, 2), dtype=np.float32) - src[0, :] = center + scale_tmp * shift - src[1, :] = center + src_dir + scale_tmp * shift - dst[0, :] = [dst_w * 0.5, dst_h * 0.5] - dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir - - src[2:, :] = get_3rd_point(src[0, :], src[1, :]) - dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) - - if inv: - trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) - else: - trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) - - return trans - - -def affine_transform(pt: NDArray, t: NDArray) -> NDArray: - new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T - new_pt = np.dot(t, new_pt) - return new_pt[:2] - - -def resize(img: CV_IMAGE, boxes: BBOX, ratio: float = 0.8) -> Tuple[CV_IMAGE, BBOX]: - """ - ratio: <= 1.0 - """ - assert ratio <= 1.0, f'resize ratio {ratio} must <= 1.0' - - h, w, _ = img.shape - ow = int(w * ratio) - oh = int(h * ratio) - resize_img = cv2.resize(img, (ow, oh)) - new_img = np.zeros_like(img) - new_img[:oh, :ow] = resize_img - - if len(boxes) == 0: - return new_img, boxes - else: - return new_img, boxes * ratio diff --git 
a/det-yolov5-tmi/mining/util.py b/det-yolov5-tmi/mining/util.py deleted file mode 100644 index c69343c..0000000 --- a/det-yolov5-tmi/mining/util.py +++ /dev/null @@ -1,149 +0,0 @@ -"""run.py: -img --(model)--> pred --(augmentation)--> (aug1_pred, aug2_pred, ..., augN_pred) -img --(augmentation)--> aug1_img --(model)--> pred1 -img --(augmentation)--> aug2_img --(model)--> pred2 -... -img --(augmentation)--> augN_img --(model)--> predN - -dataload(img) --(model)--> pred -dataload(img, pred) --(augmentation1)--> (aug1_img, aug1_pred) --(model)--> pred1 - -1. split dataset with DDP sampler -2. use DDP model to infer sampled dataloader -3. gather infer result - -""" -import os -from typing import Any, List - -import cv2 -import numpy as np -import torch.utils.data as td -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate -from nptyping import NDArray -from scipy.stats import entropy -from torch.utils.data._utils.collate import default_collate -from utils.augmentations import letterbox -from utils.ymir_yolov5 import BBOX - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: - """ - args: - boxes1: np.array, (N, 4), xyxy - boxes2: np.array, (M, 4), xyxy - return: - iou: np.array, (N, M) - """ - area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) - area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) - iner_area = intersect(boxes1, boxes2) - area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1) - area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0) - iou = iner_area / (area1 + area2 - iner_area + 1e-14) - return iou - - -def preprocess(img, img_size, stride): - img1 = letterbox(img, img_size, stride=stride, auto=False)[0] - - # preprocess: convert data format - img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, 
BGR to RGB - img1 = np.ascontiguousarray(img1) - # img1 = torch.from_numpy(img1).to(self.device) - - img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 - return img1 - - -def load_image_file(img_file: str, img_size, stride): - img = cv2.imread(img_file) - img1 = letterbox(img, img_size, stride=stride, auto=False)[0] - - # preprocess: convert data format - img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img1 = np.ascontiguousarray(img1) - # img1 = torch.from_numpy(img1).to(self.device) - - img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 - # img1.unsqueeze_(dim=0) # expand for batch dim - return dict(image=img1, origin_shape=img.shape[0:2], image_file=img_file) - # return img1 - - -def load_image_file_with_ann(image_info: dict, img_size, stride): - img_file = image_info['image_file'] - # xyxy(int) conf(float) class_index(int) - bboxes = image_info['results'][:, :4].astype(np.int32) - img = cv2.imread(img_file) - aug_dict = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) - - data = dict(image_file=img_file, origin_shape=img.shape[0:2]) - for key in aug_dict: - aug_img, aug_bbox = aug_dict[key](img, bboxes) - preprocess_aug_img = preprocess(aug_img, img_size, stride) - data[f'image_{key}'] = preprocess_aug_img - data[f'bboxes_{key}'] = aug_bbox - data[f'origin_shape_{key}'] = aug_img.shape[0:2] - - data.update(image_info) - return data - - -def collate_fn_with_fake_ann(batch): - new_batch = dict() - for key in ['flip', 'cutout', 'rotate', 'resize']: - new_batch[f'bboxes_{key}_list'] = [data[f'bboxes_{key}'] for data in batch] - - new_batch[f'image_{key}'] = default_collate([data[f'image_{key}'] for data in batch]) - - new_batch[f'origin_shape_{key}'] = default_collate([data[f'origin_shape_{key}'] for data in batch]) - - new_batch['results_list'] = [data['results'] for data in batch] - new_batch['image_file'] = [data['image_file'] for data in batch] - - return new_batch - - -def update_consistency(consistency, consistency_per_aug, beta, 
pred_bboxes_key, pred_conf_key, aug_bboxes_key, - aug_conf): - cls_scores_aug = 1 - pred_conf_key - cls_scores = 1 - aug_conf - - consistency_per_aug = 2.0 - ious = get_ious(pred_bboxes_key, aug_bboxes_key) - aug_idxs = np.argmax(ious, axis=0) - for origin_idx, aug_idx in enumerate(aug_idxs): - max_iou = ious[aug_idx, origin_idx] - if max_iou == 0: - consistency_per_aug = min(consistency_per_aug, beta) - p = cls_scores_aug[aug_idx] - q = cls_scores[origin_idx] - m = (p + q) / 2. - js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m]) - if js < 0: - js = 0 - consistency_box = max_iou - consistency_cls = 0.5 * (aug_conf[origin_idx] + pred_conf_key[aug_idx]) * (1 - js) - consistency_per_inst = abs(consistency_box + consistency_cls - beta) - consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item()) - - consistency += consistency_per_aug - return consistency - - -class YmirDataset(td.Dataset): - def __init__(self, images: List[Any], load_fn=None): - super().__init__() - self.images = images - self.load_fn = load_fn - - def __getitem__(self, index): - return self.load_fn(self.images[index]) - - def __len__(self): - return len(self.images) diff --git a/det-yolov5-tmi/mining/ymir_infer.py b/det-yolov5-tmi/mining/ymir_infer.py deleted file mode 100644 index ad1e0d2..0000000 --- a/det-yolov5-tmi/mining/ymir_infer.py +++ /dev/null @@ -1,130 +0,0 @@ -"""use fake DDP to infer -1. split data with `images_rank = images[RANK::WORLD_SIZE]` -2. save splited result with `torch.save(results, f'results_{RANK}.pt')` -3. 
merge result -""" -import os -import sys -import warnings -from functools import partial - -import torch -import torch.distributed as dist -import torch.utils.data as td -from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file -from tqdm import tqdm -from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): - # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. - gpu = max(0, LOCAL_RANK) - device = torch.device('cuda', gpu) - ymir_yolov5.to(device) - - load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - batch_size_per_gpu = ymir_yolov5.batch_size_per_gpu - gpu_count = ymir_yolov5.gpu_count - cpu_count: int = os.cpu_count() or 1 - num_workers_per_gpu = min([ - cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, - ymir_yolov5.num_workers_per_gpu - ]) - - with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: - images = [line.strip() for line in f.readlines()] - - max_barrier_times = len(images) // max(1, WORLD_SIZE) // batch_size_per_gpu - # origin dataset - images_rank = images[RANK::WORLD_SIZE] - origin_dataset = YmirDataset(images_rank, load_fn=load_fn) - origin_dataset_loader = td.DataLoader(origin_dataset, - batch_size=batch_size_per_gpu, - shuffle=False, - sampler=None, - num_workers=num_workers_per_gpu, - pin_memory=ymir_yolov5.pin_memory, - drop_last=False) - - results = [] - dataset_size = len(images_rank) - monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) - pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader - for idx, batch in 
enumerate(pbar): - # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1 and idx < max_barrier_times: - dist.barrier() - - with torch.no_grad(): - pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) - - if idx % monitor_gap == 0: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - - preprocess_image_shape = batch['image'].shape[2:] - for idx, det in enumerate(pred): # per image - result_per_image = [] - image_file = batch['image_file'][idx] - if len(det): - origin_image_shape = (batch['origin_shape'][0][idx], batch['origin_shape'][1][idx]) - # Rescale boxes from img_size to img size - det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() - result_per_image.append(det) - results.append(dict(image_file=image_file, result=result_per_image)) - - torch.save(results, f'/out/infer_results_{RANK}.pt') - - -def main() -> int: - ymir_cfg = get_merged_config() - ymir_yolov5 = YmirYolov5(ymir_cfg, task='infer') - - if LOCAL_RANK != -1: - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - run(ymir_cfg, ymir_yolov5) - - # wait all process to save the infer result - dist.barrier() - - if RANK in [0, -1]: - results = [] - for rank in range(WORLD_SIZE): - results.append(torch.load(f'/out/infer_results_{rank}.pt')) - - ymir_infer_result = dict() - for result in results: - for img_data in result: - img_file = img_data['image_file'] - anns = [] - for each_det in img_data['result']: - each_det_np = each_det.data.cpu().numpy() - for i in range(each_det_np.shape[0]): - xmin, ymin, xmax, ymax, conf, cls = each_det_np[i, :6].tolist() - if conf < ymir_yolov5.conf_thres: - continue - if int(cls) >= len(ymir_yolov5.class_names): - warnings.warn(f'class index {int(cls)} out of range for {ymir_yolov5.class_names}') - 
continue - ann = rw.Annotation(class_name=ymir_yolov5.class_names[int(cls)], - score=conf, - box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin), - h=int(ymax - ymin))) - anns.append(ann) - ymir_infer_result[img_file] = anns - rw.write_infer_result(infer_result=ymir_infer_result) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining_aldd.py b/det-yolov5-tmi/mining/ymir_mining_aldd.py deleted file mode 100644 index 8d6a27c..0000000 --- a/det-yolov5-tmi/mining/ymir_mining_aldd.py +++ /dev/null @@ -1,210 +0,0 @@ -"""use fake DDP to infer -1. split data with `images_rank = images[RANK::WORLD_SIZE]` -2. infer on the origin dataset -3. infer on the augmentation dataset -4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` -5. merge mining result -""" -import os -import sys -import warnings -from functools import partial -from typing import Any, List - -import numpy as np -import torch -import torch.distributed as dist -import torch.nn.functional as F -import torch.utils.data as td -from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file -from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -class ALDD(object): - - def __init__(self, ymir_cfg: edict): - self.avg_pool_size = 9 - self.max_pool_size = 32 - self.avg_pool_pad = (self.avg_pool_size - 1) // 2 - - self.num_classes = len(ymir_cfg.param.class_names) - if ymir_cfg.param.get('class_distribution_scores', ''): - scores = [float(x.strip()) for x in ymir_cfg.param.class_distribution_scores.split(',')] - if len(scores) < self.num_classes: - warnings.warn('extend 1.0 to class_distribution_scores') - 
scores.extend([1.0] * (self.num_classes - len(scores))) - self.class_distribution_scores = np.array(scores[0:self.num_classes], dtype=np.float32) - else: - self.class_distribution_scores = np.array([1.0] * self.num_classes, dtype=np.float32) - - def calc_unc_val(self, heatmap: torch.Tensor) -> torch.Tensor: - # mean of entropy - ent = F.binary_cross_entropy(heatmap, heatmap, reduction='none') - avg_ent = F.avg_pool2d(ent, - kernel_size=self.avg_pool_size, - stride=1, - padding=self.avg_pool_pad, - count_include_pad=False) # N, 1, H, W - mean_of_entropy = torch.sum(avg_ent, dim=1, keepdim=True) # N, 1, H, W - - # entropy of mean - avg_heatmap = F.avg_pool2d(heatmap, - kernel_size=self.avg_pool_size, - stride=1, - padding=self.avg_pool_pad, - count_include_pad=False) # N, C, H, W - ent_avg = F.binary_cross_entropy(avg_heatmap, avg_heatmap, reduction='none') - entropy_of_mean = torch.sum(ent_avg, dim=1, keepdim=True) # N, 1, H, W - - uncertainty = entropy_of_mean - mean_of_entropy - unc = F.max_pool2d(uncertainty, - kernel_size=self.max_pool_size, - stride=self.max_pool_size, - padding=0, - ceil_mode=False) - - # aggregating - scores = torch.mean(unc, dim=(1, 2, 3)) # (N,) - return scores - - def compute_aldd_score(self, net_output: List[torch.Tensor], net_input_shape: Any): - """ - args: - imgs: list[np.array(H, W, C)] - returns: - scores: list of float - """ - if not isinstance(net_input_shape, (list, tuple)): - net_input_shape = (net_input_shape, net_input_shape) - - # CLASS_DISTRIBUTION_SCORE = np.array([1.0] * num_of_class) - scores_list = [] - - for feature_map in net_output: - feature_map.sigmoid_() - - for each_class_index in range(self.num_classes): - feature_map_list: List[torch.Tensor] = [] - - # each_output_feature_map: [bs, 3, h, w, 5 + num_classes] - for each_output_feature_map in net_output: - net_output_conf = each_output_feature_map[:, :, :, :, 4] - net_output_cls_mult_conf = net_output_conf * each_output_feature_map[:, :, :, :, 5 + each_class_index] 
- # feature_map_reshape: [bs, 3, h, w] - feature_map_reshape = F.interpolate(net_output_cls_mult_conf, - net_input_shape, - mode='bilinear', - align_corners=False) - feature_map_list.append(feature_map_reshape) - - # len(net_output) = 3 - # feature_map_concate: [bs, 9, h, w] - feature_map_concate = torch.cat(feature_map_list, 1) - # scores: [bs, 1] for each class - scores = self.calc_unc_val(feature_map_concate) - scores = scores.cpu().detach().numpy() - scores_list.append(scores) - - # total_scores: [bs, num_classes] - total_scores = np.stack(scores_list, axis=1) - total_scores = total_scores * self.class_distribution_scores - total_scores = np.sum(total_scores, axis=1) - - return total_scores - - -def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): - # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. - gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - device = torch.device('cuda', gpu) - ymir_yolov5.to(device) - - load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu - gpu_count: int = ymir_yolov5.gpu_count - cpu_count: int = os.cpu_count() or 1 - num_workers_per_gpu = min([ - cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, - ymir_yolov5.num_workers_per_gpu - ]) - - with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: - images = [line.strip() for line in f.readlines()] - - max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu - - # origin dataset - if RANK != -1: - images_rank = images[RANK::WORLD_SIZE] - else: - images_rank = images - origin_dataset = YmirDataset(images_rank, load_fn=load_fn) - origin_dataset_loader = td.DataLoader(origin_dataset, - batch_size=batch_size_per_gpu, - shuffle=False, - sampler=None, - num_workers=num_workers_per_gpu, - pin_memory=ymir_yolov5.pin_memory, - drop_last=False) - - mining_results = dict() - dataset_size = len(images_rank) - pbar = 
tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader - miner = ALDD(ymir_cfg) - for idx, batch in enumerate(pbar): - # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1 and idx < max_barrier_times: - dist.barrier() - - with torch.no_grad(): - featuremap_output = ymir_yolov5.model.model(batch['image'].float().to(device))[1] - unc_scores = miner.compute_aldd_score(featuremap_output, ymir_yolov5.img_size) - - for each_imgname, each_score in zip(batch["image_file"], unc_scores): - mining_results[each_imgname] = each_score - - if RANK in [-1, 0]: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - - torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') - - -def main() -> int: - ymir_cfg = get_merged_config() - # note select_device(gpu_id) will set os.environ['CUDA_VISIBLE_DEVICES'] to gpu_id - ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') - - if LOCAL_RANK != -1: - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - run(ymir_cfg, ymir_yolov5) - - # wait all process to save the mining result - if LOCAL_RANK != -1: - dist.barrier() - - if RANK in [0, -1]: - results = [] - for rank in range(WORLD_SIZE): - results.append(torch.load(f'/out/mining_results_{rank}.pt')) - - ymir_mining_result = [] - for result in results: - for img_file, score in result.items(): - ymir_mining_result.append((img_file, score)) - rw.write_mining_result(mining_result=ymir_mining_result) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining_cald.py b/det-yolov5-tmi/mining/ymir_mining_cald.py deleted file mode 100644 index b4c6147..0000000 --- a/det-yolov5-tmi/mining/ymir_mining_cald.py +++ /dev/null @@ -1,190 +0,0 @@ -"""use fake DDP to infer -1. 
split data with `images_rank = images[RANK::WORLD_SIZE]` -2. infer on the origin dataset -3. infer on the augmentation dataset -4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` -5. merge mining result -""" -import os -import sys -from functools import partial - -import numpy as np -import torch -import torch.distributed as dist -import torch.utils.data as td -from easydict import EasyDict as edict -from mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, - update_consistency) -from tqdm import tqdm -from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): - # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - device = torch.device('cuda', gpu) - ymir_yolov5.to(device) - - load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu - gpu_count: int = ymir_yolov5.gpu_count - cpu_count: int = os.cpu_count() or 1 - num_workers_per_gpu = min([ - cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, - ymir_yolov5.num_workers_per_gpu - ]) - - with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: - images = [line.strip() for line in f.readlines()] - - max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu - # origin dataset - images_rank = images[RANK::WORLD_SIZE] - origin_dataset = YmirDataset(images_rank, load_fn=load_fn) - origin_dataset_loader = td.DataLoader(origin_dataset, - batch_size=batch_size_per_gpu, - shuffle=False, - sampler=None, - num_workers=num_workers_per_gpu, - pin_memory=ymir_yolov5.pin_memory, - drop_last=False) - - results = [] - mining_results = dict() - beta = 1.3 - dataset_size = len(images_rank) - pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader - for idx, batch in enumerate(pbar): - # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1 and idx < max_barrier_times: - dist.barrier() - - with torch.no_grad(): - pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=True) - - if RANK in [-1, 0]: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - preprocess_image_shape = batch['image'].shape[2:] - for inner_idx, det in enumerate(pred): # per image - result_per_image = [] - image_file = batch['image_file'][inner_idx] - if len(det): - origin_image_shape = (batch['origin_shape'][0][inner_idx], batch['origin_shape'][1][inner_idx]) - # Rescale boxes from img_size to img size - det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], 
origin_image_shape).round() - result_per_image.append(det) - else: - mining_results[image_file] = -beta - continue - - results_per_image = torch.cat(result_per_image, dim=0).data.cpu().numpy() - results.append(dict(image_file=image_file, origin_shape=origin_image_shape, results=results_per_image)) - - aug_load_fn = partial(load_image_file_with_ann, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - aug_dataset = YmirDataset(results, load_fn=aug_load_fn) - aug_dataset_loader = td.DataLoader(aug_dataset, - batch_size=batch_size_per_gpu, - shuffle=False, - sampler=None, - collate_fn=collate_fn_with_fake_ann, - num_workers=num_workers_per_gpu, - pin_memory=ymir_yolov5.pin_memory, - drop_last=False) - - # cannot sync here!!! - dataset_size = len(results) - monitor_gap = max(1, dataset_size // 1000 // batch_size_per_gpu) - pbar = tqdm(aug_dataset_loader) if RANK == 0 else aug_dataset_loader - for idx, batch in enumerate(pbar): - if idx % monitor_gap == 0 and RANK in [-1, 0]: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - - batch_consistency = [0.0 for _ in range(len(batch['image_file']))] - aug_keys = ['flip', 'cutout', 'rotate', 'resize'] - - pred_result = dict() - for key in aug_keys: - with torch.no_grad(): - pred_result[key] = ymir_yolov5.forward(batch[f'image_{key}'].float().to(device), nms=True) - - for inner_idx in range(len(batch['image_file'])): - for key in aug_keys: - preprocess_image_shape = batch[f'image_{key}'].shape[2:] - result_per_image = [] - det = pred_result[key][inner_idx] - if len(det) == 0: - # no result for the image with augmentation f'{key}' - batch_consistency[inner_idx] += beta - continue - - # prediction result from origin image - fake_ann = batch['results_list'][inner_idx] - # bboxes = fake_ann[:, :4].data.cpu().numpy().astype(np.int32) - conf = fake_ann[:, 4] - - # augmentated bbox from bboxes, aug_conf = conf - aug_bboxes_key = 
batch[f'bboxes_{key}_list'][inner_idx].astype(np.int32) - - origin_image_shape = (batch[f'origin_shape_{key}'][0][inner_idx], - batch[f'origin_shape_{key}'][1][inner_idx]) - - # Rescale boxes from img_size to img size - det[:, :4] = scale_coords(preprocess_image_shape, det[:, :4], origin_image_shape).round() - result_per_image.append(det) - - pred_bboxes_key = det[:, :4].data.cpu().numpy().astype(np.int32) - pred_conf_key = det[:, 4].data.cpu().numpy() - batch_consistency[inner_idx] = update_consistency(consistency=batch_consistency[inner_idx], - consistency_per_aug=2.0, - beta=beta, - pred_bboxes_key=pred_bboxes_key, - pred_conf_key=pred_conf_key, - aug_bboxes_key=aug_bboxes_key, - aug_conf=conf) - - for inner_idx in range(len(batch['image_file'])): - batch_consistency[inner_idx] /= len(aug_keys) - image_file = batch['image_file'][inner_idx] - mining_results[image_file] = batch_consistency[inner_idx] - - torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') - - -def main() -> int: - ymir_cfg = get_merged_config() - ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') - - if LOCAL_RANK != -1: - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - run(ymir_cfg, ymir_yolov5) - - # wait all process to save the mining result - if LOCAL_RANK != -1: - dist.barrier() - - if RANK in [0, -1]: - results = [] - for rank in range(WORLD_SIZE): - results.append(torch.load(f'/out/mining_results_{rank}.pt')) - - ymir_mining_result = [] - for result in results: - for img_file, score in result.items(): - ymir_mining_result.append((img_file, score)) - rw.write_mining_result(mining_result=ymir_mining_result) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining_entropy.py b/det-yolov5-tmi/mining/ymir_mining_entropy.py deleted file mode 100644 index 
df5a1ff..0000000 --- a/det-yolov5-tmi/mining/ymir_mining_entropy.py +++ /dev/null @@ -1,112 +0,0 @@ -"""use fake DDP to infer -1. split data with `images_rank = images[RANK::WORLD_SIZE]` -2. infer on the origin dataset -3. infer on the augmentation dataset -4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` -5. merge mining result -""" -import os -import sys -from functools import partial - -import numpy as np -import torch -import torch.distributed as dist -import torch.utils.data as td -from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file -from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): - # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - device = torch.device('cuda', gpu) - ymir_yolov5.to(device) - - load_fn = partial(load_image_file, img_size=ymir_yolov5.img_size, stride=ymir_yolov5.stride) - batch_size_per_gpu: int = ymir_yolov5.batch_size_per_gpu - gpu_count: int = ymir_yolov5.gpu_count - cpu_count: int = os.cpu_count() or 1 - num_workers_per_gpu = min([ - cpu_count // max(gpu_count, 1), batch_size_per_gpu if batch_size_per_gpu > 1 else 0, - ymir_yolov5.num_workers_per_gpu - ]) - - with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: - images = [line.strip() for line in f.readlines()] - - max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu - # origin dataset - images_rank = images[RANK::WORLD_SIZE] - origin_dataset = YmirDataset(images_rank, load_fn=load_fn) - origin_dataset_loader = td.DataLoader(origin_dataset, - batch_size=batch_size_per_gpu, - shuffle=False, - sampler=None, - num_workers=num_workers_per_gpu, - pin_memory=ymir_yolov5.pin_memory, - drop_last=False) - - mining_results = dict() - dataset_size = len(images_rank) - pbar = tqdm(origin_dataset_loader) if RANK == 0 else origin_dataset_loader - for idx, batch in enumerate(pbar): - # batch-level sync, avoid 30min time-out error - if LOCAL_RANK != -1 and idx < max_barrier_times: - dist.barrier() - - with torch.no_grad(): - pred = ymir_yolov5.forward(batch['image'].float().to(device), nms=False) - - if RANK in [-1, 0]: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx * batch_size_per_gpu / dataset_size) - for inner_idx, det in enumerate(pred): # per image - image_file = batch['image_file'][inner_idx] - if len(det): - conf = det[:, 4].data.cpu().numpy() - mining_results[image_file] = -np.sum(conf * np.log2(conf)) - else: - mining_results[image_file] = -10 - continue - - torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') - - -def main() -> int: - ymir_cfg = get_merged_config() - ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') 
- - if LOCAL_RANK != -1: - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - run(ymir_cfg, ymir_yolov5) - - # wait all process to save the mining result - if WORLD_SIZE > 1: - dist.barrier() - - if RANK in [0, -1]: - results = [] - for rank in range(WORLD_SIZE): - results.append(torch.load(f'/out/mining_results_{rank}.pt')) - - ymir_mining_result = [] - for result in results: - for img_file, score in result.items(): - ymir_mining_result.append((img_file, score)) - rw.write_mining_result(mining_result=ymir_mining_result) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/det-yolov5-tmi/mining/ymir_mining_random.py b/det-yolov5-tmi/mining/ymir_mining_random.py deleted file mode 100644 index 30fb099..0000000 --- a/det-yolov5-tmi/mining/ymir_mining_random.py +++ /dev/null @@ -1,75 +0,0 @@ -"""use fake DDP to infer -1. split data with `images_rank = images[RANK::WORLD_SIZE]` -2. infer on the origin dataset -3. infer on the augmentation dataset -4. save splited mining result with `torch.save(results, f'/out/mining_results_{RANK}.pt')` -5. merge mining result -""" -import os -import random -import sys - -import torch -import torch.distributed as dist -from easydict import EasyDict as edict -from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_merged_config - -LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html -RANK = int(os.getenv('RANK', -1)) -WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) - - -def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): - # eg: gpu_id = 1,3,5,7 for LOCAL_RANK = 2, will use gpu 5. 
- gpu = LOCAL_RANK if LOCAL_RANK >= 0 else 0 - device = torch.device('cuda', gpu) - ymir_yolov5.to(device) - - with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: - images = [line.strip() for line in f.readlines()] - - images_rank = images[RANK::WORLD_SIZE] - mining_results = dict() - dataset_size = len(images_rank) - pbar = tqdm(images_rank) if RANK == 0 else images_rank - for idx, image in enumerate(pbar): - if RANK in [-1, 0]: - ymir_yolov5.write_monitor_logger(stage=YmirStage.TASK, p=idx / dataset_size) - mining_results[image] = random.random() - - torch.save(mining_results, f'/out/mining_results_{max(0,RANK)}.pt') - - -def main() -> int: - ymir_cfg = get_merged_config() - ymir_yolov5 = YmirYolov5(ymir_cfg, task='mining') - - if LOCAL_RANK != -1: - assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' - torch.cuda.set_device(LOCAL_RANK) - dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") - - run(ymir_cfg, ymir_yolov5) - - # wait all process to save the mining result - if WORLD_SIZE > 1: - dist.barrier() - - if RANK in [0, -1]: - results = [] - for rank in range(WORLD_SIZE): - results.append(torch.load(f'/out/mining_results_{rank}.pt')) - - ymir_mining_result = [] - for result in results: - for img_file, score in result.items(): - ymir_mining_result.append((img_file, score)) - rw.write_mining_result(mining_result=ymir_mining_result) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/det-yolov5-tmi/start.py b/det-yolov5-tmi/start.py deleted file mode 100644 index 6c82844..0000000 --- a/det-yolov5-tmi/start.py +++ /dev/null @@ -1,191 +0,0 @@ -import logging -import os -import subprocess -import sys - -import cv2 -from easydict import EasyDict as edict -from models.experimental import attempt_download -from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file -from ymir_exc import dataset_reader as dr -from ymir_exc import env, monitor -from 
ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, find_free_port, get_bool, get_merged_config, get_ymir_process - - -def start(cfg: edict) -> int: - logging.info(f'merged config: {cfg}') - - if cfg.ymir.run_training: - _run_training(cfg) - else: - if cfg.ymir.run_mining and cfg.ymir.run_infer: - # multiple task, run mining first, infer later - mining_task_idx = 0 - infer_task_idx = 1 - task_num = 2 - else: - mining_task_idx = 0 - infer_task_idx = 0 - task_num = 1 - - if cfg.ymir.run_mining: - _run_mining(cfg, mining_task_idx, task_num) - if cfg.ymir.run_infer: - _run_infer(cfg, infer_task_idx, task_num) - - return 0 - - -def _run_training(cfg: edict) -> None: - """ - function for training task - 1. convert dataset - 2. training model - 3. save model weight/hyperparameter/... to design directory - """ - # 1. convert dataset - out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg) - logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0)) - - # 2. 
training model - epochs: int = int(cfg.param.epochs) - batch_size_per_gpu: int = int(cfg.param.batch_size_per_gpu) - num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4)) - model: str = cfg.param.model - img_size: int = int(cfg.param.img_size) - save_period: int = int(cfg.param.save_period) - save_best_only: bool = get_bool(cfg, key='save_best_only', default_value=True) - args_options: str = cfg.param.args_options - gpu_id: str = str(cfg.param.get('gpu_id', '0')) - gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 - batch_size: int = batch_size_per_gpu * max(1, gpu_count) - port: int = find_free_port() - sync_bn: bool = get_bool(cfg, key='sync_bn', default_value=False) - - weights = get_weight_file(cfg) - if not weights: - # download pretrained weight - weights = attempt_download(f'{model}.pt') - - models_dir = cfg.ymir.output.models_dir - project = os.path.dirname(models_dir) - name = os.path.basename(models_dir) - assert os.path.join(project, name) == models_dir - - commands = ['python3'] - device = gpu_id or 'cpu' - if gpu_count > 1: - commands.extend(f'-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port}'.split()) - - commands.extend([ - 'train.py', '--epochs', - str(epochs), '--batch-size', - str(batch_size), '--data', f'{out_dir}/data.yaml', '--project', project, '--cfg', f'models/{model}.yaml', - '--name', name, '--weights', weights, '--img-size', - str(img_size), '--save-period', - str(save_period), '--device', device, - '--workers', str(num_workers_per_gpu) - ]) - - if save_best_only: - commands.append("--nosave") - - if gpu_count > 1 and sync_bn: - commands.append("--sync-bn") - - if args_options: - commands.extend(args_options.split()) - - logging.info(f'start training: {commands}') - - subprocess.run(commands, check=True) - monitor.write_monitor_logger(percent=get_ymir_process(stage=YmirStage.TASK, p=1.0)) - - # if task done, write 100% percent log - monitor.write_monitor_logger(percent=1.0) - - -def 
_run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: - # generate data.yaml for mining - out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg) - logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) - gpu_id: str = str(cfg.param.get('gpu_id', '0')) - gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 - - mining_algorithm = cfg.param.get('mining_algorithm', 'aldd') - support_mining_algorithms = ['aldd', 'cald', 'random', 'entropy'] - if mining_algorithm not in support_mining_algorithms: - raise Exception(f'unknown mining algorithm {mining_algorithm}, not in {support_mining_algorithms}') - - if gpu_count <= 1: - command = f'python3 mining/ymir_mining_{mining_algorithm}.py' - else: - port = find_free_port() - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining_{mining_algorithm}.py' # noqa - - logging.info(f'mining: {command}') - subprocess.run(command.split(), check=True) - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) - - -def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: - # generate data.yaml for infer - out_dir = cfg.ymir.output.root_dir - convert_ymir_to_yolov5(cfg) - logging.info(f'generate {out_dir}/data.yaml') - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.PREPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) - - gpu_id: str = str(cfg.param.get('gpu_id', '0')) - gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 - - if gpu_count <= 1: - N = dr.items_count(env.DatasetType.CANDIDATE) - infer_result = dict() - model = YmirYolov5(cfg) - idx = -1 - - monitor_gap = max(1, N // 100) - for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): - img = cv2.imread(asset_path) - result = 
model.infer(img) - infer_result[asset_path] = result - idx += 1 - - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num) - monitor.write_monitor_logger(percent=percent) - - rw.write_infer_result(infer_result=infer_result) - else: - port = find_free_port() - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_infer.py' # noqa - - logging.info(f'infer: {command}') - subprocess.run(command.split(), check=True) - - monitor.write_monitor_logger( - percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) - - -if __name__ == '__main__': - logging.basicConfig(stream=sys.stdout, - format='%(levelname)-8s: [%(asctime)s] %(message)s', - datefmt='%Y%m%d-%H:%M:%S', - level=logging.INFO) - - cfg = get_merged_config() - os.environ.setdefault('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'python') - - # activation: relu - activation: str = cfg.param.get('activation', '') - if activation: - os.environ.setdefault('ACTIVATION', activation) - sys.exit(start(cfg)) diff --git a/det-yolov5-tmi/training-template.yaml b/det-yolov5-tmi/training-template.yaml deleted file mode 100644 index 1cc4752..0000000 --- a/det-yolov5-tmi/training-template.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# training template for your executor app -# after build image, it should at /img-man/training-template.yaml -# key: gpu_id, task_id, pretrained_model_params, class_names should be preserved - -# gpu_id: '0' -# task_id: 'default-training-task' -# pretrained_model_params: [] -# class_names: [] - -shm_size: '128G' -export_format: 'ark:raw' -model: 'yolov5s' -batch_size_per_gpu: 16 -num_workers_per_gpu: 4 -epochs: 100 -img_size: 640 -opset: 11 -args_options: '--exist-ok' -save_best_only: True # save the best weight file only -save_period: 10 -sync_bn: False # work for multi-gpu only -ymir_saved_file_patterns: '' # custom saved files, support python 
regular expression, use , to split multiple pattern diff --git a/det-yolov5-tmi/utils/ymir_yolov5.py b/det-yolov5-tmi/utils/ymir_yolov5.py deleted file mode 100644 index c463ded..0000000 --- a/det-yolov5-tmi/utils/ymir_yolov5.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -utils function for ymir and yolov5 -""" -import os.path as osp -import shutil -from typing import Any, List - -import numpy as np -import torch -import yaml -from easydict import EasyDict as edict -from models.common import DetectMultiBackend -from nptyping import NDArray, Shape, UInt8 -from utils.augmentations import letterbox -from utils.general import check_img_size, non_max_suppression, scale_coords -from utils.torch_utils import select_device -from ymir_exc import monitor -from ymir_exc import result_writer as rw -from ymir_exc.util import YmirStage, get_bool, get_weight_files, get_ymir_process - -BBOX = NDArray[Shape['*,4'], Any] -CV_IMAGE = NDArray[Shape['*,*,3'], UInt8] - - -def get_weight_file(cfg: edict) -> str: - """ - return the weight file path by priority - find weight file in cfg.param.model_params_path or cfg.param.model_params_path - """ - weight_files = get_weight_files(cfg, suffix=('.pt')) - # choose weight file by priority, best.pt > xxx.pt - for p in weight_files: - if p.endswith('best.pt'): - return p - - if len(weight_files) > 0: - return max(weight_files, key=osp.getctime) - - return "" - - -class YmirYolov5(torch.nn.Module): - """ - used for mining and inference to init detector and predict. 
- """ - def __init__(self, cfg: edict, task='infer'): - super().__init__() - self.cfg = cfg - if cfg.ymir.run_mining and cfg.ymir.run_infer: - # multiple task, run mining first, infer later - if task == 'infer': - self.task_idx = 1 - elif task == 'mining': - self.task_idx = 0 - else: - raise Exception(f'unknown task {task}') - - self.task_num = 2 - else: - self.task_idx = 0 - self.task_num = 1 - - self.gpu_id: str = str(cfg.param.get('gpu_id', '0')) - device = select_device(self.gpu_id) # will set CUDA_VISIBLE_DEVICES=self.gpu_id - self.gpu_count: int = len(self.gpu_id.split(',')) if self.gpu_id else 0 - self.batch_size_per_gpu: int = int(cfg.param.get('batch_size_per_gpu', 4)) - self.num_workers_per_gpu: int = int(cfg.param.get('num_workers_per_gpu', 4)) - self.pin_memory: bool = get_bool(cfg, 'pin_memory', False) - self.batch_size: int = self.batch_size_per_gpu * self.gpu_count - self.model = self.init_detector(device) - self.model.eval() - self.device = device - self.class_names: List[str] = cfg.param.class_names - self.stride = self.model.stride - self.conf_thres: float = float(cfg.param.conf_thres) - self.iou_thres: float = float(cfg.param.iou_thres) - - img_size = int(cfg.param.img_size) - imgsz = [img_size, img_size] - imgsz = check_img_size(imgsz, s=self.stride) - - self.model.warmup(imgsz=(1, 3, *imgsz), half=False) # warmup - self.img_size: List[int] = imgsz - - def extract_feats(self, x): - """ - return the feature maps before sigmoid for mining - """ - return self.model.model(x)[1] - - def forward(self, x, nms=False): - pred = self.model(x) - if not nms: - return pred - - pred = non_max_suppression(pred, - conf_thres=self.conf_thres, - iou_thres=self.iou_thres, - classes=None, # not filter class_idx - agnostic=False, - max_det=100) - return pred - - def init_detector(self, device: torch.device) -> DetectMultiBackend: - weights = get_weight_file(self.cfg) - - if not weights: - raise Exception("no weights file specified!") - - data_yaml = 
osp.join(self.cfg.ymir.output.root_dir, 'data.yaml') - model = DetectMultiBackend( - weights=weights, - device=device, - dnn=False, # not use opencv dnn for onnx inference - data=data_yaml) # dataset.yaml path - - return model - - def predict(self, img: CV_IMAGE) -> NDArray: - """ - predict single image and return bbox information - img: opencv BGR, uint8 format - """ - # preprocess: padded resize - img1 = letterbox(img, self.img_size, stride=self.stride, auto=True)[0] - - # preprocess: convert data format - img1 = img1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB - img1 = np.ascontiguousarray(img1) - img1 = torch.from_numpy(img1).to(self.device) - - img1 = img1 / 255 # 0 - 255 to 0.0 - 1.0 - img1.unsqueeze_(dim=0) # expand for batch dim - pred = self.forward(img1, nms=True) - - result = [] - for det in pred: - if len(det): - # Rescale boxes from img_size to img size - det[:, :4] = scale_coords(img1.shape[2:], det[:, :4], img.shape).round() - result.append(det) - - # xyxy, conf, cls - if len(result) > 0: - tensor_result = torch.cat(result, dim=0) - numpy_result = tensor_result.data.cpu().numpy() - else: - numpy_result = np.zeros(shape=(0, 6), dtype=np.float32) - - return numpy_result - - def infer(self, img: CV_IMAGE) -> List[rw.Annotation]: - anns = [] - result = self.predict(img) - - for i in range(result.shape[0]): - xmin, ymin, xmax, ymax, conf, cls = result[i, :6].tolist() - ann = rw.Annotation(class_name=self.class_names[int(cls)], - score=conf, - box=rw.Box(x=int(xmin), y=int(ymin), w=int(xmax - xmin), h=int(ymax - ymin))) - - anns.append(ann) - - return anns - - def write_monitor_logger(self, stage: YmirStage, p: float): - monitor.write_monitor_logger( - percent=get_ymir_process(stage=stage, p=p, task_idx=self.task_idx, task_num=self.task_num)) - - -def convert_ymir_to_yolov5(cfg: edict, out_dir: str = None): - """ - convert ymir format dataset to yolov5 format - generate data.yaml for training/mining/infer - """ - - out_dir = out_dir or 
cfg.ymir.output.root_dir - data = dict(path=out_dir, nc=len(cfg.param.class_names), names=cfg.param.class_names) - for split, prefix in zip(['train', 'val', 'test'], ['training', 'val', 'candidate']): - src_file = getattr(cfg.ymir.input, f'{prefix}_index_file') - if osp.exists(src_file): - shutil.copy(src_file, f'{out_dir}/{split}.tsv') - - data[split] = f'{split}.tsv' - - with open(osp.join(out_dir, 'data.yaml'), 'w') as fw: - fw.write(yaml.safe_dump(data)) From 8980e0a9586cd407511ecf31d24c906807b2ba79 Mon Sep 17 00:00:00 2001 From: youdaoyzbx Date: Thu, 20 Oct 2022 11:40:17 +0800 Subject: [PATCH 149/150] merge single-gpu and mutiple gpu infer --- det-yolov5-tmi/mypy.ini | 1 - det-yolov5-tmi/ymir/README.md | 4 +-- det-yolov5-tmi/ymir/mining/data_augment.py | 3 +- det-yolov5-tmi/ymir/mining/util.py | 4 +-- det-yolov5-tmi/ymir/mining/ymir_infer.py | 11 ++++--- .../ymir/mining/ymir_mining_aldd.py | 4 +-- .../ymir/mining/ymir_mining_cald.py | 11 ++++--- .../ymir/mining/ymir_mining_entropy.py | 9 ++++-- .../ymir/mining/ymir_mining_random.py | 7 +++-- det-yolov5-tmi/ymir/start.py | 30 +++++-------------- 10 files changed, 39 insertions(+), 45 deletions(-) diff --git a/det-yolov5-tmi/mypy.ini b/det-yolov5-tmi/mypy.ini index bb96738..6a356a3 100644 --- a/det-yolov5-tmi/mypy.ini +++ b/det-yolov5-tmi/mypy.ini @@ -2,7 +2,6 @@ ignore_missing_imports = True disallow_untyped_defs = False exclude = [utils/general.py, models/*.py, utils/*.py] -files = mining/*.py, utils/ymir_yolov5.py, start.py, train.py [mypy-torch.*] ignore_errors = True diff --git a/det-yolov5-tmi/ymir/README.md b/det-yolov5-tmi/ymir/README.md index 6bf9151..1936a93 100644 --- a/det-yolov5-tmi/ymir/README.md +++ b/det-yolov5-tmi/ymir/README.md @@ -9,9 +9,9 @@ docker build -t ymir/ymir-executor:ymir1.1.0-cuda111-yolov5-tmi --build-arg SERV ## main change log -- add `start.py` and `utils/ymir_yolov5.py` for train/infer/mining +- add `start.py` and `ymir/ymir_yolov5.py` for train/infer/mining -- add 
`utils/ymir_yolov5.py` for useful functions +- add `ymir/ymir_yolov5.py` for useful functions - `get_merged_config()` add ymir path config `cfg.yaml` and hyper-parameter `cfg.param` diff --git a/det-yolov5-tmi/ymir/mining/data_augment.py b/det-yolov5-tmi/ymir/mining/data_augment.py index cfafaa7..d88a86d 100644 --- a/det-yolov5-tmi/ymir/mining/data_augment.py +++ b/det-yolov5-tmi/ymir/mining/data_augment.py @@ -8,8 +8,7 @@ import cv2 import numpy as np from nptyping import NDArray - -from utils.ymir_yolov5 import BBOX, CV_IMAGE +from ymir.ymir_yolov5 import BBOX, CV_IMAGE def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: diff --git a/det-yolov5-tmi/ymir/mining/util.py b/det-yolov5-tmi/ymir/mining/util.py index c69343c..0e9e3f5 100644 --- a/det-yolov5-tmi/ymir/mining/util.py +++ b/det-yolov5-tmi/ymir/mining/util.py @@ -19,12 +19,12 @@ import cv2 import numpy as np import torch.utils.data as td -from mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate from nptyping import NDArray from scipy.stats import entropy from torch.utils.data._utils.collate import default_collate from utils.augmentations import letterbox -from utils.ymir_yolov5 import BBOX +from ymir.mining.data_augment import cutout, horizontal_flip, intersect, resize, rotate +from ymir.ymir_yolov5 import BBOX LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) diff --git a/det-yolov5-tmi/ymir/mining/ymir_infer.py b/det-yolov5-tmi/ymir/mining/ymir_infer.py index ad1e0d2..bd1c237 100644 --- a/det-yolov5-tmi/ymir/mining/ymir_infer.py +++ b/det-yolov5-tmi/ymir/mining/ymir_infer.py @@ -12,10 +12,10 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file from tqdm import tqdm from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 +from ymir.mining.util import YmirDataset, 
load_image_file +from ymir.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -44,7 +44,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): max_barrier_times = len(images) // max(1, WORLD_SIZE) // batch_size_per_gpu # origin dataset - images_rank = images[RANK::WORLD_SIZE] + if RANK != -1: + images_rank = images[RANK::WORLD_SIZE] + else: + images_rank = images origin_dataset = YmirDataset(images_rank, load_fn=load_fn) origin_dataset_loader = td.DataLoader(origin_dataset, batch_size=batch_size_per_gpu, @@ -80,7 +83,7 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): result_per_image.append(det) results.append(dict(image_file=image_file, result=result_per_image)) - torch.save(results, f'/out/infer_results_{RANK}.pt') + torch.save(results, f'/out/infer_results_{max(0,RANK)}.pt') def main() -> int: diff --git a/det-yolov5-tmi/ymir/mining/ymir_mining_aldd.py b/det-yolov5-tmi/ymir/mining/ymir_mining_aldd.py index 8d6a27c..0a90e3f 100644 --- a/det-yolov5-tmi/ymir/mining/ymir_mining_aldd.py +++ b/det-yolov5-tmi/ymir/mining/ymir_mining_aldd.py @@ -17,9 +17,9 @@ import torch.nn.functional as F import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 +from ymir.mining.util import YmirDataset, load_image_file +from ymir.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config diff --git a/det-yolov5-tmi/ymir/mining/ymir_mining_cald.py b/det-yolov5-tmi/ymir/mining/ymir_mining_cald.py index b4c6147..4a07d32 100644 --- a/det-yolov5-tmi/ymir/mining/ymir_mining_cald.py +++ b/det-yolov5-tmi/ymir/mining/ymir_mining_cald.py @@ -14,11 +14,11 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import (YmirDataset, 
collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, - update_consistency) from tqdm import tqdm from utils.general import scale_coords -from utils.ymir_yolov5 import YmirYolov5 +from ymir.mining.util import (YmirDataset, collate_fn_with_fake_ann, load_image_file, load_image_file_with_ann, + update_consistency) +from ymir.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -47,7 +47,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu # origin dataset - images_rank = images[RANK::WORLD_SIZE] + if RANK != -1: + images_rank = images[RANK::WORLD_SIZE] + else: + images_rank = images origin_dataset = YmirDataset(images_rank, load_fn=load_fn) origin_dataset_loader = td.DataLoader(origin_dataset, batch_size=batch_size_per_gpu, diff --git a/det-yolov5-tmi/ymir/mining/ymir_mining_entropy.py b/det-yolov5-tmi/ymir/mining/ymir_mining_entropy.py index df5a1ff..86136e1 100644 --- a/det-yolov5-tmi/ymir/mining/ymir_mining_entropy.py +++ b/det-yolov5-tmi/ymir/mining/ymir_mining_entropy.py @@ -14,9 +14,9 @@ import torch.distributed as dist import torch.utils.data as td from easydict import EasyDict as edict -from mining.util import YmirDataset, load_image_file from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 +from ymir.mining.util import YmirDataset, load_image_file +from ymir.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -45,7 +45,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): max_barrier_times = (len(images) // max(1, WORLD_SIZE)) // batch_size_per_gpu # origin dataset - images_rank = images[RANK::WORLD_SIZE] + if RANK != -1: + images_rank = images[RANK::WORLD_SIZE] + else: + images_rank = images origin_dataset = YmirDataset(images_rank, load_fn=load_fn) origin_dataset_loader = 
td.DataLoader(origin_dataset, batch_size=batch_size_per_gpu, diff --git a/det-yolov5-tmi/ymir/mining/ymir_mining_random.py b/det-yolov5-tmi/ymir/mining/ymir_mining_random.py index 30fb099..eeb08cf 100644 --- a/det-yolov5-tmi/ymir/mining/ymir_mining_random.py +++ b/det-yolov5-tmi/ymir/mining/ymir_mining_random.py @@ -13,7 +13,7 @@ import torch.distributed as dist from easydict import EasyDict as edict from tqdm import tqdm -from utils.ymir_yolov5 import YmirYolov5 +from ymir.ymir_yolov5 import YmirYolov5 from ymir_exc import result_writer as rw from ymir_exc.util import YmirStage, get_merged_config @@ -31,7 +31,10 @@ def run(ymir_cfg: edict, ymir_yolov5: YmirYolov5): with open(ymir_cfg.ymir.input.candidate_index_file, 'r') as f: images = [line.strip() for line in f.readlines()] - images_rank = images[RANK::WORLD_SIZE] + if RANK != -1: + images_rank = images[RANK::WORLD_SIZE] + else: + images_rank = images mining_results = dict() dataset_size = len(images_rank) pbar = tqdm(images_rank) if RANK == 0 else images_rank diff --git a/det-yolov5-tmi/ymir/start.py b/det-yolov5-tmi/ymir/start.py index 6c82844..11eece0 100644 --- a/det-yolov5-tmi/ymir/start.py +++ b/det-yolov5-tmi/ymir/start.py @@ -6,7 +6,7 @@ import cv2 from easydict import EasyDict as edict from models.experimental import attempt_download -from utils.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file +from ymir.ymir_yolov5 import YmirYolov5, convert_ymir_to_yolov5, get_weight_file from ymir_exc import dataset_reader as dr from ymir_exc import env, monitor from ymir_exc import result_writer as rw @@ -124,10 +124,10 @@ def _run_mining(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: raise Exception(f'unknown mining algorithm {mining_algorithm}, not in {support_mining_algorithms}') if gpu_count <= 1: - command = f'python3 mining/ymir_mining_{mining_algorithm}.py' + command = f'python3 ymir/mining/ymir_mining_{mining_algorithm}.py' else: port = find_free_port() - command = f'python3 
-m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_mining_{mining_algorithm}.py' # noqa + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} ymir/mining/ymir_mining_{mining_algorithm}.py' # noqa logging.info(f'mining: {command}') subprocess.run(command.split(), check=True) @@ -147,29 +147,13 @@ def _run_infer(cfg: edict, task_idx: int = 0, task_num: int = 1) -> None: gpu_count: int = len(gpu_id.split(',')) if gpu_id else 0 if gpu_count <= 1: - N = dr.items_count(env.DatasetType.CANDIDATE) - infer_result = dict() - model = YmirYolov5(cfg) - idx = -1 - - monitor_gap = max(1, N // 100) - for asset_path, _ in dr.item_paths(dataset_type=env.DatasetType.CANDIDATE): - img = cv2.imread(asset_path) - result = model.infer(img) - infer_result[asset_path] = result - idx += 1 - - if idx % monitor_gap == 0: - percent = get_ymir_process(stage=YmirStage.TASK, p=idx / N, task_idx=task_idx, task_num=task_num) - monitor.write_monitor_logger(percent=percent) - - rw.write_infer_result(infer_result=infer_result) + command = 'python3 ymir/mining/ymir_infer.py' else: port = find_free_port() - command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} mining/ymir_infer.py' # noqa + command = f'python3 -m torch.distributed.launch --nproc_per_node {gpu_count} --master_port {port} ymir/mining/ymir_infer.py' # noqa - logging.info(f'infer: {command}') - subprocess.run(command.split(), check=True) + logging.info(f'infer: {command}') + subprocess.run(command.split(), check=True) monitor.write_monitor_logger( percent=get_ymir_process(stage=YmirStage.POSTPROCESS, p=1.0, task_idx=task_idx, task_num=task_num)) From f124b4ce0eba4b07357c942adeeaf0f715bb6c01 Mon Sep 17 00:00:00 2001 From: wxjf Date: Mon, 31 Oct 2022 15:09:25 +0800 Subject: [PATCH 150/150] add entropy --- det-mmdetection-tmi/mining/util.py | 0 det-mmdetection-tmi/mining/ymir_mining.py | 412 ++++++++++++++++++ 
.../mining/ymir_mining_aldd.py | 0 det-mmdetection-tmi/start.py | 2 +- det-mmdetection-tmi/ymir_mining_entropy.py | 150 +++++++ 5 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 det-mmdetection-tmi/mining/util.py create mode 100644 det-mmdetection-tmi/mining/ymir_mining.py create mode 100644 det-mmdetection-tmi/mining/ymir_mining_aldd.py create mode 100644 det-mmdetection-tmi/ymir_mining_entropy.py diff --git a/det-mmdetection-tmi/mining/util.py b/det-mmdetection-tmi/mining/util.py new file mode 100644 index 0000000..e69de29 diff --git a/det-mmdetection-tmi/mining/ymir_mining.py b/det-mmdetection-tmi/mining/ymir_mining.py new file mode 100644 index 0000000..506506d --- /dev/null +++ b/det-mmdetection-tmi/mining/ymir_mining.py @@ -0,0 +1,412 @@ +""" +data augmentations for CALD method, including horizontal_flip, rotate(5'), cutout +official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py +""" +import os +import random +import sys +from typing import Any, Callable, Dict, List, Tuple + +import cv2 +import numpy as np +import torch +import torch.distributed as dist +from easydict import EasyDict as edict +from mmcv.runner import init_dist +from mmdet.apis.test import collect_results_gpu +from mmdet.utils.util_ymir import BBOX, CV_IMAGE +from nptyping import NDArray +from scipy.stats import entropy +from tqdm import tqdm +from ymir_exc import monitor +from ymir_exc import result_writer as rw +from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process +from ymir_infer import YmirModel + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + + +def intersect(boxes1: BBOX, boxes2: BBOX) -> NDArray: + ''' + Find intersection of every box combination between two sets of box + boxes1: bounding boxes 1, a tensor of dimensions (n1, 4) + boxes2: bounding boxes 2, a tensor of dimensions (n2, 4) + + 
Out: Intersection each of boxes1 with respect to each of boxes2, + a tensor of dimensions (n1, n2) + ''' + n1 = boxes1.shape[0] + n2 = boxes2.shape[0] + max_xy = np.minimum( + np.expand_dims(boxes1[:, 2:], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, 2:], axis=0).repeat(n1, axis=0)) + + min_xy = np.maximum( + np.expand_dims(boxes1[:, :2], axis=1).repeat(n2, axis=1), + np.expand_dims(boxes2[:, :2], axis=0).repeat(n1, axis=0)) + inter = np.clip(max_xy - min_xy, a_min=0, a_max=None) # (n1, n2, 2) + return inter[:, :, 0] * inter[:, :, 1] # (n1, n2) + + +def horizontal_flip(image: CV_IMAGE, bbox: BBOX) \ + -> Tuple[CV_IMAGE, BBOX]: + """ + image: opencv image, [height,width,channels] + bbox: numpy.ndarray, [N,4] --> [x1,y1,x2,y2] + """ + image = image.copy() + + width = image.shape[1] + # Flip image horizontally + image = image[:, ::-1, :] + if len(bbox) > 0: + bbox = bbox.copy() + # Flip bbox horizontally + bbox[:, [0, 2]] = width - bbox[:, [2, 0]] + return image, bbox + + +def cutout(image: CV_IMAGE, + bbox: BBOX, + cut_num: int = 2, + fill_val: int = 0, + bbox_remove_thres: float = 0.4, + bbox_min_thres: float = 0.1) -> Tuple[CV_IMAGE, BBOX]: + ''' + Cutout augmentation + image: A PIL image + boxes: bounding boxes, a tensor of dimensions (#objects, 4) + labels: labels of object, a tensor of dimensions (#objects) + fill_val: Value filled in cut out + bbox_remove_thres: Theshold to remove bbox cut by cutout + + Out: new image, new_boxes, new_labels + ''' + image = image.copy() + bbox = bbox.copy() + + if len(bbox) == 0: + return image, bbox + + original_h, original_w, original_channel = image.shape + count = 0 + for _ in range(50): + # Random cutout size: [0.15, 0.5] of original dimension + cutout_size_h = random.uniform(0.05 * original_h, 0.2 * original_h) + cutout_size_w = random.uniform(0.05 * original_w, 0.2 * original_w) + + # Random position for cutout + left = random.uniform(0, original_w - cutout_size_w) + right = left + cutout_size_w + top = 
random.uniform(0, original_h - cutout_size_h) + bottom = top + cutout_size_h + cutout = np.array([[float(left), float(top), float(right), float(bottom)]]) + + # Calculate intersect between cutout and bounding boxes + overlap_size = intersect(cutout, bbox) + area_boxes = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1]) + ratio = overlap_size / (area_boxes + 1e-14) + # If all boxes have Iou greater than bbox_remove_thres, try again + if ratio.max() > bbox_remove_thres or ratio.max() < bbox_min_thres: + continue + + image[int(top):int(bottom), int(left):int(right), :] = fill_val + count += 1 + if count >= cut_num: + break + return image, bbox + + +def rotate(image: CV_IMAGE, bbox: BBOX, rot: float = 5) -> Tuple[CV_IMAGE, BBOX]: + image = image.copy() + bbox = bbox.copy() + h, w, c = image.shape + center = np.array([w / 2.0, h / 2.0]) + s = max(h, w) * 1.0 + trans = get_affine_transform(center, s, rot, [w, h]) + if len(bbox) > 0: + for i in range(bbox.shape[0]): + x1, y1 = affine_transform(bbox[i, :2], trans) + x2, y2 = affine_transform(bbox[i, 2:], trans) + x3, y3 = affine_transform(bbox[i, [2, 1]], trans) + x4, y4 = affine_transform(bbox[i, [0, 3]], trans) + bbox[i, :2] = [min(x1, x2, x3, x4), min(y1, y2, y3, y4)] + bbox[i, 2:] = [max(x1, x2, x3, x4), max(y1, y2, y3, y4)] + image = cv2.warpAffine(image, trans, (w, h), flags=cv2.INTER_LINEAR) + return image, bbox + + +def get_3rd_point(a: NDArray, b: NDArray) -> NDArray: + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_dir(src_point: NDArray, rot_rad: float) -> List: + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + + return src_result + + +def transform_preds(coords: NDArray, center: NDArray, scale: Any, rot: float, output_size: List) -> NDArray: + trans = get_affine_transform(center, scale, rot, output_size, inv=True) + target_coords = 
affine_transform(coords, trans) + return target_coords + + +def get_affine_transform(center: NDArray, + scale: Any, + rot: float, + output_size: List, + shift: NDArray = np.array([0, 0], dtype=np.float32), + inv: bool = False) -> NDArray: + if not isinstance(scale, np.ndarray) and not isinstance(scale, list): + scale = np.array([scale, scale], dtype=np.float32) + + scale_tmp = scale + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = get_dir(np.array([0, src_w * -0.5], np.float32), rot_rad) + dst_dir = np.array([0, dst_w * -0.5], np.float32) + + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir + + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def affine_transform(pt: NDArray, t: NDArray) -> NDArray: + new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def resize(img: CV_IMAGE, boxes: BBOX, ratio: float = 0.8) -> Tuple[CV_IMAGE, BBOX]: + """ + ratio: <= 1.0 + """ + assert ratio <= 1.0, f'resize ratio {ratio} must <= 1.0' + + h, w, _ = img.shape + ow = int(w * ratio) + oh = int(h * ratio) + resize_img = cv2.resize(img, (ow, oh)) + new_img = np.zeros_like(img) + new_img[:oh, :ow] = resize_img + + if len(boxes) == 0: + return new_img, boxes + else: + return new_img, boxes * ratio + + +def get_ious(boxes1: BBOX, boxes2: BBOX) -> NDArray: + """ + args: + boxes1: np.array, (N, 4), xyxy + boxes2: np.array, (M, 4), xyxy + return: + iou: np.array, (N, M) + """ + area1 = (boxes1[:, 2] - 
boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    iner_area = intersect(boxes1, boxes2)
    # Broadcast both area vectors to (N, M) to match the intersection matrix.
    area1 = area1.reshape(-1, 1).repeat(area2.shape[0], axis=1)
    area2 = area2.reshape(1, -1).repeat(area1.shape[0], axis=0)
    # Union = A + B - intersection; epsilon guards zero-area degenerate boxes.
    iou = iner_area / (area1 + area2 - iner_area + 1e-14)
    return iou


def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]:
    """Split an (N, 6) detection array [x1, y1, x2, y2, conf, cls] into
    (bboxes, confidences, class_ids); returns empty arrays when N == 0.

    NOTE(review): the empty-case `conf`/`class_id` are shaped (0, 1) while the
    non-empty case yields 1-D (N,) slices -- confirm downstream code tolerates
    this inconsistency.
    """
    if len(result) > 0:
        bboxes = result[:, :4].astype(np.int32)
        conf = result[:, 4]
        class_id = result[:, 5]
    else:
        bboxes = np.zeros(shape=(0, 4), dtype=np.int32)
        conf = np.zeros(shape=(0, 1), dtype=np.float32)
        class_id = np.zeros(shape=(0, 1), dtype=np.int32)

    return bboxes, conf, class_id


class YmirMining(YmirModel):
    """CALD-style consistency mining on top of the base YmirModel inference."""

    def __init__(self, cfg: edict):
        super().__init__(cfg)
        # When mining and inference run in the same job, progress reporting is
        # split across two sub-tasks; mining is sub-task 0 either way.
        if cfg.ymir.run_mining and cfg.ymir.run_infer:
            mining_task_idx = 0
            # infer_task_idx = 1
            task_num = 2
        else:
            mining_task_idx = 0
            # infer_task_idx = 0
            task_num = 1
        self.task_idx = mining_task_idx
        self.task_num = task_num

    def mining(self):
        """Score every candidate image with the CALD consistency metric.

        Returns a list of (asset_path, score) tuples; in distributed runs the
        per-rank partial lists are merged on rank 0 via collect_results_gpu.
        """
        with open(self.cfg.ymir.input.candidate_index_file, 'r') as f:
            images = [line.strip() for line in f.readlines()]
        if RANK == -1:
            # Non-distributed: process everything with a progress bar.
            N = len(images)
            tbar = tqdm(images)
        else:
            # Distributed: round-robin shard; only rank 0 shows a progress bar.
            images_rank = images[RANK::WORLD_SIZE]
            N = len(images_rank)
            if RANK == 0:
                tbar = tqdm(images_rank)
            else:
                tbar = images_rank

        monitor_gap = max(1, N // 100)
        idx = -1
        # beta: CALD baseline score; images with no detections get -beta.
        beta = 1.3
        mining_result = []
        for asset_path in tbar:
            # batch-level sync, avoid 30min time-out error
            # NOTE(review): shards from images[RANK::WORLD_SIZE] can differ in
            # length when WORLD_SIZE does not divide len(images), so a barrier
            # per image can deadlock on the final iteration; the entropy miner
            # guards this with a max_barrier_times cap -- confirm here.
            if LOCAL_RANK != -1:
                dist.barrier()

            img = cv2.imread(asset_path)
            # xyxy,conf,cls
            result = self.predict(img)
            bboxes, conf, _ = split_result(result)
            if len(result) == 0:
                # no result for the image without augmentation
                mining_result.append((asset_path, -beta))
                continue

            consistency = 0.0
            aug_bboxes_dict, aug_results_dict = self.aug_predict(img, bboxes)
            for key in aug_results_dict:
                # no result for the image with 
augmentation f'{key}'
                if len(aug_results_dict[key]) == 0:
                    consistency += beta
                    continue

                bboxes_key, conf_key, _ = split_result(aug_results_dict[key])
                # CALD works with "uncertainty" scores, i.e. 1 - confidence.
                cls_scores_aug = 1 - conf_key
                cls_scores = 1 - conf

                consistency_per_aug = 2.0
                # ious: (num detections on augmented image) x (num original boxes
                # mapped into augmented coordinates).
                ious = get_ious(bboxes_key, aug_bboxes_dict[key])
                # For each original box, the best-matching augmented detection.
                aug_idxs = np.argmax(ious, axis=0)
                for origin_idx, aug_idx in enumerate(aug_idxs):
                    max_iou = ious[aug_idx, origin_idx]
                    if max_iou == 0:
                        consistency_per_aug = min(consistency_per_aug, beta)
                    # NOTE(review): when max_iou == 0 the unmatched pair is still
                    # scored below -- confirm a `continue` was not intended here.
                    p = cls_scores_aug[aug_idx]
                    q = cls_scores[origin_idx]
                    m = (p + q) / 2.
                    # Jensen-Shannon divergence between Bernoulli(p) and
                    # Bernoulli(q); scipy.stats.entropy(pk, qk) is KL divergence.
                    js = 0.5 * entropy([p, 1 - p], [m, 1 - m]) + 0.5 * entropy([q, 1 - q], [m, 1 - m])
                    if js < 0:
                        js = 0
                    consistency_box = max_iou
                    consistency_cls = 0.5 * \
                        (conf[origin_idx] + conf_key[aug_idx]) * (1 - js)
                    # Distance of (box + cls) consistency from the beta baseline;
                    # keep the worst (smallest) instance per augmentation.
                    consistency_per_inst = abs(consistency_box + consistency_cls - beta)
                    consistency_per_aug = min(consistency_per_aug, consistency_per_inst.item())

                consistency += consistency_per_aug

            # Average over the augmentation types that were applied.
            consistency /= len(aug_results_dict)

            mining_result.append((asset_path, consistency))
            idx += 1

            if idx % monitor_gap == 0:
                percent = get_ymir_process(stage=YmirStage.TASK,
                                           p=idx / N,
                                           task_idx=self.task_idx,
                                           task_num=self.task_num)
                monitor.write_monitor_logger(percent=percent)

        if RANK != -1:
            # Merge per-rank partial results; only ranks in the group return data.
            mining_result = collect_results_gpu(mining_result, len(images))

        return mining_result

    def predict(self, img: CV_IMAGE) -> NDArray:
        """
        predict single image and return bbox information
        img: opencv BGR, uint8 format

        Returns an (N, 6) float32 array [x1, y1, x2, y2, score, class_idx];
        rows containing inf (from self.infer) are dropped.
        """
        results = self.infer(img)

        xyxy_conf_idx_list = []
        # results is indexed by class: results[idx] holds that class's detections.
        for idx, result in enumerate(results):
            for line in result:
                if any(np.isinf(line)):
                    continue
                x1, y1, x2, y2, score = line
                xyxy_conf_idx_list.append([x1, y1, x2, y2, score, idx])

        if len(xyxy_conf_idx_list) == 0:
            return np.zeros(shape=(0, 6), dtype=np.float32)
        else:
            return np.array(xyxy_conf_idx_list, dtype=np.float32)

    def aug_predict(self, image: CV_IMAGE, bboxes: BBOX) -> 
Tuple[Dict[str, BBOX], Dict[str, NDArray]]: + """ + for different augmentation methods: flip, cutout, rotate and resize + augment the image and bbox and use model to predict them. + + return the predict result and augment bbox. + """ + aug_dict: Dict[str, Callable] = dict(flip=horizontal_flip, cutout=cutout, rotate=rotate, resize=resize) + + aug_bboxes = dict() + aug_results = dict() + for key in aug_dict: + aug_img, aug_bbox = aug_dict[key](image, bboxes) + + aug_result = self.predict(aug_img) + aug_bboxes[key] = aug_bbox + aug_results[key] = aug_result + + return aug_bboxes, aug_results + + +def main(): + if LOCAL_RANK != -1: + init_dist(launcher='pytorch', backend="nccl" if dist.is_nccl_available() else "gloo") + + cfg = get_merged_config() + miner = YmirMining(cfg) + gpu_id: str = str(cfg.param.get('gpu_id', '0')) + gpu = int(gpu_id.split(',')[LOCAL_RANK]) + device = torch.device('cuda', gpu) + miner.model.to(device) + mining_result = miner.mining() + + if RANK in [0, -1]: + rw.write_mining_result(mining_result=mining_result) + + percent = get_ymir_process(stage=YmirStage.POSTPROCESS, p=1, task_idx=miner.task_idx, task_num=miner.task_num) + monitor.write_monitor_logger(percent=percent) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/det-mmdetection-tmi/mining/ymir_mining_aldd.py b/det-mmdetection-tmi/mining/ymir_mining_aldd.py new file mode 100644 index 0000000..e69de29 diff --git a/det-mmdetection-tmi/start.py b/det-mmdetection-tmi/start.py index 220d373..13402f2 100644 --- a/det-mmdetection-tmi/start.py +++ b/det-mmdetection-tmi/start.py @@ -39,7 +39,7 @@ def _run_mining(cfg: edict) -> None: gpu_count = len(gpu_id.split(',')) mining_algorithm: str = cfg.param.get('mining_algorithm', 'aldd') - supported_mining_algorithm = ['cald', 'aldd', 'random'] + supported_mining_algorithm = ['cald', 'aldd', 'random','entropy'] assert mining_algorithm in supported_mining_algorithm, f'unknown mining_algorithm {mining_algorithm}, not in 
{supported_mining_algorithm}'
 
     if gpu_count <= 1:
         command = f'python3 ymir_mining_{mining_algorithm}.py'
diff --git a/det-mmdetection-tmi/ymir_mining_entropy.py b/det-mmdetection-tmi/ymir_mining_entropy.py
new file mode 100644
index 0000000..02426b2
--- /dev/null
+++ b/det-mmdetection-tmi/ymir_mining_entropy.py
@@ -0,0 +1,150 @@
"""Entropy-based active-learning mining for ymir/mmdetection.

Scores each candidate image by the Shannon entropy of its detection
confidences; higher scores mark more informative samples.

NOTE(review): this header previously described the CALD augmentation helpers
(official code: https://github.com/we1pingyu/CALD/blob/master/cald/cald_helper.py),
from which parts of this file appear to have been copied.
"""
import os
import random
import sys
from typing import Any, Callable, Dict, List, Tuple

import cv2
import numpy as np
import torch
import torch.distributed as dist
from easydict import EasyDict as edict
from mmcv.runner import init_dist
from mmdet.apis.test import collect_results_gpu
from mmdet.utils.util_ymir import BBOX, CV_IMAGE
from nptyping import NDArray
from scipy.stats import entropy
from tqdm import tqdm
from ymir_exc import monitor
from ymir_exc import result_writer as rw
from ymir_exc.util import YmirStage, get_merged_config, get_ymir_process
from ymir_infer import YmirModel

# NOTE(review): `random`, `Any`, `Callable`, `List`, `edict` (beyond the
# __init__ annotation) and `entropy` look unused in this module -- likely
# leftovers from the CALD file; candidates for cleanup.
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))


def split_result(result: NDArray) -> Tuple[BBOX, NDArray, NDArray]:
    """Split an (N, 6) detection array [x1, y1, x2, y2, conf, cls] into
    (bboxes, confidences, class_ids); returns empty arrays when N == 0.
    """
    if len(result) > 0:
        bboxes = result[:, :4].astype(np.int32)
        conf = result[:, 4]
        class_id = result[:, 5]
    else:
        bboxes = np.zeros(shape=(0, 4), dtype=np.int32)
        conf = np.zeros(shape=(0, 1), dtype=np.float32)
        class_id = np.zeros(shape=(0, 1), dtype=np.int32)

    return bboxes, conf, class_id


class YmirMining(YmirModel):
    """Entropy-score mining on top of the base YmirModel inference."""

    def __init__(self, cfg: edict):
        super().__init__(cfg)
        # When mining and inference run in the same job, progress reporting is
        # split across two sub-tasks; mining is sub-task 0 either way.
        if cfg.ymir.run_mining and cfg.ymir.run_infer:
            mining_task_idx = 0
            # infer_task_idx = 1
            task_num = 2
        else:
            mining_task_idx = 0
            # infer_task_idx = 0
            task_num = 1
        self.task_idx = mining_task_idx
self.task_num = task_num + + def mining(self): + with open(self.cfg.ymir.input.candidate_index_file, 'r') as f: + images = [line.strip() for line in f.readlines()] + + max_barrier_times = len(images) // WORLD_SIZE + if RANK == -1: + N = len(images) + tbar = tqdm(images) + else: + images_rank = images[RANK::WORLD_SIZE] + N = len(images_rank) + if RANK == 0: + tbar = tqdm(images_rank) + else: + tbar = images_rank + + monitor_gap = max(1, N // 100) + idx = -1 + + mining_result = [] + for idx, asset_path in enumerate(tbar): + if idx % monitor_gap == 0: + percent = get_ymir_process(stage=YmirStage.TASK, + p=idx / N, + task_idx=self.task_idx, + task_num=self.task_num) + monitor.write_monitor_logger(percent=percent) + # batch-level sync, avoid 30min time-out error + if WORLD_SIZE > 1 and idx < max_barrier_times: + dist.barrier() + + img = cv2.imread(asset_path) + # xyxy,conf,cls + result = self.predict(img) + bboxes, conf, _ = split_result(result) + if len(result) == 0: + # no result for the image without augmentation + mining_result.append((asset_path, -10)) + continue + conf = conf.data.cpu().numpy() + mining_result.append((asset_path, -np.sum(conf * np.log2(conf)))) + + if WORLD_SIZE > 1: + mining_result = collect_results_gpu(mining_result, len(images)) + + return mining_result + + def predict(self, img: CV_IMAGE) -> NDArray: + """ + predict single image and return bbox information + img: opencv BGR, uint8 format + """ + results = self.infer(img) + + xyxy_conf_idx_list = [] + for idx, result in enumerate(results): + for line in result: + if any(np.isinf(line)): + continue + x1, y1, x2, y2, score = line + xyxy_conf_idx_list.append([x1, y1, x2, y2, score, idx]) + + if len(xyxy_conf_idx_list) == 0: + return np.zeros(shape=(0, 6), dtype=np.float32) + else: + return np.array(xyxy_conf_idx_list, dtype=np.float32) + + + +def main(): + if LOCAL_RANK != -1: + init_dist(launcher='pytorch', backend="nccl" if dist.is_nccl_available() else "gloo") + + cfg = get_merged_config() + 
miner = YmirMining(cfg)
    # Each rank drives one GPU; clamp to 0 for non-distributed runs (LOCAL_RANK=-1).
    gpu = max(0, LOCAL_RANK)
    device = torch.device('cuda', gpu)
    miner.model.to(device)
    mining_result = miner.mining()

    # Only one process writes the merged result file.
    if RANK in [0, -1]:
        rw.write_mining_result(mining_result=mining_result)

    percent = get_ymir_process(stage=YmirStage.POSTPROCESS, p=1, task_idx=miner.task_idx, task_num=miner.task_num)
    monitor.write_monitor_logger(percent=percent)

    return 0


if __name__ == "__main__":
    sys.exit(main())