From 491597a5b04224eb00bfe9b5db2aff8ecc9edff4 Mon Sep 17 00:00:00 2001 From: liyiersan-server2 Date: Tue, 17 Feb 2026 21:29:24 +0800 Subject: [PATCH] update igevpp and igev_rt --- cfgs/igev_rt/igev_rt_sceneflow_amp.yaml | 4 +- cfgs/igev_rt/igev_rt_sceneflow_uniform.yaml | 84 +++++++++++++++++++ cfgs/igevpp/igevpp_sceneflow_amp.yaml | 4 +- cfgs/igevpp/igevpp_sceneflow_uniform.yaml | 92 +++++++++++++++++++++ docs/1.model_zoo.md | 4 +- stereo/modeling/models/igev_rt/trainer.py | 22 +++++ stereo/modeling/models/igevpp/trainer.py | 19 +++++ 7 files changed, 224 insertions(+), 5 deletions(-) create mode 100644 cfgs/igev_rt/igev_rt_sceneflow_uniform.yaml create mode 100644 cfgs/igevpp/igevpp_sceneflow_uniform.yaml diff --git a/cfgs/igev_rt/igev_rt_sceneflow_amp.yaml b/cfgs/igev_rt/igev_rt_sceneflow_amp.yaml index 9cf0c706..5dcde2c8 100644 --- a/cfgs/igev_rt/igev_rt_sceneflow_amp.yaml +++ b/cfgs/igev_rt/igev_rt_sceneflow_amp.yaml @@ -44,8 +44,8 @@ OPTIMIZATION: FREEZE_BN: true SYNC_BN: false AMP: true - BATCH_SIZE_PER_GPU: 8 - NUM_EPOCHS: 45 + BATCH_SIZE_PER_GPU: 1 + MAX_ITER: 200000 OPTIMIZER: NAME: AdamW diff --git a/cfgs/igev_rt/igev_rt_sceneflow_uniform.yaml b/cfgs/igev_rt/igev_rt_sceneflow_uniform.yaml new file mode 100644 index 00000000..de960643 --- /dev/null +++ b/cfgs/igev_rt/igev_rt_sceneflow_uniform.yaml @@ -0,0 +1,84 @@ +DATA_CONFIG: + DATA_INFOS: + - DATASET: SceneFlowDataset + DATA_SPLIT: { + TRAINING: ./data/SceneFlow/sceneflow_finalpass_train.txt, + EVALUATING: ./data/SceneFlow/sceneflow_finalpass_test.txt, + TESTING: ./data/SceneFlow/sceneflow_finalpass_test.txt + } + RETURN_RIGHT_DISP: false + + DATA_TRANSFORM: + TRAINING: + - { NAME: RandomCrop, SIZE: [ 320, 736 ], Y_JITTER: false } + - { NAME: TransposeImage } + - { NAME: ToTensor } + - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] } + EVALUATING: + #- { NAME: RightTopPad, SIZE: [ 540, 960 ] } + - { NAME: RightTopPad, SIZE: [ 544, 960 ] } + - { NAME: TransposeImage 
} + - { NAME: ToTensor } + - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] } + +MODEL: + NAME: IGEVRT + MAX_DISP: &max_disp 192 + HIDDEN_DIMS: [ 128, 128, 128 ] + MIXED_PRECISION: false + PRECISION_DTYPE: float32 + HIDDEN_DIM: 96 + N_GRU_LAYERS: 3 + N_DOWNSAMPLE: 2 + SLOW_FAST_GRU: True + CORR_LEVELS: 2 + CORR_RADIUS: 4 + TRAIN_ITERS: 22 + VALID_ITERS: 8 + FIND_UNUSED_PARAMETERS: true + CKPT: -1 + PRETRAINED_MODEL: '' + +OPTIMIZATION: + FREEZE_BN: false + SYNC_BN: true + AMP: false + BATCH_SIZE_PER_GPU: 1 + MAX_ITER: 200000 + + OPTIMIZER: + NAME: AdamW + LR: &lr 0.0002 + WEIGHT_DECAY: 1.0e-05 + EPS: 1.0e-08 + + SCHEDULER: + NAME: OneCycleLR + ON_EPOCH: False + MAX_LR: *lr + PCT_START: 0.01 + CYCLE_MOMENTUM: False + ANNEAL_STRATEGY: linear + + CLIP_GRAD: + TYPE: norm + MAX_NORM: 1.0 + NORM_TYPE: 2 + +EVALUATOR: + BATCH_SIZE_PER_GPU: 8 + MAX_DISP: *max_disp + METRIC: + - d1_all + - epe + - thres_1 + - thres_2 + - thres_3 + +TRAINER: + EVAL_INTERVAL: 1 + CKPT_SAVE_INTERVAL: 1 + MAX_CKPT_SAVE_NUM: 30 + LOGGER_ITER_INTERVAL: 10 + TRAIN_VISUALIZATION: True + EVAL_VISUALIZATION: True \ No newline at end of file diff --git a/cfgs/igevpp/igevpp_sceneflow_amp.yaml b/cfgs/igevpp/igevpp_sceneflow_amp.yaml index 3f86e945..27c12c44 100644 --- a/cfgs/igevpp/igevpp_sceneflow_amp.yaml +++ b/cfgs/igevpp/igevpp_sceneflow_amp.yaml @@ -50,11 +50,11 @@ MODEL: PRETRAINED_MODEL: '' OPTIMIZATION: - BATCH_SIZE_PER_GPU: 8 + BATCH_SIZE_PER_GPU: 1 FREEZE_BN: true SYNC_BN: false AMP: true - NUM_EPOCHS: 45 + MAX_ITER: 200000 OPTIMIZER: NAME: AdamW diff --git a/cfgs/igevpp/igevpp_sceneflow_uniform.yaml b/cfgs/igevpp/igevpp_sceneflow_uniform.yaml new file mode 100644 index 00000000..63f0f2df --- /dev/null +++ b/cfgs/igevpp/igevpp_sceneflow_uniform.yaml @@ -0,0 +1,92 @@ +DATA_CONFIG: + DATA_INFOS: + - DATASET: SceneFlowDataset + DATA_SPLIT: { + TRAINING: ./data/SceneFlow/sceneflow_finalpass_train.txt, + EVALUATING: ./data/SceneFlow/sceneflow_finalpass_test.txt, 
+ TESTING: ./data/SceneFlow/sceneflow_finalpass_test.txt + } + RETURN_RIGHT_DISP: false + + DATA_TRANSFORM: + TRAINING: + - { NAME: RandomCrop, SIZE: [ 320, 736 ], Y_JITTER: false } + - { NAME: TransposeImage } + - { NAME: ToTensor } + - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] } + EVALUATING: + - { NAME: RightTopPad, SIZE: [ 544, 960 ] } + - { NAME: TransposeImage } + - { NAME: ToTensor } + - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] } + +MODEL: + NAME: IGEVPP + MAX_DISP: 768 + HIDDEN_DIMS: [ 128, 128, 128 ] + + MIXED_PRECISION: false + PRECISION_DTYPE: float32 + + + S_DISP_RANGE: 48 + S_DISP_INTERVAL: 1 + M_DISP_RANGE: 96 + M_DISP_INTERVAL: 2 + L_DISP_RANGE: 192 + L_DISP_INTERVAL: 4 + + N_GRU_LAYERS: 3 + N_DOWNSAMPLE: 2 + SLOW_FAST_GRU: True + CORR_LEVELS: 2 + CORR_RADIUS: 4 + TRAIN_ITERS: 22 + VALID_ITERS: 32 + FIND_UNUSED_PARAMETERS: false + CKPT: -1 + PRETRAINED_MODEL: '' + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 1 + FREEZE_BN: false + SYNC_BN: true + AMP: false + MAX_ITER: 200000 + + OPTIMIZER: + NAME: AdamW + LR: &lr 0.0002 + WEIGHT_DECAY: 0.00001 + EPS: 0.00000001 + + SCHEDULER: + NAME: OneCycleLR + ON_EPOCH: False + MAX_LR: *lr + PCT_START: 0.01 + CYCLE_MOMENTUM: False + ANNEAL_STRATEGY: linear + + CLIP_GRAD: + TYPE: norm + MAX_NORM: 1.0 + NORM_TYPE: 2 + +EVALUATOR: + BATCH_SIZE_PER_GPU: 8 + MAX_DISP: 192 + METRIC: + - d1_all + - epe + - thres_1 + - thres_2 + - thres_3 + +TRAINER: + EVAL_INTERVAL: 1 + CKPT_SAVE_INTERVAL: 1 + MAX_CKPT_SAVE_NUM: 30 + LOGGER_ITER_INTERVAL: 10 + TRAIN_VISUALIZATION: True + EVAL_VISUALIZATION: True \ No newline at end of file diff --git a/docs/1.model_zoo.md b/docs/1.model_zoo.md index c165f985..49653d98 100755 --- a/docs/1.model_zoo.md +++ b/docs/1.model_zoo.md @@ -14,7 +14,9 @@ | [COEX](https://arxiv.org/abs/2108.05773) | 0.68| 0.67 | [coex_sceneflow_amp.yaml](../cfgs/coex/coex_sceneflow_amp.yaml) | 288x576 |0.64 | | 
[FADNet++](https://arxiv.org/abs/2110.02582) | 0.76 | 0.65 | [fadnet_sceneflow.yaml](../cfgs/fadnet/fadnet_sceneflow.yaml) | 384x768 | - | | [CasStereo](https://arxiv.org/abs/1912.06378) | 0.72 | 0.67 | [casnet_psm_sceneflow.yaml](../cfgs/casnet/casnet_psm_sceneflow.yaml) | 256x512 |0.58 | -| [IGEV](https://arxiv.org/pdf/2303.06615.pdf)| 0.47| 0.46 | [igev_sceneflow_amp.yaml](../cfgs/igev/igev_sceneflow_amp.yaml) | 256x512 |0.46 | +| [IGEV](https://arxiv.org/pdf/2303.06615.pdf)| 0.47| 0.46 | [igev_sceneflow_amp.yaml](../cfgs/igev/igev_sceneflow_amp.yaml) | 256x512 |0.46 | +| [IGEV++](https://arxiv.org/abs/2409.00638)| 0.43| 0.44 | [igevpp_sceneflow_amp.yaml](../cfgs/igevpp/igevpp_sceneflow_amp.yaml) | 256x768 |0.38 | +| [IGEV_RT](https://arxiv.org/abs/2409.00638)| 0.50| 0.52 | [igev_rt_sceneflow_amp.yaml](../cfgs/igev_rt/igev_rt_sceneflow_amp.yaml) | 320x768 |0.52 | | [StereoBase](https://arxiv.org/abs/2312.00343) (*Ours*) | -| **0.34** | [stereobase_sceneflow.yaml](../cfgs/stereobase/stereobase_sceneflow.yaml) | 320x736 | **0.34** | | [FoundationStereo](https://arxiv.org/abs/2501.09898) | 0.33| 0.34 | [foundationstereo_sceneflow.yaml](../cfgs/foundationstereo/fstereo_sceneflow.yaml) | 320x736 | 0.34 | | [Monster](https://arxiv.org/abs/2501.08643) | 0.37| 0.40 | [monster_sceneflow.yaml](../cfgs/monster/monster_sceneflow.yaml) | 320x736 | 0.33 | diff --git a/stereo/modeling/models/igev_rt/trainer.py b/stereo/modeling/models/igev_rt/trainer.py index 65de6740..b15c9066 100644 --- a/stereo/modeling/models/igev_rt/trainer.py +++ b/stereo/modeling/models/igev_rt/trainer.py @@ -3,6 +3,11 @@ from stereo.modeling.trainer_template import TrainerTemplate from .igev_rt_stereo import IGEVRTtereo +from stereo.modeling.trainer_template import TrainerTemplate +from .igev_rt_stereo import IGEVRTtereo +import torch +from stereo.utils import common_utils + __all__ = { 'IGEVRT': IGEVRTtereo, } @@ -11,3 +16,20 @@ class Trainer(TrainerTemplate): def __init__(self, args, cfgs, 
local_rank, global_rank, logger, tb_writer): model = __all__[cfgs.MODEL.NAME](cfgs.MODEL) super().__init__(args, cfgs, local_rank, global_rank, logger, tb_writer, model) + + + def build_optimizer_and_scheduler(self): + if self.cfgs.OPTIMIZATION.OPTIMIZER.NAME == 'Lamb': + from stereo.utils.lamb import Lamb + optimizer_cls = Lamb + else: + optimizer_cls = getattr(torch.optim, self.cfgs.OPTIMIZATION.OPTIMIZER.NAME) + valid_arg = common_utils.get_valid_args(optimizer_cls, self.cfgs.OPTIMIZATION.OPTIMIZER, ['name']) + optimizer = optimizer_cls(params=[p for p in self.model.parameters() if p.requires_grad], **valid_arg) + + self.cfgs.OPTIMIZATION.SCHEDULER.TOTAL_STEPS = self.max_iter + 100 + scheduler_cls = getattr(torch.optim.lr_scheduler, self.cfgs.OPTIMIZATION.SCHEDULER.NAME) + valid_arg = common_utils.get_valid_args(scheduler_cls, self.cfgs.OPTIMIZATION.SCHEDULER, ['name', 'on_epoch']) + scheduler = scheduler_cls(optimizer, **valid_arg) + + return optimizer, scheduler \ No newline at end of file diff --git a/stereo/modeling/models/igevpp/trainer.py b/stereo/modeling/models/igevpp/trainer.py index a95b8d0c..98d5b15b 100644 --- a/stereo/modeling/models/igevpp/trainer.py +++ b/stereo/modeling/models/igevpp/trainer.py @@ -2,6 +2,8 @@ # @Author : Qian Zhou from stereo.modeling.trainer_template import TrainerTemplate from .igevpp_stereo import IGEVPPStereo +import torch +from stereo.utils import common_utils __all__ = { 'IGEVPP': IGEVPPStereo, @@ -11,3 +13,20 @@ class Trainer(TrainerTemplate): def __init__(self, args, cfgs, local_rank, global_rank, logger, tb_writer): model = __all__[cfgs.MODEL.NAME](cfgs.MODEL) super().__init__(args, cfgs, local_rank, global_rank, logger, tb_writer, model) + + + def build_optimizer_and_scheduler(self): + if self.cfgs.OPTIMIZATION.OPTIMIZER.NAME == 'Lamb': + from stereo.utils.lamb import Lamb + optimizer_cls = Lamb + else: + optimizer_cls = getattr(torch.optim, self.cfgs.OPTIMIZATION.OPTIMIZER.NAME) + valid_arg = 
common_utils.get_valid_args(optimizer_cls, self.cfgs.OPTIMIZATION.OPTIMIZER, ['name']) + optimizer = optimizer_cls(params=[p for p in self.model.parameters() if p.requires_grad], **valid_arg) + + self.cfgs.OPTIMIZATION.SCHEDULER.TOTAL_STEPS = self.max_iter + 100 + scheduler_cls = getattr(torch.optim.lr_scheduler, self.cfgs.OPTIMIZATION.SCHEDULER.NAME) + valid_arg = common_utils.get_valid_args(scheduler_cls, self.cfgs.OPTIMIZATION.SCHEDULER, ['name', 'on_epoch']) + scheduler = scheduler_cls(optimizer, **valid_arg) + + return optimizer, scheduler \ No newline at end of file