From fc9ccb9902df07edb487e10be51e860ca489b709 Mon Sep 17 00:00:00 2001
From: TsungChinHanKen <>
Date: Sun, 21 Mar 2021 21:31:55 -0400
Subject: [PATCH 01/59] add evaluation assessment for sen12ms

---
 evaluation_sen12ms_assessment.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 evaluation_sen12ms_assessment.md

diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md
new file mode 100644
index 0000000..8f83215
--- /dev/null
+++ b/evaluation_sen12ms_assessment.md
@@ -0,0 +1,20 @@
+1. The difference evaluation options:
> - a. Land cover classification -- image classificaiton.
> - b. Semantic Segmentation -- assigning a class label to every pixel of the input image.

2.

3. tmp
> a. SEN12MS - Supervised Learning Benchmark - Classification

| Backbone | Land Type | Modalitities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) |
|---|---|---|---|---|---|---|---|
|DenseNet|single-label|_s1s2|64|100|51.16|50.78|62.90|
|DenseNet|single-label|_s2|64|100|54.41|52.32|64.74|
|ResNet50|single-label|_RGB|64|100|45.11|45.16|58.98|
|ResNet50|single-label|_s1s2|64|100|45.52|53.21|64.66|
|ResNet50|single-label|_s2|64|100|57.33|53.39|66.35|
|ResNet50|multi-label|_RGB|64|100|89.86|47.57|66.51|
|ResNet50|multi-label|_s1s2|64|100|91.22|57.46|71.40|
|ResNet50|multi-label|_s2|64|100|90.62|56.14|69.88|

From c4ea1567bf74f1eb9d5aeba16cc11cdcbebfc250 Mon Sep 17 00:00:00 2001
From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com>
Date: Sun, 21 Mar 2021 21:39:37 -0400
Subject: [PATCH 02/59] Update evaluation_sen12ms_assessment.md

---
 evaluation_sen12ms_assessment.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md
index 8f83215..2d6a87f 100644
--- a/evaluation_sen12ms_assessment.md
+++ b/evaluation_sen12ms_assessment.md
@@ -1,10 +1,23 @@
+# A. Key Questions:
> ### 1. The evaluation options:
> > a. Scene Classification -- land cover label (currently on wandb).
> > b. 
Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). > ### 2. What metrics they used: -> > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the repo url[https://github.com/schmitt-muc/SEN12MS]. -> > b. Semantic segmentation -- class-wise and average accuracy -- refer to the repo url[https://github.com/lukasliebel/dfc2020_baseline]. +> > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo] (https://github.com/schmitt-muc/SEN12MS). +> > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). From f93f14080c5f1da88357528d67f6602d6a3fe20d Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:41:20 -0400 Subject: [PATCH 04/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 1b524b8..8b91bab 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -4,7 +4,7 @@ > > b. Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). > ### 2. What metrics they used: -> > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo] (https://github.com/schmitt-muc/SEN12MS). +> > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). > > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). From 520fe0f8050f1670f0b5d44ae05c0e1f8dc0e1bd Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:41:51 -0400 Subject: [PATCH 05/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 8b91bab..6067c12 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -1,10 +1,12 @@ # A. Key Questions: > ### 1. The evaluation options: > > a. Scene Classification -- land cover label (currently on wandb). + > > b. Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). > ### 2. What metrics they used: > > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). + > > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). 
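For concreteness, the metrics named in the assessment above -- Average Accuracy for single-label, Overall Accuracy for multi-label, plus F1/precision/recall -- can be sketched as follows. This uses scikit-learn with made-up arrays; reading "Average Accuracy" as mean per-class recall and multi-label "Overall Accuracy" as element-wise agreement are assumptions about the intended definitions, not code from either linked repo.

```python
# Hedged sketch of the metrics discussed above -- not the SEN12MS repo's code.
import numpy as np
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score

# Single-label scene classification: one class index per patch (values made up).
y_true = np.array([0, 2, 1, 2, 0, 1])
y_pred = np.array([0, 1, 1, 2, 0, 1])

# "Average Accuracy" is commonly the mean of per-class recalls,
# which balanced_accuracy_score computes directly.
avg_acc = balanced_accuracy_score(y_true, y_pred)
macro_f1 = f1_score(y_true, y_pred, average="macro")  # unweighted mean over classes
micro_f1 = f1_score(y_true, y_pred, average="micro")  # pooled TP/FP/FN counts

# Multi-label scene classification: one binary indicator vector per patch.
y_true_ml = np.array([[1, 0, 1], [0, 1, 1], [1, 1, 0]])
y_pred_ml = np.array([[1, 0, 0], [0, 1, 1], [1, 0, 0]])

# One plausible reading of multi-label "Overall Accuracy": element-wise agreement
# between the true and predicted indicator matrices.
overall_acc = (y_true_ml == y_pred_ml).mean()
precision = precision_score(y_true_ml, y_pred_ml, average="micro")
recall = recall_score(y_true_ml, y_pred_ml, average="micro")

print(avg_acc, macro_f1, micro_f1, overall_acc, precision, recall)
```

If the benchmark instead defines Overall Accuracy as an exact-match ratio per scene, `(y_true_ml == y_pred_ml).all(axis=1).mean()` would be the variant to use.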
From c145c4ca8273c3594a6e962a53bc60c92b1b3527 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:43:59 -0400 Subject: [PATCH 06/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 6067c12..f0849c0 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -1,25 +1,27 @@ -# A. Key Questions: -> ### 1. The evaluation options: -> > a. Scene Classification -- land cover label (currently on wandb). +# A. Key Questions +### 1. The evaluation options: +> a. Scene Classification -- land cover label (currently on wandb). -> > b. Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). +> b. Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). -> ### 2. What metrics they used: -> > a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). +### 2. What metrics they used: +> a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). -> > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). +> b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). # B. Deep Dive -1. The difference evaluation options: -> - a. Land cover classification -- image classificaiton. -> - b. Semantic Segmentation -- assigning a class label to every pixel of the input image. +### 1. Scene Classification -2. +### 2. Semantic Segmentation -3. results (tmp) on wandb + + + + +# Results (WIP on wandb, subject to changes) > a. SEN12MS - Supervised Learning Benchmark - Classification | Backbone | Land Type | Modalitities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) | @@ -33,3 +35,4 @@ |ResNet50|multi-label|_s1s2|64|100|91.22|57.46|71.40| |ResNet50|multi-label|_s2|64|100|90.62|56.14|69.88| + From 00d3f122beb575a0b964653a6b77871a0d6ad771 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:54:18 -0400 Subject: [PATCH 07/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index f0849c0..08d82d0 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -1,4 +1,4 @@ -# A. Key Questions +# A. Question & Response ### 1. The evaluation options: > a. Scene Classification -- land cover label (currently on wandb). @@ -11,9 +11,12 @@ -# B. Deep Dive +# B. Summary of the Deep Dive ### 1. Scene Classification +1. Label were used -- **IGBP land cover scheme**. +>> a. the original IGBP land cover scheme has **17** classes +>> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. 
Semantic Segmentation From ff18a6856a8dd2b27aa01ca9f65d68bbbf8dbeff Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:54:31 -0400 Subject: [PATCH 08/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 08d82d0..15fe335 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -15,7 +15,8 @@ ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. ->> a. the original IGBP land cover scheme has **17** classes +>> a. the original IGBP land cover scheme has **17** classes. + >> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. Semantic Segmentation From b7826810473fdf419e59aa80a9d2aa208e016d4a Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:55:02 -0400 Subject: [PATCH 09/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 15fe335..c6ca5d3 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -15,9 +15,8 @@ ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. ->> a. the original IGBP land cover scheme has **17** classes. - ->> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. +a. the original IGBP land cover scheme has **17** classes. +b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. Semantic Segmentation From 08b0bab8fdc096eb31b3c9dcada872806fd2d767 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:55:15 -0400 Subject: [PATCH 10/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 1 + 1 file changed, 1 insertion(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index c6ca5d3..d06c976 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -16,6 +16,7 @@ ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. a. the original IGBP land cover scheme has **17** classes. + b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. Semantic Segmentation From 3120c8b438d1e09666ed00fe97141cff6832f511 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 21:56:45 -0400 Subject: [PATCH 11/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index d06c976..bebabd1 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -15,9 +15,10 @@ ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. -a. the original IGBP land cover scheme has **17** classes. -b. 
the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. +  a. the original IGBP land cover scheme has **17** classes. + +  b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. Semantic Segmentation From 7a315615b63d6d8363633d4e359d012419e6614b Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:00:29 -0400 Subject: [PATCH 12/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index bebabd1..1b4dd17 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -16,9 +16,22 @@ ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. -  a. the original IGBP land cover scheme has **17** classes. +> a. the original IGBP land cover scheme has **17** classes. + +> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. + +2. Definition of single-label and multi-label. + +> . The authors has already processed and stored the labels of each image in SEN12MS dataset. + +- single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. +- single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. +- single-label_IGBPsimple_ClsNum: This file contains scene labels based on the simplified IGBP land cover scheme, represented by actual class numbers. +- single-label_IGBPsimple_OneHot: This file contains scene labels based on the simplified IGBP land cover scheme, represented by a one-hot vector encoding. All these files are available both in plain ASCII (.txt) format, as well as .pkl format. + + + -  b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. ### 2. Semantic Segmentation From eebed492e292c488767cdb1ccaf019eed2cdfc23 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:10:05 -0400 Subject: [PATCH 13/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 1b4dd17..b6626ca 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -22,7 +22,9 @@ 2. Definition of single-label and multi-label. -> . The authors has already processed and stored the labels of each image in SEN12MS dataset. +> a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). There are known label noise to the SEN12MS dataset and hence these accuracies will constitute the upper bound of actually achievable predictive power. + +> b. 
The authors has already processed and stored the labels of each image in SEN12MS dataset in to ... - single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. - single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. From 871f1860cfbf1ca5837b4e32a63f5e7e0a8376e7 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:25:31 -0400 Subject: [PATCH 14/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index b6626ca..e70132f 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -24,7 +24,13 @@ > a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). There are known label noise to the SEN12MS dataset and hence these accuracies will constitute the upper bound of actually achievable predictive power. -> b. The authors has already processed and stored the labels of each image in SEN12MS dataset in to ... +> b. from (a), the authors has already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the imange that belongs to each classes, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below is the parameters we can define on the fly when training. + +- full classes (17) or simplified classes (10) +- single label -- it's derived from the probabilities files that applys the argmax to select the highest probability of class (vector) in a scence (patch). +- multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vetor. + +> c. For the single-label, the authors also provided the processed one-hot encoding for the vector dervided from (b). - single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. - single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. From 49bbbc0fe9a6b49fcaaec5cc7e84926473f5b187 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:26:12 -0400 Subject: [PATCH 15/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index e70132f..c4fc24d 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -26,16 +26,16 @@ > b. 
from (a), the authors has already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the imange that belongs to each classes, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below is the parameters we can define on the fly when training. -- full classes (17) or simplified classes (10) -- single label -- it's derived from the probabilities files that applys the argmax to select the highest probability of class (vector) in a scence (patch). -- multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vetor. +>> - full classes (17) or simplified classes (10) +>> - single label -- it's derived from the probabilities files that applys the argmax to select the highest probability of class (vector) in a scence (patch). +>> - multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vetor. > c. For the single-label, the authors also provided the processed one-hot encoding for the vector dervided from (b). -- single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. -- single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. -- single-label_IGBPsimple_ClsNum: This file contains scene labels based on the simplified IGBP land cover scheme, represented by actual class numbers. -- single-label_IGBPsimple_OneHot: This file contains scene labels based on the simplified IGBP land cover scheme, represented by a one-hot vector encoding. All these files are available both in plain ASCII (.txt) format, as well as .pkl format. +>> - single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. +>> - single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. +>> - single-label_IGBPsimple_ClsNum: This file contains scene labels based on the simplified IGBP land cover scheme, represented by actual class numbers. +>> - single-label_IGBPsimple_OneHot: This file contains scene labels based on the simplified IGBP land cover scheme, represented by a one-hot vector encoding. All these files are available both in plain ASCII (.txt) format, as well as .pkl format. From 8701067f3a27a402f09d99dc2cbc888b16e15cb0 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:37:46 -0400 Subject: [PATCH 16/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index c4fc24d..0f7e296 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -9,6 +9,7 @@ > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). +**Recalling from the meeting with Colorado, whether this metircs are standard? -- the answer is yes -- hence, In stead of using the author's evaluation system, there maybe options for the use of openselfsup ecosystem. # B. 
Summary of the Deep Dive @@ -37,11 +38,23 @@ >> - single-label_IGBPsimple_ClsNum: This file contains scene labels based on the simplified IGBP land cover scheme, represented by actual class numbers. >> - single-label_IGBPsimple_OneHot: This file contains scene labels based on the simplified IGBP land cover scheme, represented by a one-hot vector encoding. All these files are available both in plain ASCII (.txt) format, as well as .pkl format. +3. Modalities +The modalities can be chosen when performing the training. Three options can be evaluated. +>> - _RGB: only S2 TGB imagery is used +>> _s2: full multi-spectral s-2 data were used +>> _s1s2: data fusion-based models analyzing both s-1 and s-2 data +**Checked wheter _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** +4. Reporting Metrics +The authors has implemented some metrics in the .py files but according to the papers, there is no actual reporting for the model describe above (**or not found, still searhing**). However, the author did mentioned in the paper as well as in the .py files for the metrics to be reported, which includes: +>> 1. Average Accuracy (get_AA) -- only applied to single-label types. +>> 2. Overall Accuracy (OA_multi) -- particular for multi-label cases. +>> 3. F1-score, precision, and recall -- this is relatively standard measure. -### 2. Semantic Segmentation +### 2. Semantic Segmentation (WIP) +-- this tasks seems to be not straightforwrad. and the author did not report everything (based on the paper and repo). checking ... From 4c760c654c4e61e628ceda1c3d304d7107f254c3 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:38:20 -0400 Subject: [PATCH 17/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 0f7e296..bbba674 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -12,7 +12,7 @@ **Recalling from the meeting with Colorado, whether this metircs are standard? -- the answer is yes -- hence, In stead of using the author's evaluation system, there maybe options for the use of openselfsup ecosystem. -# B. Summary of the Deep Dive +# B. Summary of the Benchmark Evaluation Deep Dive ### 1. Scene Classification 1. Label were used -- **IGBP land cover scheme**. From 492066313bb0584b5b8b29a5f0b047fdfb0ed730 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:41:49 -0400 Subject: [PATCH 18/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index bbba674..5760e5e 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -9,7 +9,7 @@ > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). -**Recalling from the meeting with Colorado, whether this metircs are standard? -- the answer is yes -- hence, In stead of using the author's evaluation system, there maybe options for the use of openselfsup ecosystem. +**Recalling from the meeting with Colorado, whether this metircs are standard? 
-- the answer is yes -- hence, In stead of using the author's evaluation system, there maybe options for the use of openselfsup ecosystem.** # B. Summary of the Benchmark Evaluation Deep Dive @@ -52,16 +52,19 @@ The authors has implemented some metrics in the .py files but according to the p >> 2. Overall Accuracy (OA_multi) -- particular for multi-label cases. >> 3. F1-score, precision, and recall -- this is relatively standard measure. +5. There are pre-trained model(weights) and optimizations parameters can be downloaded. + ### 2. Semantic Segmentation (WIP) -- this tasks seems to be not straightforwrad. and the author did not report everything (based on the paper and repo). checking ... - +WIP # Results (WIP on wandb, subject to changes) -> a. SEN12MS - Supervised Learning Benchmark - Classification +### 1. SEN12MS - Supervised Learning Benchmark - Scence Classification +These models were downloaded from their per-trained described in B-5, and evaluated. | Backbone | Land Type | Modalitities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) | |---|---|---|---|---|---|---|---| From 9d36b46a3845009a69838a5fed161a91bedd1911 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:45:44 -0400 Subject: [PATCH 19/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 5760e5e..c5158a8 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -44,7 +44,7 @@ The modalities can be chosen when performing the training. Three options can be >> _s2: full multi-spectral s-2 data were used >> _s1s2: data fusion-based models analyzing both s-1 and s-2 data -**Checked wheter _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** +**Checked whether _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** 4. Reporting Metrics The authors has implemented some metrics in the .py files but according to the papers, there is no actual reporting for the model describe above (**or not found, still searhing**). However, the author did mentioned in the paper as well as in the .py files for the metrics to be reported, which includes: From 24dd6e036e791976a31efa4b00f7612fa5226334 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:49:06 -0400 Subject: [PATCH 20/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index c5158a8..8ba0ba5 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -62,7 +62,13 @@ WIP -# Results (WIP on wandb, subject to changes) +# C. Our Evaluation choices +1. potential 1 -- using the exiting scence classificaion models and the current evalution in sen12ms dataset to evaluate the moco one +2. potential 2 -- using openselfsup to evalute the sen12ms dataset?? (tbc) +3. potential 3 -- others ?? (tbd) + + +# D. Results (WIP on wandb, subject to changes) ### 1. SEN12MS - Supervised Learning Benchmark - Scence Classification These models were downloaded from their per-trained described in B-5, and evaluated. 
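For reference, the label derivation described in the patches above -- argmax over the stored per-class probabilities for single-label, a per-class threshold for multi-label, and one-hot encoding for the *_OneHot files -- can be sketched roughly as below. The pickle layout and the 0.1 threshold are assumptions for illustration, not the authors' exact code.

```python
# Rough sketch of deriving targets from IGBP_probability_labels.pkl as described
# above. The dict layout and the threshold value are assumptions.
import pickle
import numpy as np

with open("IGBP_probability_labels.pkl", "rb") as f:
    # Assumed layout: {patch_id: vector of shape (num_classes,)} giving the
    # fraction of the patch covered by each IGBP class.
    probs = pickle.load(f)

THRESHOLD = 0.1  # hypothetical cut-off for multi-label targets

def single_label(p):
    """Single-label target: the class covering the largest share of the patch."""
    return int(np.argmax(p))

def multi_label(p):
    """Multi-label target: every class whose share exceeds the threshold."""
    return (np.asarray(p) > THRESHOLD).astype(np.int64)

def one_hot(idx, num_classes):
    """One-hot vector for a single-label target (cf. the *_OneHot label files)."""
    v = np.zeros(num_classes, dtype=np.int64)
    v[idx] = 1
    return v

single = {pid: single_label(p) for pid, p in probs.items()}
multi = {pid: multi_label(p) for pid, p in probs.items()}
```

Under this reading, choosing full (17) versus simplified (10) IGBP classes only changes the class mapping applied to the probability vector before the argmax/threshold step.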
From 58cbded04ccfaba4fc79456c2e202e229caac1c1 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:53:13 -0400 Subject: [PATCH 21/59] Update evaluation_sen12ms_assessment.md upload the appendix --- evaluation_sen12ms_assessment.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 8ba0ba5..36727f5 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -84,3 +84,12 @@ These models were downloaded from their per-trained described in B-5, and evalua |ResNet50|multi-label|_s2|64|100|90.62|56.14|69.88| +# E. Appendix +1. IGBP Land Cover Classification System +![Screen Shot 2021-03-21 at 10 52 56 PM](https://user-images.githubusercontent.com/39634122/111934636-2f68ee00-8a98-11eb-8763-8453266227ed.png) + + + + + + From 40d711c6395ebde1d21f1f1345d1a1c9c65e4263 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 22:58:04 -0400 Subject: [PATCH 22/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 36727f5..d1eb19b 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -63,10 +63,17 @@ WIP # C. Our Evaluation choices + +### a. methods + 1. potential 1 -- using the exiting scence classificaion models and the current evalution in sen12ms dataset to evaluate the moco one 2. potential 2 -- using openselfsup to evalute the sen12ms dataset?? (tbc) 3. potential 3 -- others ?? (tbd) +### b. samples +1. full or sub-samples ? (distributions) +2. size + # D. Results (WIP on wandb, subject to changes) ### 1. SEN12MS - Supervised Learning Benchmark - Scence Classification From 2b49f216c812b7f2d0ebc73bf1abb33c2d941ef5 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:21:16 -0400 Subject: [PATCH 23/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index d1eb19b..d3a2afe 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -41,7 +41,9 @@ 3. Modalities The modalities can be chosen when performing the training. Three options can be evaluated. >> - _RGB: only S2 TGB imagery is used + >> _s2: full multi-spectral s-2 data were used + >> _s1s2: data fusion-based models analyzing both s-1 and s-2 data **Checked whether _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** From 8a7748428b44b10719d3fc12ed137133214678a5 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:21:34 -0400 Subject: [PATCH 24/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index d3a2afe..496ead7 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -42,9 +42,9 @@ The modalities can be chosen when performing the training. Three options can be evaluated. 
>> - _RGB: only S2 TGB imagery is used ->> _s2: full multi-spectral s-2 data were used +>> - _s2: full multi-spectral s-2 data were used ->> _s1s2: data fusion-based models analyzing both s-1 and s-2 data +>> - _s1s2: data fusion-based models analyzing both s-1 and s-2 data **Checked whether _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** From 65d529b6b64c3557a490b52b2c49e6133ef3ce32 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:23:00 -0400 Subject: [PATCH 25/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 496ead7..4e171e5 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -23,7 +23,7 @@ 2. Definition of single-label and multi-label. -> a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). There are known label noise to the SEN12MS dataset and hence these accuracies will constitute the upper bound of actually achievable predictive power. +> a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). **There are known label noise to the SEN12MS dataset and hence these accuracies will constitute the upper bound of actually achievable predictive power**. > b. from (a), the authors has already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the imange that belongs to each classes, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below is the parameters we can define on the fly when training. From ccd93301cd102110c9c9592f62138631a33ce472 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:25:28 -0400 Subject: [PATCH 26/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 4e171e5..0427255 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -56,6 +56,8 @@ The authors has implemented some metrics in the .py files but according to the p 5. There are pre-trained model(weights) and optimizations parameters can be downloaded. +6. No entire resutls from this pre-trained was documented (or at least not found at the moment, still seacrhing) + ### 2. Semantic Segmentation (WIP) -- this tasks seems to be not straightforwrad. 
and the author did not report everything (based on the paper and repo). checking ... From 90288a487b37aafe3f3809b49ea1ed4ce50f9020 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:27:43 -0400 Subject: [PATCH 27/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 0427255..6dc0267 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -29,7 +29,7 @@ >> - full classes (17) or simplified classes (10) >> - single label -- it's derived from the probabilities files that applys the argmax to select the highest probability of class (vector) in a scence (patch). ->> - multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vetor. +>> - multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vector. > c. For the single-label, the authors also provided the processed one-hot encoding for the vector dervided from (b). From 9418f934541374fbbcac7395f9eb3d910df49e06 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:28:22 -0400 Subject: [PATCH 28/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 6dc0267..bd79296 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -40,7 +40,7 @@ 3. Modalities The modalities can be chosen when performing the training. Three options can be evaluated. ->> - _RGB: only S2 TGB imagery is used +>> - _RGB: only S2 RGB imagery is used >> - _s2: full multi-spectral s-2 data were used From 14de6d801d58d3a8f41668b83be01d5368c6100d Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:41:00 -0400 Subject: [PATCH 29/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index bd79296..d5955cf 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -46,7 +46,7 @@ The modalities can be chosen when performing the training. Three options can be >> - _s1s2: data fusion-based models analyzing both s-1 and s-2 data -**Checked whether _s1s2 would be the most releveant model when it comes to compares with our approach - s1s2 MOCO, or it does not matter?** +**Checked whether _s1s2 would be the most releveant model when it comes to comparison with our approach - s1s2 MOCO, or it does not matter?** 4. Reporting Metrics The authors has implemented some metrics in the .py files but according to the papers, there is no actual reporting for the model describe above (**or not found, still searhing**). 
However, the author did mentioned in the paper as well as in the .py files for the metrics to be reported, which includes: From e8455d6020838284cc6818a0bffc63436866b8af Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Sun, 21 Mar 2021 23:43:49 -0400 Subject: [PATCH 30/59] Update evaluation_sen12ms_assessment.md --- evaluation_sen12ms_assessment.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index d5955cf..b0ed5e9 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -56,8 +56,6 @@ The authors has implemented some metrics in the .py files but according to the p 5. There are pre-trained model(weights) and optimizations parameters can be downloaded. -6. No entire resutls from this pre-trained was documented (or at least not found at the moment, still seacrhing) - ### 2. Semantic Segmentation (WIP) -- this tasks seems to be not straightforwrad. and the author did not report everything (based on the paper and repo). checking ... From ed789168ffe9d695869b76b08b202df8ba2db2fa Mon Sep 17 00:00:00 2001 From: Surya <15736461+suryatechie@users.noreply.github.com> Date: Mon, 22 Mar 2021 12:25:30 -0700 Subject: [PATCH 31/59] Made some minor changes --- evaluation_sen12ms_assessment.md | 42 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index b0ed5e9..7cda446 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -5,11 +5,11 @@ > b. Semantic segmentation -- assigning a class label to every pixel of the input image (not implement). ### 2. What metrics they used: -> a. Scene Classification -- Average Accuracy (single-label); Overall Accuarcy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). +> a. Scene Classification -- Average Accuracy (single-label); Overall Accuracy (multi-label); f-1, precision, and recall refer to the [repo](https://github.com/schmitt-muc/SEN12MS). > b. Semantic segmentation -- class-wise and average accuracy -- refer to the [repo](https://github.com/lukasliebel/dfc2020_baseline). -**Recalling from the meeting with Colorado, whether this metircs are standard? -- the answer is yes -- hence, In stead of using the author's evaluation system, there maybe options for the use of openselfsup ecosystem.** +**Recalling from the meeting with Colorado, whether these metrics are standard? -- the answer is yes -- hence, Instead of using the author's evaluation system, there may be options for the use of openselfsup ecosystem.** # B. Summary of the Benchmark Evaluation Deep Dive @@ -19,19 +19,19 @@ > a. the original IGBP land cover scheme has **17** classes. -> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the orignial 17 classes. +> b. the simplified version of IGBP classes has **10** classes, which derived and consolidated from the original 17 classes. 2. Definition of single-label and multi-label. -> a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. 
According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). **There are known label noise to the SEN12MS dataset and hence these accuracies will constitute the upper bound of actually achievable predictive power**. +> a. For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). **There are known label noise to the SEN12MS dataset, and hence these accuracies will constitute the upper bound of actually achievable predictive power**. -> b. from (a), the authors has already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the imange that belongs to each classes, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below is the parameters we can define on the fly when training. +> b. from (a), the authors have already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the image that belongs to each class, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below are the parameters we can define on the fly when training. >> - full classes (17) or simplified classes (10) ->> - single label -- it's derived from the probabilities files that applys the argmax to select the highest probability of class (vector) in a scence (patch). ->> - multi label -- it's derived from the probabilities files that some threshold can be applied for each classes in a vector. +>> - single label -- it's derived from the probabilities files that applies the argmax to select the highest probability of class (vector) in a scence (patch). +>> - multi-label -- it's derived from the probabilities files that some threshold can be applied for each class in a vector. -> c. For the single-label, the authors also provided the processed one-hot encoding for the vector dervided from (b). +> c. For the single-label, the authors also provided the processed one-hot encoding for the vector derived from (b). >> - single-label_IGBPfull_ClsNum: This file contains scene labels based on the full IGBP land cover scheme, represented by actual class numbers. >> - single-label_IGBP_full_OneHot: This file contains scene labels based on the full IGBP land cover scheme, represented by a one-hot vector encoding. @@ -46,19 +46,19 @@ The modalities can be chosen when performing the training. Three options can be >> - _s1s2: data fusion-based models analyzing both s-1 and s-2 data -**Checked whether _s1s2 would be the most releveant model when it comes to comparison with our approach - s1s2 MOCO, or it does not matter?** +**Checked whether _s1s2 would be the most relevant model when it comes to comparison with our approach - s1s2 MOCO, or it does not matter?** 4. 
Reporting Metrics -The authors has implemented some metrics in the .py files but according to the papers, there is no actual reporting for the model describe above (**or not found, still searhing**). However, the author did mentioned in the paper as well as in the .py files for the metrics to be reported, which includes: +The authors have implemented some metrics in the .py files, but according to the papers, there is no actual reporting for the model described above (**or not found, still searching**). However, the author did mention in the paper as well as in the .py files for the metrics to be reported, which includes: >> 1. Average Accuracy (get_AA) -- only applied to single-label types. >> 2. Overall Accuracy (OA_multi) -- particular for multi-label cases. ->> 3. F1-score, precision, and recall -- this is relatively standard measure. +>> 3. F1-score, precision, and recall -- this is a relatively standard measure. 5. There are pre-trained model(weights) and optimizations parameters can be downloaded. ### 2. Semantic Segmentation (WIP) --- this tasks seems to be not straightforwrad. and the author did not report everything (based on the paper and repo). checking ... +-- this task seems to be not straightforward. and the author did not report everything (based on the paper and repo). checking ... WIP @@ -68,20 +68,20 @@ WIP ### a. methods -1. potential 1 -- using the exiting scence classificaion models and the current evalution in sen12ms dataset to evaluate the moco one -2. potential 2 -- using openselfsup to evalute the sen12ms dataset?? (tbc) -3. potential 3 -- others ?? (tbd) +1. potential 1 -- using the exiting scence classification models and the current evaluation in sen12ms dataset to evaluate the moco one +2. potential 2 -- using openselfsup to evaluate the sen12ms dataset?? (TBD) +3. potential 3 -- others ?? (TBD) ### b. samples -1. full or sub-samples ? (distributions) +1. full or sub-samples? (distributions) 2. size # D. Results (WIP on wandb, subject to changes) ### 1. SEN12MS - Supervised Learning Benchmark - Scence Classification -These models were downloaded from their per-trained described in B-5, and evaluated. +These models were downloaded from their pre-trained described in B-5, and evaluated. -| Backbone | Land Type | Modalitities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) | +| Backbone | Land Type | Modalities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) | |---|---|---|---|---|---|---|---| |DenseNet|single-label|_s1s2|64|100|51.16|50.78|62.90| |DenseNet|single-label|_s2|64|100|54.41|52.32|64.74| @@ -96,9 +96,3 @@ These models were downloaded from their per-trained described in B-5, and evalua # E. Appendix 1. IGBP Land Cover Classification System ![Screen Shot 2021-03-21 at 10 52 56 PM](https://user-images.githubusercontent.com/39634122/111934636-2f68ee00-8a98-11eb-8763-8453266227ed.png) - - - - - - From 45a8edbdb334f274057af54cdd7ad6d0eb3d03d4 Mon Sep 17 00:00:00 2001 From: Surya <15736461+suryatechie@users.noreply.github.com> Date: Mon, 22 Mar 2021 12:28:48 -0700 Subject: [PATCH 32/59] Made some more minor changes --- evaluation_sen12ms_assessment.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/evaluation_sen12ms_assessment.md b/evaluation_sen12ms_assessment.md index 7cda446..5eb9466 100644 --- a/evaluation_sen12ms_assessment.md +++ b/evaluation_sen12ms_assessment.md @@ -23,12 +23,12 @@ 2. Definition of single-label and multi-label. -> a. 
For every scence (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). **There are known label noise to the SEN12MS dataset, and hence these accuracies will constitute the upper bound of actually achievable predictive power**. +> a. For every scene (patch), we can identify the labels through land cover images from MODIS, in which the first band describes the IGBP classification scheme, whereas the rest of the three bands covered the LCCS land cover layer, LCCS land use layer, and the LCCS surface hydrology layer. According to the authors, the overall acc for the layers are about 67% (IGBP), 74% (LCCS land cover), 81% (LCCS land use), and 87% (LCCS surface hydrology). **There are known label noise to the SEN12MS dataset, and hence these accuracies will constitute the upper bound of actually achievable predictive power**. -> b. from (a), the authors have already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the image that belongs to each class, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scence (patch). Below are the parameters we can define on the fly when training. +> b. from (a), the authors have already processed and stored the labels of each image in SEN12MS with full IGBP classes into the file **IGBP_probability_labels.pkl**, meaning the percentage of the image that belongs to each class, where further label types and target classes can be derived during the training steps -- single label or multi-label for a scene (patch). Below are the parameters we can define on the fly when training. >> - full classes (17) or simplified classes (10) ->> - single label -- it's derived from the probabilities files that applies the argmax to select the highest probability of class (vector) in a scence (patch). +>> - single label -- it's derived from the probabilities files that applies the argmax to select the highest probability of class (vector) in a scene (patch). >> - multi-label -- it's derived from the probabilities files that some threshold can be applied for each class in a vector. > c. For the single-label, the authors also provided the processed one-hot encoding for the vector derived from (b). @@ -68,7 +68,7 @@ WIP ### a. methods -1. potential 1 -- using the exiting scence classification models and the current evaluation in sen12ms dataset to evaluate the moco one +1. potential 1 -- using the exiting scene classification models and the current evaluation in sen12ms dataset to evaluate the moco one 2. potential 2 -- using openselfsup to evaluate the sen12ms dataset?? (TBD) 3. potential 3 -- others ?? (TBD) @@ -78,7 +78,7 @@ WIP # D. Results (WIP on wandb, subject to changes) -### 1. SEN12MS - Supervised Learning Benchmark - Scence Classification +### 1. SEN12MS - Supervised Learning Benchmark - Scene Classification These models were downloaded from their pre-trained described in B-5, and evaluated. 
| Backbone | Land Type | Modalities | Bactch size | Epochs | Accuracy (%) | Macro-F1 (%) | Micro-F1 (%) | @@ -94,5 +94,5 @@ These models were downloaded from their pre-trained described in B-5, and evalua # E. Appendix -1. IGBP Land Cover Classification System +1. IGBP Land Cover Classification System ![Screen Shot 2021-03-21 at 10 52 56 PM](https://user-images.githubusercontent.com/39634122/111934636-2f68ee00-8a98-11eb-8763-8453266227ed.png) From 1b1f838241444a2ada1bf55114ff9ff01b480dad Mon Sep 17 00:00:00 2001 From: taeil Date: Wed, 24 Mar 2021 21:31:24 -0700 Subject: [PATCH 33/59] Add SEN12MS as sub module --- .gitmodules | 3 ++ README.md | 85 ++++++++++++++++++++++++++++----------------- SEN12MS | 1 + references/setup.md | 68 ++++++++---------------------------- 4 files changed, 73 insertions(+), 84 deletions(-) create mode 100644 .gitmodules create mode 160000 SEN12MS diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..065358c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "SEN12MS"] + path = SEN12MS + url = git@github.com:Berkeley-Data/SEN12MS.git diff --git a/README.md b/README.md index 492d5dd..3afb1d5 100644 --- a/README.md +++ b/README.md @@ -58,54 +58,77 @@ bands_std = {'s1_std': [4.525339, 4.3586307], 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} ``` -**NEXT**, copy the pretraining template -```bash -cd src/utils -cp templates/pretraining-config-template.sh pretrain-configs/sen12ms-small.sh -# edit pretrain-configs/sen12ms-small.sh +## Pre-training with SEN12MS Dataset +[OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup) +- see `src/utils/pretrain-runner.sh` for end-to-end run (require prep creating config files). -# once edited, generate the project -./gen-pretrain-project.sh pretrain-configs/my-dataset-config.sh -``` +Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or [wandb](https://wandb.ai/cal-capstone/hpt): -What just happened? 
We generated a bunch of pretraining configs in the following location (take a look at all of these files to get a feel for how this works): -``` -OpenSelfSup/configs/hpt-pretrain/${shortname} +Set up experimental tracking and model versioning: +```bash +export WANDB_API_KEY= +export WANDB_ENTITY=cal-capstone +export WANDB_PROJECT=hpt2 ``` -**NEXT**, you're ready to kick off a trial run to make sure the pretraining is working as expected =) +Run pre-training +```bash +cd OpenSelfSup +# (sanity check) Single GPU training on samll dataset +CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --debug + +# (sanity check) 4 GPUs training on samll dataset +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py 4 + +# distributed full training +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4 +``` + +Extract pre-trained model ```bash -# the `-t` flag means `trial`: it'll only run a 50 iter pretraining - ./utils/pretrain-runner.sh -t -d OpenSelfSup/configs/hpt-pretrain/${shortname} +BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_baseline.pth + +# method 1: From working dir(same system for pre-training) +# CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth + +# method 2: from W&B, {projectid}/{W&B run id} (any system) +CHECKPOINT=hpt2/3l4yg63k + +# Extract the backbone +python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT} + ``` -**NEXT**, if this works, kick off the full training. NOTE: you can kick this off multiple times as long as the config directories share the same filesystem + +## Evaluating Pretrained Representations + +Using OpenSelfSup ```bash -# simply removing the `-t` flag from above - ./utils/pretrain-runner.sh -d OpenSelfSup/configs/hpt-pretrain/${shortname} +python tools/train.py $CFG --pretrained $PRETRAIN + +# RESISC finetune example +tools/train.py --local_rank=0 configs/benchmarks/linear_classification/resisc45/r50_last.py --pretrained work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20_moco_in_basetrain.pth --work_dir work_dirs/benchmarks/linear_classification/resisc45/moco-selfsup/r50_v2_resisc_in_basetrain_20ep-r50_last --seed 0 --launcher=pytorch + + + ``` -**NEXT**, if you want to perform BYOL pretraining, add `-b` flag. + +Using Sen12ms ```bash -# simply add the `-b` flag to above. - ./utils/pretrain-runner.sh -d OpenSelfSup/configs/hpt-pretrain/${shortname} -b ``` -Congratulations: you've launch a full hierarchical pretraining experiment. -**FAQs/PROBLEMS?** -* How does `pretrain-runner.sh` keep track of what's been pretrained? - * In each config directory, it creates a `.pretrain-status` folder to keep track of what's processing/finished. See them with e.g. `find OpenSelfSup/configs/hpt-pretrain -name '.pretrain-status'` -* How to redo a pretraining, e.g. because it crashed or something changed? Remove the - * Remove the associate `.proc` or `.done` file. Find these e.g. 
- ```bash - find OpenSelfSup/configs/hpt-pretrain -name '.proc' - find OpenSelfSup/configs/hpt-pretrain -name '.done' - ``` -## Evaluating Pretrained Representations + +#### Previous +``` +# Evaluate the representations (NOT SURE) +./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE} +``` + This has been simplified to simply: ```bash ./utils/pretrain-evaluator.sh -b OpenSelfSup/work_dirs/hpt-pretrain/${shortname}/ -d OpenSelfSup/configs/hpt-pretrain/${shortname} diff --git a/SEN12MS b/SEN12MS new file mode 160000 index 0000000..feb3e96 --- /dev/null +++ b/SEN12MS @@ -0,0 +1 @@ +Subproject commit feb3e9602c430d53d388e496fbb22b8ff76f68db diff --git a/references/setup.md b/references/setup.md index 728009e..e030be0 100644 --- a/references/setup.md +++ b/references/setup.md @@ -31,11 +31,13 @@ conda activate hpt # NOTE: if you are not using CUDA 10.2, you need to change the 10.2 in this command appropriately. Make sure to use torch 1.6.0 # (check CUDA version with e.g. `cat /usr/local/cuda/version.txt`) -# latest + +# latest torch conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch # 1.6 torch (no support for torchvision transform on tensor) conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch + #colorado machine conda install pytorch==1.2.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch @@ -61,7 +63,7 @@ mkdir ~/data mv resisc45 ~/data # replace/set $DATA and $CODE as appropriate -# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all +# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/OpenSelfSup/data/resisc45/all ln -s $DATA/resisc45 $CODE/OpenSelfSup/data/resisc45/all e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all @@ -69,8 +71,8 @@ e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc4 ### Download Pretrained Models ``` shell -cd OpenSelfSup/data/basetrain_chkpts/ -./download-pretrained-models.sh +mkdir OpenSelfSup/data/basetrain_chkpts +tools/download-pretrained-models.sh ``` ## Verify Install With RESISC DataSet @@ -82,18 +84,22 @@ Check installation by pretraining using mocov2, extracting the model weights, ev ```bash export WANDB_API_KEY= export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=hpt2 +export WANDB_PROJECT=hpt3 #export WANDB_MODE=dryrun + + + + cd OpenSelfSup # Sanity check with single train and single epoch -CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py --debug +CUDA_VISIBLE_DEVICES=x ./tools/single_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py --debug -CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh /scratch/crguest/OpenSelfSup/configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --work_dir work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/ --debug +CUDA_VISIBLE_DEVICES=x ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --work_dir work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_1ep/ --debug # Sanity check: MoCo for 20 epoch on 4 gpus -./tools/dist_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py 4 +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py 4 # if debugging, use tools/train.py configs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep.py --work_dir work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep/ --debug @@ -102,7 +108,7 @@ tools/train.py 
configs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep.py --work_dir CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth BACKBONE=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20_moco_in_basetrain.pth # Extract the backbone -python tools/extract_backbone_weights.py ${CHECKPOINT} ${BACKBONE} +python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT} # Evaluate the representations ./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE} @@ -114,47 +120,3 @@ tensorboard --logdir . ``` -## Verify Install With SEN12MS Dataset -[OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup) - -Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or [wandb](https://wandb.ai/cal-capstone/hpt): - -```bash -export WANDB_API_KEY= -export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=hpt2 - -cd OpenSelfSup - -# single GPU training -CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --debug - -CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug - - -# command for remote debugging, use full path -python /scratch/crguest/OpenSelfSup/tools/train.py /scratch/crguest/OpenSelfSup/configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug - -CUDA_VISIBLE_DEVICES=1 python ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py --debug - -# Sanity check: MoCo for 20 epoch on 4 gpus -#CUDA_VISIBLE_DEVICES=0,1,2,3 -CUDA_VISIBLE_DEVICES=1 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py 4 - -# distributed training -#CUDA_VISIBLE_DEVICES=0,1,2,3 -./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4 - -BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_baseline.pth -# method 1: from working dir -CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth -# method 2: from W&B, {projectid}/{W&B run id} -CHECKPOINT=hpt2/3l4yg63k - -# Extract the backbone -python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT} - -# Evaluate the representations -./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE} - -``` \ No newline at end of file From d388be7ccda7000d3ba87d3352959e86b6a65492 Mon Sep 17 00:00:00 2001 From: taeil Date: Wed, 24 Mar 2021 21:35:48 -0700 Subject: [PATCH 34/59] Add OpenSelfSup as sub module --- .gitmodules | 3 +++ OpenSelfSup | 1 + 2 files changed, 4 insertions(+) create mode 160000 OpenSelfSup diff --git a/.gitmodules b/.gitmodules index 065358c..7bf3595 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "SEN12MS"] path = SEN12MS url = git@github.com:Berkeley-Data/SEN12MS.git +[submodule "OpenSelfSup"] + path = OpenSelfSup + url = https://github.com/Berkeley-Data/OpenSelfSup.git diff --git a/OpenSelfSup b/OpenSelfSup new file mode 160000 index 0000000..2fcf673 --- /dev/null +++ b/OpenSelfSup @@ -0,0 +1 @@ +Subproject commit 2fcf673f0f133f6ab966d018e17cc5ddc4edf80f From aea59e56ae79a95913c051caaf4d2914be3ee93d Mon Sep 17 00:00:00 2001 From: taeil Date: Wed, 24 Mar 2021 22:54:44 -0700 Subject: [PATCH 35/59] updated submodule url --- .gitmodules | 3 ++- SEN12MS | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 7bf3595..742d048 100644 
--- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,7 @@ [submodule "SEN12MS"] path = SEN12MS - url = git@github.com:Berkeley-Data/SEN12MS.git + url = https://github.com/Berkeley-Data/SEN12MS.git + branch = taeil [submodule "OpenSelfSup"] path = OpenSelfSup url = https://github.com/Berkeley-Data/OpenSelfSup.git diff --git a/SEN12MS b/SEN12MS index feb3e96..42a01d2 160000 --- a/SEN12MS +++ b/SEN12MS @@ -1 +1 @@ -Subproject commit feb3e9602c430d53d388e496fbb22b8ff76f68db +Subproject commit 42a01d232d46614d4789215b349b81237f4078cd From 8dd800e7de7342c3d94ba4b102150cea744bbb5a Mon Sep 17 00:00:00 2001 From: taeil Date: Fri, 26 Mar 2021 22:03:39 -0700 Subject: [PATCH 36/59] added instructions --- references/evaluation.md | 32 +++++++++++++++++++++++++++++++ references/model_architectures.md | 5 +++++ 2 files changed, 37 insertions(+) create mode 100644 references/evaluation.md create mode 100644 references/model_architectures.md diff --git a/references/evaluation.md b/references/evaluation.md new file mode 100644 index 0000000..7dce8dd --- /dev/null +++ b/references/evaluation.md @@ -0,0 +1,32 @@ +## download pre-trained models + +Some of key pre-trained models are on s3 (s3://sen12ms/pretrained): +- [200 epochs w/o augmentation: vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p/overview?workspace=user-cjrd) +- [20 epochs w/o augmentation: silvery-oath7-2rr3864e](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e?workspace=user-taeil) +- [sen12ms-baseline: soft-snowflake-3.pth](https://wandb.ai/cal-capstone/SEN12MS/runs/3gjhe4ff/overview?workspace=user-taeil) + +``` +aws configure +aws s3 sync s3://sen12ms/pretrained . --dryrun +aws s3 sync s3://sen12ms/pretrained_sup . --dryrun +``` + +Any other models can be restored by run ID if stored with W&B. 
Go to the Files section under the run to find the `*.pth` files.
+
+#### Extract Backbone
+```
+BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_baseline.pth
+# method 1: from working dir
+CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth
+# method 2: from W&B, {projectid}/{W&B run id}
+CHECKPOINT=hpt2/3l4yg63k
+
+# Extract the backbone
+python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT}
+```
+
+#### Evaluate the representations :confused: :question:
+
+```
+./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE}
+```
\ No newline at end of file
diff --git a/references/model_architectures.md b/references/model_architectures.md
new file mode 100644
index 0000000..da5b7d4
--- /dev/null
+++ b/references/model_architectures.md
@@ -0,0 +1,5 @@
+#### Key model architectures and terms:
+- ResNet50_1x1: adding conv1x1 to original ResNet50 used by sen12ms
+- ResNet50: original ResNet50 used by sen12ms
+- Moco: original ResNet50 initialized the weight by Moco backbone
+- Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module
\ No newline at end of file
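The `dist_train_linear.sh` evaluation above trains a classifier on top of frozen pretrained features. A minimal sketch of that idea -- the feature size, the 10-class head, and the optimizer settings are illustrative assumptions, not the benchmark's actual configuration:

```python
import torch
import torch.nn as nn
import torchvision.models as models

# Frozen backbone + trainable linear head: the idea behind the
# linear-classification benchmark (names and sizes are illustrative).
backbone = models.resnet50()
backbone.fc = nn.Identity()            # expose the 2048-d pooled features
for p in backbone.parameters():
    p.requires_grad = False            # evaluate representations, don't update them
backbone.eval()

head = nn.Linear(2048, 10)             # e.g. 10 simplified IGBP classes
optimizer = torch.optim.SGD(head.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

def train_step(images, labels):
    with torch.no_grad():
        feats = backbone(images)       # (N, 2048) frozen features
    loss = criterion(head(feats), labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```

Freezing the backbone makes this a test of the learned representation rather than of fine-tuning capacity.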
From 553a2fd38fd3d99c2511d79de8fcc2dfd586e1d2 Mon Sep 17 00:00:00 2001
From: taeil
Date: Sat, 27 Mar 2021 09:48:38 -0700
Subject: [PATCH 37/59] updated instructions

---
 SEN12MS | 2 +-
 references/setup.md | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/SEN12MS b/SEN12MS
index 42a01d2..80ce4ca 160000
--- a/SEN12MS
+++ b/SEN12MS
@@ -1 +1 @@
-Subproject commit 42a01d232d46614d4789215b349b81237f4078cd
+Subproject commit 80ce4ca236e442b31d83d1e977c9f96509fcd782
diff --git a/references/setup.md b/references/setup.md
index e030be0..567bd0a 100644
--- a/references/setup.md
+++ b/references/setup.md
@@ -120,3 +120,31 @@ tensorboard --logdir .
```


+## setup sub-modules for sen12ms and openselfsup repo
+
+Cloning
+```console
+git clone --recurse-submodules https://github.com/Berkeley-Data/hpt.git
+
+```
+
+or alternatively
+```
+git submodule init
+git submodule update
+```
+
+additional config
+```
+git config push.recurseSubmodules on-demand
+# show status including submodule
+git config status.submodulesummary 1
+```
+
+update
+```
+git submodule update --remote
+```
+
+For more info: [7.11 Git Tools - Submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules)
+
\ No newline at end of file
From 9372a6073d0726f3f3eb5190fb8841ddb6f42071 Mon Sep 17 00:00:00 2001
From: Taeil Goh
Date: Sat, 27 Mar 2021 10:01:28 -0700
Subject: [PATCH 38/59] Update model_architectures.md

---
 references/model_architectures.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/references/model_architectures.md b/references/model_architectures.md
index da5b7d4..464bede 100644
--- a/references/model_architectures.md
+++ b/references/model_architectures.md
@@ -2,4 +2,5 @@
 - ResNet50_1x1: adding conv1x1 to original ResNet50 used by sen12ms
 - ResNet50: original ResNet50 used by sen12ms
 - Moco: original ResNet50 initialized the weight by Moco backbone
-- Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module \ No newline at end of file
+- Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module
+- Moco_1x1random: ResNet50_1x1 with randomly initialized weights for the Moco backbone and input module
From 21e6e88936f6da6e1ceece3dfe3d49e383bdbab4 Mon Sep 17 00:00:00 2001
From: Taeil Goh
Date: Sat, 27 Mar 2021 16:09:06 -0700
Subject: [PATCH 39/59] Update model_architectures.md

---
 references/model_architectures.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/references/model_architectures.md b/references/model_architectures.md
index 464bede..b217473 100644
--- a/references/model_architectures.md
+++ b/references/model_architectures.md
@@ -1,6 +1,6 @@
 #### Key model architectures and terms:
-- ResNet50_1x1: adding conv1x1 to original ResNet50 used by sen12ms
-- ResNet50: original ResNet50 used by sen12ms
-- Moco: original ResNet50 initialized the weight by Moco backbone
-- Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module
-- Moco_1x1random: ResNet50_1x1 with randomly initialized weights for the Moco backbone and input module
+- ResNet50_1x1: adding conv1x1 to original ResNet50 used by sen12ms (supervised training)
+- ResNet50: original ResNet50 used by sen12ms (supervised training)
+- Moco: original ResNet50 initialized the weight by Moco backbone (transfer learning)
+- Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module (transfer learning)
+- Moco_1x1random: ResNet50_1x1 with randomly initialized weights for the Moco backbone and input module (transfer learning)
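To make the five terms above concrete, here is a minimal sketch of the 1x1-conv input module and the MoCo-initialized variants. The class and function names, the 12-channel input, and the checkpoint format are assumptions for illustration, not the repo's actual implementation:

```python
import torch
import torch.nn as nn
import torchvision.models as models

class ResNet50_1x1(nn.Module):
    """Sketch: a 1x1 conv input module maps multispectral input
    (e.g. 12 SEN12MS bands) to the 3 channels the standard
    ResNet50 stem expects."""
    def __init__(self, in_channels=12, num_classes=10):
        super().__init__()
        self.input_module = nn.Conv2d(in_channels, 3, kernel_size=1)
        self.backbone = models.resnet50(num_classes=num_classes)

    def forward(self, x):
        return self.backbone(self.input_module(x))

def init_from_moco(model, backbone_path):
    """Moco_1x1: copy extracted MoCo weights into the ResNet50 trunk;
    the 1x1 input module and the fc head stay randomly initialized.
    Skipping this call gives the random-init (Moco_1x1random) control."""
    state = torch.load(backbone_path, map_location="cpu")
    state = state.get("state_dict", state)
    missing, unexpected = model.backbone.load_state_dict(state, strict=False)
    print(f"missing keys: {len(missing)}, unexpected keys: {len(unexpected)}")
```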
From ad6b17e65920e8f94c0fbd020544f9d1b2f3e75c Mon Sep 17 00:00:00 2001
From: taeil
Date: Sat, 3 Apr 2021 19:10:27 -0700
Subject: [PATCH 40/59] updated instructions

---
 OpenSelfSup | 2 +-
 README.md | 21 ++++-
 SEN12MS | 2 +-
 references/evaluation.md | 7 +-
 references/model_architectures.md | 2 +-
 references/setup.md | 150 ------------------------------
 6 files changed, 22 insertions(+), 162 deletions(-)
 delete mode 100644 references/setup.md

diff --git a/OpenSelfSup b/OpenSelfSup
index 2fcf673..c28dd45 160000
--- a/OpenSelfSup
+++ b/OpenSelfSup
@@ -1 +1 @@
-Subproject commit 2fcf673f0f133f6ab966d018e17cc5ddc4edf80f
+Subproject commit c28dd4505e143cf4c89fe737adfb43826e2fc266
diff --git a/README.md b/README.md
index 3afb1d5..68c22bb 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

This is a research repository for the submission "Self-Supervised Pretraining Improves Self-Supervised Pretraining"

-For initial setup, refer to [setup instructions](references/setup.md).
+For initial setup, refer to [setup instructions](setup_pretraining.md).

## Setup Weight & Biases Tracking

@@ -68,21 +68,32 @@ Set up experimental tracking and model versioning:
```bash
export WANDB_API_KEY=
export WANDB_ENTITY=cal-capstone
-export WANDB_PROJECT=hpt2
+export WANDB_PROJECT=hpt4
```

Run pre-training
```bash
cd OpenSelfSup

+# set which GPUs to use
+# CUDA_VISIBLE_DEVICES=1
+# CUDA_VISIBLE_DEVICES=0,1,2,3
+
# (sanity check) Single GPU training on small dataset
-CUDA_VISIBLE_DEVICES=1 ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --debug
+./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_aug_20ep.py --debug
+
+# (sanity check) Single GPU training on the small sen12ms fusion dataset
+./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_12ch_in_smoketrain_aug_2ep.py --debug
+

# (sanity check) 4 GPUs training on small dataset
-CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py 4
+./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_aug_20ep.py 4
+
+# (sanity check) 4 GPUs training on small fusion dataset
+./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_12ch_in_smoketrain_aug_2ep.py 4

# distributed full training
-CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4
+./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4
```

Extract pre-trained model
diff --git a/SEN12MS b/SEN12MS
index 80ce4ca..a8a7760 160000
--- a/SEN12MS
+++ b/SEN12MS
@@ -1 +1 @@
-Subproject commit 80ce4ca236e442b31d83d1e977c9f96509fcd782
+Subproject commit a8a7760d81f83b015d341bd3f3ebc87741b0658e
diff --git a/references/evaluation.md b/references/evaluation.md
index 7dce8dd..76b1190 100644
--- a/references/evaluation.md
+++ b/references/evaluation.md
@@ -13,16 +13,15 @@ aws s3 sync s3://sen12ms/pretrained_sup . --dryrun

Any other models can be restored by run ID if stored with W&B. 
Go to files section under the run to find `*.pth` files -#### Extract Backbone +#### Extract and Convert Backbone ``` -BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_baseline.pth # method 1: from working dir CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth # method 2: from W&B, {projectid}/{W&B run id} -CHECKPOINT=hpt2/3l4yg63k +CHECKPOINT=hpt3/2brjqb28 # Extract the backbone -python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT} +python classification/models/convert_moco_to_resnet50.py -i hpt3/2brjqb28 -o pretrained/moco ``` #### Evaluate the representations :confused: :question: diff --git a/references/model_architectures.md b/references/model_architectures.md index 464bede..ab54a31 100644 --- a/references/model_architectures.md +++ b/references/model_architectures.md @@ -3,4 +3,4 @@ - ResNet50: original ResNet50 used by sen12ms - Moco: original ResNet50 initialized the weight by Moco backbone - Moco_1x1: ResNet50_1x1 initialized the weight by Moco backbone and input module -- Moco_1x1random: ResNet50_1x1 randomly the weight by Moco backbone and input module +- Moco_1x1RND: ResNet50_1x1 randomly the weight by Moco backbone and input module diff --git a/references/setup.md b/references/setup.md deleted file mode 100644 index 567bd0a..0000000 --- a/references/setup.md +++ /dev/null @@ -1,150 +0,0 @@ - - -## (optional) GPU instance - -Use `Deep Learning AMI (Ubuntu 18.04) Version 40.0` AMI -- on us-west-2, ami-084f81625fbc98fa4 -- additional disk may be required for data - -Once logged in -``` -# update conda to the latest -conda update -n base conda - -conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch - -``` - -## Installation - -**Dependency repo** -- [modified OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup) -- [modified SEN12MS](https://github.com/Berkeley-Data/SEN12MS) -- [modified irrigation_detection](https://github.com/Berkeley-Data/irrigation_detection) - -```bash -# clone dependency repo on the same levels as this repo and cd into this repo - -# setup environment -conda create -n hpt python=3.7 ipython -conda activate hpt - -# NOTE: if you are not using CUDA 10.2, you need to change the 10.2 in this command appropriately. Make sure to use torch 1.6.0 -# (check CUDA version with e.g. `cat /usr/local/cuda/version.txt`) - -# latest torch -conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch - -# 1.6 torch (no support for torchvision transform on tensor) -conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch - -#colorado machine -conda install pytorch==1.2.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch - -# install local submodules -cd OpenSelfSup -pip install -v -e . -``` - -## Data installation - -Installing and setting up all 16 datsets is a bit of work, so this tutorial shows how to install and setup RESISC-45, and provides links to repeat those steps with other datasets. - -### RESISC-45 -RESISC-45 contains 31,500 aerial images, covering 45 scene classes with 700 images in each class. 
- -``` shell -# cd to the directory where you want the data, $DATA -wget -q https://bit.ly/3pfkHYp -O resisc45.tar.gz -md5sum resisc45.tar.gz # this should be 964dafcfa2dff0402d0772514fb4540b -tar xf resisc45.tar.gz - -mkdir ~/data -mv resisc45 ~/data - -# replace/set $DATA and $CODE as appropriate -# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/OpenSelfSup/data/resisc45/all -ln -s $DATA/resisc45 $CODE/OpenSelfSup/data/resisc45/all - -e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all -``` - -### Download Pretrained Models -``` shell -mkdir OpenSelfSup/data/basetrain_chkpts -tools/download-pretrained-models.sh -``` - -## Verify Install With RESISC DataSet -[OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup) - -Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or [wandb](https://wandb.ai/cal-capstone/hpt): - - -```bash -export WANDB_API_KEY= -export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=hpt3 -#export WANDB_MODE=dryrun - - - - - -cd OpenSelfSup - -# Sanity check with single train and single epoch -CUDA_VISIBLE_DEVICES=x ./tools/single_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py --debug - -CUDA_VISIBLE_DEVICES=x ./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep.py --work_dir work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_1ep/ --debug - -# Sanity check: MoCo for 20 epoch on 4 gpus -CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py 4 - -# if debugging, use -tools/train.py configs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep.py --work_dir work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep/ --debug - -# make some variables so its clear what's happening -CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth -BACKBONE=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20_moco_in_basetrain.pth -# Extract the backbone -python tools/extract_backbone_weights.py ${BACKBONE} ${CHECKPOINT} - -# Evaluate the representations -./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE} - -# View the results (optional if wandb is not configured) -cd work_dirs -# you may need to install tensorboard -tensorboard --logdir . 
-``` - - -## setup sub-modules for sen12ms and openselfsup repo - -Cloning -```console -git clone --recurse-submodules https://github.com/Berkeley-Data/hpt.git - -``` - -or alternatiely -``` -git submodule init -git submodule update -``` - -additional config -``` -git config push.recurseSubmodules on-demand -# show status including submodule -git config status.submodulesummary 1 -``` - -update -``` -git submodule update --remote -``` - -For mroe info: [7.11 Git Tools - Submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) - \ No newline at end of file From 5980c8d9f614ca123e88f686613303507a5fd64f Mon Sep 17 00:00:00 2001 From: taeil Date: Sat, 3 Apr 2021 19:19:07 -0700 Subject: [PATCH 41/59] updated references --- references/setup_pretraining.md | 125 ++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 references/setup_pretraining.md diff --git a/references/setup_pretraining.md b/references/setup_pretraining.md new file mode 100644 index 0000000..e9e2ae3 --- /dev/null +++ b/references/setup_pretraining.md @@ -0,0 +1,125 @@ + + +## (optional) GPU instance + +Use `Deep Learning AMI (Ubuntu 18.04) Version 40.0` AMI +- on us-west-2, ami-084f81625fbc98fa4 +- additional disk may be required for data + +Once logged in +``` +# update conda to the latest +conda update -n base conda + +conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch + +``` + +## Installation + +**Dependency repo** +- [modified OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup) +- [modified SEN12MS](https://github.com/Berkeley-Data/SEN12MS) +- [modified irrigation_detection](https://github.com/Berkeley-Data/irrigation_detection) + +```bash +# clone dependency repo on the same levels as this repo and cd into this repo + +# setup environment +conda create -n hpt python=3.7 ipython +conda activate hpt + +# NOTE: if you are not using CUDA 10.2, you need to change the 10.2 in this command appropriately. Make sure to use torch 1.6.0 +# (check CUDA version with e.g. `cat /usr/local/cuda/version.txt`) + +# latest torch +conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch + +# 1.6 torch (no support for torchvision transform on tensor) +conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch + +#llano machine +conda install pytorch==1.2.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch + +# install local submodules +cd OpenSelfSup +pip install -v -e . +``` + +## Data installation + +Installing and setting up all 16 datsets is a bit of work, so this tutorial shows how to install and setup RESISC-45, and provides links to repeat those steps with other datasets. + +### RESISC-45 +RESISC-45 contains 31,500 aerial images, covering 45 scene classes with 700 images in each class. 
+
+``` shell
+# cd to the directory where you want the data, $DATA
+wget -q https://bit.ly/3pfkHYp -O resisc45.tar.gz
+md5sum resisc45.tar.gz # this should be 964dafcfa2dff0402d0772514fb4540b
+tar xf resisc45.tar.gz
+
+mkdir ~/data
+mv resisc45 ~/data
+
+# replace/set $DATA and $CODE as appropriate
+# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/OpenSelfSup/data/resisc45/all
+ln -s $DATA/resisc45 $CODE/OpenSelfSup/data/resisc45/all
+
+# e.g., ln -s /home/ubuntu/data/resisc45 /home/ubuntu/hpt/OpenSelfSup/data/resisc45/all
+```
+
+### Download Pretrained Models
+``` shell
+tools/download-pretrained-models.sh
+mkdir OpenSelfSup/data/basetrain_chkpts
+mv
+```
+
+## Verify Install With RESISC DataSet
+[OpenSelfSup](https://github.com/Berkeley-Data/OpenSelfSup)
+
+Check installation by pretraining using mocov2, extracting the model weights, evaluating the representations, and then viewing the results on tensorboard or [wandb](https://wandb.ai/cal-capstone/hpt):
+
+
+```bash
+cd OpenSelfSup
+
+CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# Sanity check with single train and single epoch
+./tools/single_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_1ep.py --debug
+
+# Sanity check: MoCo for 20 epoch on 4 gpus
+ ./tools/dist_train.sh configs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep.py 4
+```
+
+
+## setup sub-modules for sen12ms and openselfsup repo
+
+Cloning
+```console
+git clone --recurse-submodules https://github.com/Berkeley-Data/hpt.git
+
+```
+
+or alternatively
+```
+git submodule init
+git submodule update
+```
+
+additional config
+```
+git config push.recurseSubmodules on-demand
+# show status including submodule
+git config status.submodulesummary 1
+```
+
+update
+```
+git submodule update --remote
+```
+
+For more info: [7.11 Git Tools - Submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules)
+
\ No newline at end of file
From 55d312ecc6371e5d3b5f5f3f9ada00de286326b7 Mon Sep 17 00:00:00 2001
From: taeil
Date: Sat, 3 Apr 2021 19:26:30 -0700
Subject: [PATCH 42/59] updated references

---
 README.md | 19 +++++++++++++++++--
 references/evaluation.md | 31 -------------------------------
 2 files changed, 17 insertions(+), 33 deletions(-)
 delete mode 100644 references/evaluation.md

diff --git a/README.md b/README.md
index 68c22bb..c6d9322 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ export WANDB_ENTITY=cal-capstone
 export WANDB_PROJECT=hpt4
 ```
 
-Run pre-training
+#### Run pre-training
 ```bash
 cd OpenSelfSup
 
@@ -96,7 +96,22 @@ cd OpenSelfSup
 ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4
 ```
 
-Extract pre-trained model
+#### (OPTIONAL) download pre-trained models
+
+Some key pre-trained models are on S3 (s3://sen12ms/pretrained):
+- [200 epochs w/o augmentation: vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p/overview?workspace=user-cjrd)
+- [20 epochs w/o augmentation: silvery-oath7-2rr3864e](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e?workspace=user-taeil)
+- [sen12ms-baseline: soft-snowflake-3.pth](https://wandb.ai/cal-capstone/SEN12MS/runs/3gjhe4ff/overview?workspace=user-taeil)
+
+```
+aws configure
+aws s3 sync s3://sen12ms/pretrained . --dryrun
+aws s3 sync s3://sen12ms/pretrained_sup . --dryrun
+```
+
+#### Extract pre-trained model
+Any other models can be restored by run ID if stored with W&B. 
Go to files section under the run to find `*.pth` files + ```bash BACKBONE=work_dirs/selfsup/moco/r50_v2_sen12ms_in_basetrain_20ep/epoch_20_moco_in_baseline.pth diff --git a/references/evaluation.md b/references/evaluation.md deleted file mode 100644 index 76b1190..0000000 --- a/references/evaluation.md +++ /dev/null @@ -1,31 +0,0 @@ -## download pre-trained models - -Some of key pre-trained models are on s3 (s3://sen12ms/pretrained): -- [200 epochs w/o augmentation: vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p/overview?workspace=user-cjrd) -- [20 epochs w/o augmentation: silvery-oath7-2rr3864e](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e?workspace=user-taeil) -- [sen12ms-baseline: soft-snowflake-3.pth](https://wandb.ai/cal-capstone/SEN12MS/runs/3gjhe4ff/overview?workspace=user-taeil) - -``` -aws configure -aws s3 sync s3://sen12ms/pretrained . --dryrun -aws s3 sync s3://sen12ms/pretrained_sup . --dryrun -``` - -Any other models can be restored by run ID if stored with W&B. Go to files section under the run to find `*.pth` files - -#### Extract and Convert Backbone -``` -# method 1: from working dir -CHECKPOINT=work_dirs/selfsup/moco/r50_v2_resisc_in_basetrain_20ep/epoch_20.pth -# method 2: from W&B, {projectid}/{W&B run id} -CHECKPOINT=hpt3/2brjqb28 - -# Extract the backbone -python classification/models/convert_moco_to_resnet50.py -i hpt3/2brjqb28 -o pretrained/moco -``` - -#### Evaluate the representations :confused: :question: - -``` -./benchmarks/dist_train_linear.sh configs/benchmarks/linear_classification/resisc45/r50_last.py ${BACKBONE} -``` \ No newline at end of file From e6b64f8dc20b61f283c0ffc436b9150d3dacc2e2 Mon Sep 17 00:00:00 2001 From: TsungChinHanKen <> Date: Sun, 4 Apr 2021 20:43:57 -0400 Subject: [PATCH 43/59] add tmp file for verifying mean and std --- src/data/calc_mean_std.py | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/data/calc_mean_std.py diff --git a/src/data/calc_mean_std.py b/src/data/calc_mean_std.py new file mode 100644 index 0000000..1a7f441 --- /dev/null +++ b/src/data/calc_mean_std.py @@ -0,0 +1,61 @@ + +import torch +import torchvision.datasets as datasets +import torchvision.transforms as transforms +from torch.utils.data import DataLoader + +data_dir = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0' +# data_dir = '/home/cjrd/data/sen12ms_x' + + +# load data +dataset = datasets.ImageFolder(data_dir, transform=transforms.ToTensor()) +data_loader = DataLoader(dataset=dataset, batch_size=512, num_workers=30, shuffle=True) + +def calc_mean_std(loader): + # var --> std + channel_sum, channel_sq_sum, num_batch = 0, 0, 0 + + for data, _ in loader: + # b x c x w x h + channel_sum += torch.mean(data, dim=[0, 2, 3]) + channel_sq_sum += torch.mean(data**2, dim=[0, 2, 3]) + num_batch += 1 + + mean = channel_sum/ num_batch + std = (channel_sq_sum/ num_batch - mean**2)**0.5 + + return mean, std + +mean, std = calc_mean_std(data_loader) +print(mean) +print(std) + + +### back calc band mean and std +# def back_calc(loader): +# +# channel_sum, channel_sq_sum, num_batch = 0, 0, 0 +# for data, _ in loader: +# x1 = data[0, 0] +# print('x1', x1.shape) +# channel_sum += torch.mean(x1, dim=[0, 1]) +# channel_sq_sum += torch.mean(x1 ** 2, dim=[0, 1]) +# num_batch += 1 +# +# mean = channel_sum / num_batch +# std = (channel_sq_sum / num_batch - mean ** 2) ** 0.5 +# +# return mean, std +# +# mean, std = calc_mean_std(data_loader) +# print(mean) +# print(std) +# + + + + + + + From 
6a9185a5799fb7a2ddf9f7c545ba45496a59aad7 Mon Sep 17 00:00:00 2001 From: TsungChinHanKen <> Date: Tue, 6 Apr 2021 19:07:39 -0400 Subject: [PATCH 44/59] add bigearthnet custom dataset class and calc dataset info script --- src/data/calc_mean_std.py | 61 ---- src/data/clac_personal_check.py | 351 ++++++++++++++++++++++ src/data/dataset_band_info_BigEarthNet.py | 257 ++++++++++++++++ 3 files changed, 608 insertions(+), 61 deletions(-) delete mode 100644 src/data/calc_mean_std.py create mode 100644 src/data/clac_personal_check.py create mode 100644 src/data/dataset_band_info_BigEarthNet.py diff --git a/src/data/calc_mean_std.py b/src/data/calc_mean_std.py deleted file mode 100644 index 1a7f441..0000000 --- a/src/data/calc_mean_std.py +++ /dev/null @@ -1,61 +0,0 @@ - -import torch -import torchvision.datasets as datasets -import torchvision.transforms as transforms -from torch.utils.data import DataLoader - -data_dir = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0' -# data_dir = '/home/cjrd/data/sen12ms_x' - - -# load data -dataset = datasets.ImageFolder(data_dir, transform=transforms.ToTensor()) -data_loader = DataLoader(dataset=dataset, batch_size=512, num_workers=30, shuffle=True) - -def calc_mean_std(loader): - # var --> std - channel_sum, channel_sq_sum, num_batch = 0, 0, 0 - - for data, _ in loader: - # b x c x w x h - channel_sum += torch.mean(data, dim=[0, 2, 3]) - channel_sq_sum += torch.mean(data**2, dim=[0, 2, 3]) - num_batch += 1 - - mean = channel_sum/ num_batch - std = (channel_sq_sum/ num_batch - mean**2)**0.5 - - return mean, std - -mean, std = calc_mean_std(data_loader) -print(mean) -print(std) - - -### back calc band mean and std -# def back_calc(loader): -# -# channel_sum, channel_sq_sum, num_batch = 0, 0, 0 -# for data, _ in loader: -# x1 = data[0, 0] -# print('x1', x1.shape) -# channel_sum += torch.mean(x1, dim=[0, 1]) -# channel_sq_sum += torch.mean(x1 ** 2, dim=[0, 1]) -# num_batch += 1 -# -# mean = channel_sum / num_batch -# std = (channel_sq_sum / num_batch - mean ** 2) ** 0.5 -# -# return mean, std -# -# mean, std = calc_mean_std(data_loader) -# print(mean) -# print(std) -# - - - - - - - diff --git a/src/data/clac_personal_check.py b/src/data/clac_personal_check.py new file mode 100644 index 0000000..1a380dc --- /dev/null +++ b/src/data/clac_personal_check.py @@ -0,0 +1,351 @@ + +###### +import glob +import os +s1 = [] +for file in [os.path.basename(x) for x in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*")]: + s1.append(file) + +### +s1_vh = [] +for file in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*"): + for img in glob.glob(file+'/*_VH.tif'): + s1_vh.append(img) + +s1_vv = [] +for file in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*"): + for img in glob.glob(file+'/*_VV.tif'): + s1_vv.append(img) + +import pickle +with open("s1_vh_list.pkl", 'wb') as f: + pickle.dump(s1_vh, f) + +import pickle +with open("s1_vv_list.pkl", 'wb') as f: + pickle.dump(s1_vv, f) + +### +import pickle +with open("s1_list.pkl", 'wb') as f: + pickle.dump(s1, f) + + +import pickle +with open('s1_list.pkl', 'rb') as f: + s1_list = pickle.load(f) + +#'/home/taeil/hpt_k/src/data/' +# 590362 + +###### + +###### +import argparse + + +###### + +import torch +import torchvision.datasets as datasets +import torchvision.transforms as transforms +from torch.utils.data import DataLoader, Dataset + + +data_dir = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0' + +# data_dir = '/home/taeil/SEN12MS/data/bigearthnet/S1' + +# data_dir = 
'/home/cjrd/data/sen12ms_x' + +data_transforms = transforms.Compose([ + transforms.ToTensor() +]) + +# load data +dataset = datasets.ImageFolder(data_dir, transform= data_transforms) +data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) + +def calc_mean_std(loader): + # var --> std + channel_sum, channel_sq_sum, num_batch = 0, 0, 0 + + for data, _in loader: + # b x c x h x w + channel_sum += torch.mean(data, dim=[0, 2, 3]) + channel_sq_sum += torch.mean(data**2, dim=[0, 2, 3]) + num_batch += 1 + + mean = channel_sum/ num_batch + std = (channel_sq_sum/ num_batch - mean**2)**0.5 + + return mean, std + +mean, std = calc_mean_std(data_loader) +print(mean) +print(std) + + +### new check vh, vv +def calc_mean_std(loader): + # var --> std + channel_sum, channel_sq_sum, num_batch = 0, 0, 0 + for data in loader: + # b x c x h x w + x1 = data[:, :, :, :] + channel_sum += torch.mean(x1, dim=[0, 2, 3]) + channel_sq_sum += torch.mean(x1**2, dim=[0, 2, 3]) + num_batch += 1 + + mean = channel_sum/ num_batch + std = (channel_sq_sum/ num_batch - mean**2)**0.5 + + return mean, std + +mean, std = calc_mean_std(data_loader) +print(mean) +print(std) + + +### +data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) +for data, _ in data_loader: + print(data.shape) + + +###### +###### +def main(self): + print("\n\nbigearth data") + + import torch + import torchvision.datasets as datasets + import torchvision.transforms as transforms + from torch.utils.data import DataLoader, Dataset + + + import os + import pickle as pkl + from tqdm import tqdm + import rasterio + import numpy as np + + path = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/' + data_index_dir = '/home/taeil/hpt_k/src/data' + + data_transforms = transforms.Compose([ + ToTensor() + ]) + + dataset = bigearthnet(path, data_index_dir, imgTransform=data_transforms, + use_s1=True, use_s2=False, use_RGB=True) + + dataset = bigearthnet(path, data_index_dir, imgTransform=False, + use_s1=True, use_s2=False, use_RGB=True) + + # s_nor = ds.__getitem__(10) + # print("id:", s_nor["id"], "\n", + # "input shape:", s_nor["image"].shape) + + # + + data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) + for data in data_loader: + print(data.shape) + + + +### + +# util function for reading s2 data +def load_s2(path, imgTransform, s2_band): + bands_selected = s2_band + with rasterio.open(path) as data: + s2 = data.read(bands_selected) + s2 = s2.astype(np.float32) + if not imgTransform: + s2 = np.clip(s2, 0, 10000) + s2 /= 10000 + s2 = s2.astype(np.float32) + return s2 + +# util function for reading s1 data +def load_s1(path, imgTransform): + with rasterio.open(path) as data: + s1 = data.read() + s1 = s1.astype(np.float32) + s1 = np.nan_to_num(s1) + s1 = np.clip(s1, -25, 0) + if not imgTransform: + s1 /= 25 + s1 += 1 + s1 = s1.astype(np.float32) + return s1 + +# util function for reading data from single sample +def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): + # # load s2 data + # if use_s2: + # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) + # # load only RGB + # if use_RGB and use_s2 == False: + # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) + # + # # load s1 data + # if use_s1: + # if use_s2 or use_RGB: + # img = np.concatenate((img, load_s1(sample["s1"], imgTransform)), axis=0) + # else: + # img = load_s1(sample["s1"], imgTransform) + + img = load_s1(sample["s1"], imgTransform) + # print(sample['id']) + # 
print(img) + + + # load label + # lc = labels[sample["id"]] + + # covert label to IGBP simplified scheme + # if IGBP_s: + # cls1 = sum(lc[0:5]); + # cls2 = sum(lc[5:7]); + # cls3 = sum(lc[7:9]); + # cls6 = lc[11] + lc[13]; + # lc = np.asarray([cls1, cls2, cls3, lc[9], lc[10], cls6, lc[12], lc[14], lc[15], lc[16]]) + + # if label_type == "multi_label": + # lc_hot = (lc >= threshold).astype(np.float32) + # else: + # loc = np.argmax(lc, axis=-1) + # lc_hot = np.zeros_like(lc).astype(np.float32) + # lc_hot[loc] = 1 + + # rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} + + rt_sample = {'image': img, 'id': sample["id"]} + # print(rt_sample['image']) + # print(rt_sample) + + # if imgTransform is not None: + # rt_sample = imgTransform(rt_sample) + + return rt_sample['image'] + + +# calculate number of input channels +def get_ninputs(use_s1, use_s2, use_RGB): + n_inputs = 0 + if use_s2: + n_inputs += len(S2_BANDS_LD) + if use_s1: + n_inputs += 2 + if use_RGB and use_s2 == False: + n_inputs += 3 + + return n_inputs + + +class ToTensor(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, rt_sample): + img, sample_id = rt_sample['image'], rt_sample['id'] + + rt_sample = {'image': torch.tensor(img), 'id': sample_id} + return rt_sample + + +### write a class +class bigearthnet(Dataset): + """PyTorch dataset class for the bigearth dataset""" + + # expects dataset dir as: + # - SEN12MS_holdOutScenes.txt + # - ROIsxxxx_y + # - lc_n + # - s1_n + # - s2_n + # + # SEN12SEN12MS_holdOutScenes.txt contains the subdirs for the official + # train/val/test split and can be obtained from: + # https://github.com/MSchmitt1984/SEN12MS/ + + def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=False, use_s1=False, use_RGB=False): + """Initialize the dataset""" + + # inizialize + super(bigearthnet, self).__init__() + self.imgTransform = imgTransform + + # make sure input parameters are okay + if not (use_s2 or use_s1 or use_RGB): + raise ValueError("No input specified, set at least one of " + + "use_[s2, s1, RGB] to True!") + self.use_s2 = use_s2 + self.use_s1 = use_s1 + self.use_RGB = use_RGB + + # provide number of input channels + self.n_inputs = get_ninputs(use_s1, use_s2, use_RGB) + + # make sure parent dir exists + assert os.path.exists(path) + # assert os.path.exists(ls_dir) + + self.samples = [] + # file = os.path.join(data_index_dir, 's1_list.pkl') + file = os.path.join(data_index_dir, 's1_vh_list.pkl') + # file = os.path.join(data_index_dir, 's1_vv_list.pkl') + print(file) + sample_list = pkl.load(open(file, "rb")) + # print(sample_list) + + pbar = tqdm(total=len(sample_list)) # 18106 samples in test set + pbar.set_description("[Load]") + + pbar.set_description("[Load]") + + # for s2_id in sample_list: + # mini_name = s2_id.split("_") + # s2_loc = os.path.join(path, (mini_name[0] + '_' + mini_name[1]), + # (mini_name[2] + '_' + mini_name[3]), s2_id) + # s1_loc = s2_loc.replace("_s2_", "_s1_").replace("s2_", "s1_") + # + # pbar.update() + # self.samples.append({"s1": s1_loc, "s2": s2_loc, + # "id": s2_id}) + + ###### + for i, name in enumerate(sample_list): + self.samples.append({"id": i, "s1": name}) + + + pbar.close() + # ---------------------------------------------------------------------- + + # sort list of samples + self.samples = sorted(self.samples, key=lambda i: i['id']) + + print(f"loaded {len(self.samples)} from {path}") + + # import lables as a dictionary + # label_file = os.path.join(ls_dir,'IGBP_probability_labels.pkl') + + # a_file = 
open(label_file, "rb") + # self.labels = pkl.load(a_file) + # a_file.close() + + def __getitem__(self, index): + """Get a single example from the dataset""" + + # get and load sample from index file + sample = self.samples[index] + # labels = self.labels + return load_sample(sample, self.imgTransform, self.use_s1, self.use_s2, self.use_RGB) + + def __len__(self): + """Get number of samples in the dataset""" + return len(self.samples) + + diff --git a/src/data/dataset_band_info_BigEarthNet.py b/src/data/dataset_band_info_BigEarthNet.py new file mode 100644 index 0000000..5dd4cb9 --- /dev/null +++ b/src/data/dataset_band_info_BigEarthNet.py @@ -0,0 +1,257 @@ + +import argparse +import os +import pickle as pkl +from tqdm import tqdm +import rasterio +import numpy as np +import glob + +import torch +import torchvision.datasets as datasets +import torchvision.transforms as transforms +from torch.utils.data import DataLoader, Dataset + + +parser = argparse.ArgumentParser(description='Compute image statistics from ImageFolder') +parser.add_argument('--numworkers', type=int, default=30) +parser.add_argument('--batchsize', type=int, default=1) + +# data dir path and index dir +parser.add_argument('--path', type=str, default=None, + help='path to BigEarthNet dataset') +parser.add_argument('--data_index_dir', type=str, default=None, + help="path to generated data list") + +# data modality +parser.add_argument('--use_s1', action='store_true', default=True, + help='use sentinel-1 data') +parser.add_argument('--use_s2', action='store_true', default=False, + help='use sentinel-2 bands') +parser.add_argument('--use_RGB', action='store_true', default=False, + help='use sentinel-2 RGB bands') + +# band (channel) +parser.add_argument('--band', type=str, default=None, + help='band(channel) name from BigEarthNet dataset') + + +### new check vh, vv +def calc_mean_std(loader): + # var --> std + channel_sum, channel_sq_sum, num_batch = 0, 0, 0 + for data in loader: + # b x c x h x w + x1 = data[:, :, :, :] + channel_sum += torch.mean(x1, dim=[0, 2, 3]) + channel_sq_sum += torch.mean(x1**2, dim=[0, 2, 3]) + num_batch += 1 + + mean = channel_sum/ num_batch + std = (channel_sq_sum/ num_batch - mean**2)**0.5 + + return mean, std + + +# util function for reading s1 data +def load_s1(path, imgTransform): + """util to load s1 data + """ + with rasterio.open(path) as data: + s1 = data.read() + s1 = s1.astype(np.float32) + s1 = np.nan_to_num(s1) + s1 = np.clip(s1, -25, 0) + if not imgTransform: + s1 /= 25 + s1 += 1 + s1 = s1.astype(np.float32) + return s1 + +# util function for reading s2 data +def load_s2(path, imgTransform, s2_band): + """wip + """ + # bands_selected = s2_band + # with rasterio.open(path) as data: + # s2 = data.read(bands_selected) + # s2 = s2.astype(np.float32) + # if not imgTransform: + # s2 = np.clip(s2, 0, 10000) + # s2 /= 10000 + # s2 = s2.astype(np.float32) + # return s2 + + +# util function for reading data from single sample +def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): + """loading sample data + """ + # # load s2 data + # if use_s2: + # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) + # # load only RGB + # if use_RGB and use_s2 == False: + # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) + # + # # load s1 data + # if use_s1: + # if use_s2 or use_RGB: + # img = np.concatenate((img, load_s1(sample["s1"], imgTransform)), axis=0) + # else: + # img = load_s1(sample["s1"], imgTransform) + + if use_s1: + img = load_s1(sample["s1"], imgTransform) + # 
print(sample['id']) + # print(img) + + + # load label + # lc = labels[sample["id"]] + + # covert label to IGBP simplified scheme + # if IGBP_s: + # cls1 = sum(lc[0:5]); + # cls2 = sum(lc[5:7]); + # cls3 = sum(lc[7:9]); + # cls6 = lc[11] + lc[13]; + # lc = np.asarray([cls1, cls2, cls3, lc[9], lc[10], cls6, lc[12], lc[14], lc[15], lc[16]]) + + # if label_type == "multi_label": + # lc_hot = (lc >= threshold).astype(np.float32) + # else: + # loc = np.argmax(lc, axis=-1) + # lc_hot = np.zeros_like(lc).astype(np.float32) + # lc_hot[loc] = 1 + + # rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} + + rt_sample = {'image': img, 'id': sample["id"]} + # print(rt_sample['image']) + # print(rt_sample) + + # if imgTransform is not None: + # rt_sample = imgTransform(rt_sample) + + return rt_sample['image'] + + +# calculate number of input channels +def get_ninputs(use_s1, use_s2, use_RGB): + n_inputs = 0 + if use_s2: + n_inputs += len(S2_BANDS_LD) + if use_s1: + n_inputs += 2 + if use_RGB and use_s2 == False: + n_inputs += 3 + + return n_inputs + + +class ToTensor(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, rt_sample): + img, sample_id = rt_sample['image'], rt_sample['id'] + + rt_sample = {'image': torch.tensor(img), 'id': sample_id} + return rt_sample + + +### write a class +class bigearthnet(Dataset): + """pytorch dataset class custom for BigEarthNet + """ + + def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=False, use_s1=False, use_RGB=False, band=None): + """Initialize the dataset + """ + + # initialize + super(bigearthnet, self).__init__() + self.imgTransform = imgTransform + + # make sure input parameters are okay + if not (use_s2 or use_s1 or use_RGB): + raise ValueError("No input specified, set at least one of " + + "use_[s2, s1, RGB] to True!") + self.use_s2 = use_s2 + self.use_s1 = use_s1 + self.use_RGB = use_RGB + + # provide number of input channels + self.n_inputs = get_ninputs(use_s1, use_s2, use_RGB) + + # make sure parent dir exists + # assert os.path.exists(path) + # assert os.path.exists(ls_dir) + + self.samples = [] + # file = os.path.join(data_index_dir, 's1_list.pkl') + + if band == 'VV': + file = os.path.join(data_index_dir, 's1_vv_list.pkl') + elif band == 'VH': + file = os.path.join(data_index_dir, 's1_vh_list.pkl') + + sample_list = pkl.load(open(file, "rb")) + + pbar = tqdm(total=len(sample_list)) + pbar.set_description("[Load]") + + for i, name in enumerate(sample_list): + self.samples.append({"id": i, "s1": name}) + pbar.update() + + pbar.close() + # ---------------------------------------------------------------------- + + # sort list of samples + # self.samples = sorted(self.samples, key=lambda i: i['id']) + # + # print(f"loaded {len(self.samples)} from {path}") + + # import lables as a dictionary + # label_file = os.path.join(ls_dir,'IGBP_probability_labels.pkl') + + # a_file = open(label_file, "rb") + # self.labels = pkl.load(a_file) + # a_file.close() + + def __getitem__(self, index): + """Get a single example from the dataset""" + + # get and load sample from index file + sample = self.samples[index] + # labels = self.labels + return load_sample(sample, self.imgTransform, self.use_s1, self.use_s2, self.use_RGB) + + def __len__(self): + """Get number of samples in the dataset""" + return len(self.samples) + + +def main(args): + # path = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/' + # data_index_dir = '/home/taeil/hpt_k/src/data' + + data_transforms = transforms.Compose([ + ToTensor() + ]) 
+ + dataset = bigearthnet(path=args.path, data_index_dir=args.data_index_dir, imgTransform=data_transforms,\ + use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, band=args.band) + + data_loader = DataLoader(dataset=dataset, batch_size=args.batchsize, num_workers=args.numworkers, shuffle=True) + + # calc mean and std for the dataset + mean, std = calc_mean_std(data_loader) + print('mean:{}, std:{}'.format(mean[0], std[0])) + + + +if __name__ == "__main__": + main(parser.parse_args()) + From 4049792a1b13681e99045732e8825c30b4acbf4f Mon Sep 17 00:00:00 2001 From: TsungChinHanKen <> Date: Tue, 6 Apr 2021 19:11:25 -0400 Subject: [PATCH 45/59] rm personal checkup file --- src/data/clac_personal_check.py | 351 -------------------------------- 1 file changed, 351 deletions(-) delete mode 100644 src/data/clac_personal_check.py diff --git a/src/data/clac_personal_check.py b/src/data/clac_personal_check.py deleted file mode 100644 index 1a380dc..0000000 --- a/src/data/clac_personal_check.py +++ /dev/null @@ -1,351 +0,0 @@ - -###### -import glob -import os -s1 = [] -for file in [os.path.basename(x) for x in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*")]: - s1.append(file) - -### -s1_vh = [] -for file in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*"): - for img in glob.glob(file+'/*_VH.tif'): - s1_vh.append(img) - -s1_vv = [] -for file in glob.glob("/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/*"): - for img in glob.glob(file+'/*_VV.tif'): - s1_vv.append(img) - -import pickle -with open("s1_vh_list.pkl", 'wb') as f: - pickle.dump(s1_vh, f) - -import pickle -with open("s1_vv_list.pkl", 'wb') as f: - pickle.dump(s1_vv, f) - -### -import pickle -with open("s1_list.pkl", 'wb') as f: - pickle.dump(s1, f) - - -import pickle -with open('s1_list.pkl', 'rb') as f: - s1_list = pickle.load(f) - -#'/home/taeil/hpt_k/src/data/' -# 590362 - -###### - -###### -import argparse - - -###### - -import torch -import torchvision.datasets as datasets -import torchvision.transforms as transforms -from torch.utils.data import DataLoader, Dataset - - -data_dir = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0' - -# data_dir = '/home/taeil/SEN12MS/data/bigearthnet/S1' - -# data_dir = '/home/cjrd/data/sen12ms_x' - -data_transforms = transforms.Compose([ - transforms.ToTensor() -]) - -# load data -dataset = datasets.ImageFolder(data_dir, transform= data_transforms) -data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) - -def calc_mean_std(loader): - # var --> std - channel_sum, channel_sq_sum, num_batch = 0, 0, 0 - - for data, _in loader: - # b x c x h x w - channel_sum += torch.mean(data, dim=[0, 2, 3]) - channel_sq_sum += torch.mean(data**2, dim=[0, 2, 3]) - num_batch += 1 - - mean = channel_sum/ num_batch - std = (channel_sq_sum/ num_batch - mean**2)**0.5 - - return mean, std - -mean, std = calc_mean_std(data_loader) -print(mean) -print(std) - - -### new check vh, vv -def calc_mean_std(loader): - # var --> std - channel_sum, channel_sq_sum, num_batch = 0, 0, 0 - for data in loader: - # b x c x h x w - x1 = data[:, :, :, :] - channel_sum += torch.mean(x1, dim=[0, 2, 3]) - channel_sq_sum += torch.mean(x1**2, dim=[0, 2, 3]) - num_batch += 1 - - mean = channel_sum/ num_batch - std = (channel_sq_sum/ num_batch - mean**2)**0.5 - - return mean, std - -mean, std = calc_mean_std(data_loader) -print(mean) -print(std) - - -### -data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) -for data, _ in data_loader: - 
print(data.shape) - - -###### -###### -def main(self): - print("\n\nbigearth data") - - import torch - import torchvision.datasets as datasets - import torchvision.transforms as transforms - from torch.utils.data import DataLoader, Dataset - - - import os - import pickle as pkl - from tqdm import tqdm - import rasterio - import numpy as np - - path = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/' - data_index_dir = '/home/taeil/hpt_k/src/data' - - data_transforms = transforms.Compose([ - ToTensor() - ]) - - dataset = bigearthnet(path, data_index_dir, imgTransform=data_transforms, - use_s1=True, use_s2=False, use_RGB=True) - - dataset = bigearthnet(path, data_index_dir, imgTransform=False, - use_s1=True, use_s2=False, use_RGB=True) - - # s_nor = ds.__getitem__(10) - # print("id:", s_nor["id"], "\n", - # "input shape:", s_nor["image"].shape) - - # - - data_loader = DataLoader(dataset=dataset, batch_size=256, num_workers=30, shuffle=True) - for data in data_loader: - print(data.shape) - - - -### - -# util function for reading s2 data -def load_s2(path, imgTransform, s2_band): - bands_selected = s2_band - with rasterio.open(path) as data: - s2 = data.read(bands_selected) - s2 = s2.astype(np.float32) - if not imgTransform: - s2 = np.clip(s2, 0, 10000) - s2 /= 10000 - s2 = s2.astype(np.float32) - return s2 - -# util function for reading s1 data -def load_s1(path, imgTransform): - with rasterio.open(path) as data: - s1 = data.read() - s1 = s1.astype(np.float32) - s1 = np.nan_to_num(s1) - s1 = np.clip(s1, -25, 0) - if not imgTransform: - s1 /= 25 - s1 += 1 - s1 = s1.astype(np.float32) - return s1 - -# util function for reading data from single sample -def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): - # # load s2 data - # if use_s2: - # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) - # # load only RGB - # if use_RGB and use_s2 == False: - # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) - # - # # load s1 data - # if use_s1: - # if use_s2 or use_RGB: - # img = np.concatenate((img, load_s1(sample["s1"], imgTransform)), axis=0) - # else: - # img = load_s1(sample["s1"], imgTransform) - - img = load_s1(sample["s1"], imgTransform) - # print(sample['id']) - # print(img) - - - # load label - # lc = labels[sample["id"]] - - # covert label to IGBP simplified scheme - # if IGBP_s: - # cls1 = sum(lc[0:5]); - # cls2 = sum(lc[5:7]); - # cls3 = sum(lc[7:9]); - # cls6 = lc[11] + lc[13]; - # lc = np.asarray([cls1, cls2, cls3, lc[9], lc[10], cls6, lc[12], lc[14], lc[15], lc[16]]) - - # if label_type == "multi_label": - # lc_hot = (lc >= threshold).astype(np.float32) - # else: - # loc = np.argmax(lc, axis=-1) - # lc_hot = np.zeros_like(lc).astype(np.float32) - # lc_hot[loc] = 1 - - # rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} - - rt_sample = {'image': img, 'id': sample["id"]} - # print(rt_sample['image']) - # print(rt_sample) - - # if imgTransform is not None: - # rt_sample = imgTransform(rt_sample) - - return rt_sample['image'] - - -# calculate number of input channels -def get_ninputs(use_s1, use_s2, use_RGB): - n_inputs = 0 - if use_s2: - n_inputs += len(S2_BANDS_LD) - if use_s1: - n_inputs += 2 - if use_RGB and use_s2 == False: - n_inputs += 3 - - return n_inputs - - -class ToTensor(object): - """Convert ndarrays in sample to Tensors.""" - - def __call__(self, rt_sample): - img, sample_id = rt_sample['image'], rt_sample['id'] - - rt_sample = {'image': torch.tensor(img), 'id': sample_id} - return rt_sample - - -### write a class -class 
bigearthnet(Dataset): - """PyTorch dataset class for the bigearth dataset""" - - # expects dataset dir as: - # - SEN12MS_holdOutScenes.txt - # - ROIsxxxx_y - # - lc_n - # - s1_n - # - s2_n - # - # SEN12SEN12MS_holdOutScenes.txt contains the subdirs for the official - # train/val/test split and can be obtained from: - # https://github.com/MSchmitt1984/SEN12MS/ - - def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=False, use_s1=False, use_RGB=False): - """Initialize the dataset""" - - # inizialize - super(bigearthnet, self).__init__() - self.imgTransform = imgTransform - - # make sure input parameters are okay - if not (use_s2 or use_s1 or use_RGB): - raise ValueError("No input specified, set at least one of " - + "use_[s2, s1, RGB] to True!") - self.use_s2 = use_s2 - self.use_s1 = use_s1 - self.use_RGB = use_RGB - - # provide number of input channels - self.n_inputs = get_ninputs(use_s1, use_s2, use_RGB) - - # make sure parent dir exists - assert os.path.exists(path) - # assert os.path.exists(ls_dir) - - self.samples = [] - # file = os.path.join(data_index_dir, 's1_list.pkl') - file = os.path.join(data_index_dir, 's1_vh_list.pkl') - # file = os.path.join(data_index_dir, 's1_vv_list.pkl') - print(file) - sample_list = pkl.load(open(file, "rb")) - # print(sample_list) - - pbar = tqdm(total=len(sample_list)) # 18106 samples in test set - pbar.set_description("[Load]") - - pbar.set_description("[Load]") - - # for s2_id in sample_list: - # mini_name = s2_id.split("_") - # s2_loc = os.path.join(path, (mini_name[0] + '_' + mini_name[1]), - # (mini_name[2] + '_' + mini_name[3]), s2_id) - # s1_loc = s2_loc.replace("_s2_", "_s1_").replace("s2_", "s1_") - # - # pbar.update() - # self.samples.append({"s1": s1_loc, "s2": s2_loc, - # "id": s2_id}) - - ###### - for i, name in enumerate(sample_list): - self.samples.append({"id": i, "s1": name}) - - - pbar.close() - # ---------------------------------------------------------------------- - - # sort list of samples - self.samples = sorted(self.samples, key=lambda i: i['id']) - - print(f"loaded {len(self.samples)} from {path}") - - # import lables as a dictionary - # label_file = os.path.join(ls_dir,'IGBP_probability_labels.pkl') - - # a_file = open(label_file, "rb") - # self.labels = pkl.load(a_file) - # a_file.close() - - def __getitem__(self, index): - """Get a single example from the dataset""" - - # get and load sample from index file - sample = self.samples[index] - # labels = self.labels - return load_sample(sample, self.imgTransform, self.use_s1, self.use_s2, self.use_RGB) - - def __len__(self): - """Get number of samples in the dataset""" - return len(self.samples) - - From ce4e4c1866bb28d4571ae3f5cae4a0290c0af41d Mon Sep 17 00:00:00 2001 From: TsungChinHanKen <> Date: Tue, 6 Apr 2021 19:31:54 -0400 Subject: [PATCH 46/59] add desc --- src/data/dataset_band_info_BigEarthNet.py | 37 ++++++++++++----------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/data/dataset_band_info_BigEarthNet.py b/src/data/dataset_band_info_BigEarthNet.py index 5dd4cb9..3c62964 100644 --- a/src/data/dataset_band_info_BigEarthNet.py +++ b/src/data/dataset_band_info_BigEarthNet.py @@ -1,4 +1,13 @@ +""" +- Tsung-Chin Han (Ken)- + +- Descritions - +This script is to help calculate for each of the band mean and standard deviation for +BigEarthNet dataset. Note that this custom load does not stack all channels together. +Instead, by specifying the channel name, it helps compute the band mean and standard deviation. 
+""" + import argparse import os import pickle as pkl @@ -12,32 +21,29 @@ import torchvision.transforms as transforms from torch.utils.data import DataLoader, Dataset - -parser = argparse.ArgumentParser(description='Compute image statistics from ImageFolder') +parser = argparse.ArgumentParser(description='Compute band image mean and stdev from BigEarthNet class') parser.add_argument('--numworkers', type=int, default=30) parser.add_argument('--batchsize', type=int, default=1) -# data dir path and index dir +# data path & data index directory parser.add_argument('--path', type=str, default=None, help='path to BigEarthNet dataset') parser.add_argument('--data_index_dir', type=str, default=None, help="path to generated data list") - # data modality parser.add_argument('--use_s1', action='store_true', default=True, help='use sentinel-1 data') parser.add_argument('--use_s2', action='store_true', default=False, help='use sentinel-2 bands') -parser.add_argument('--use_RGB', action='store_true', default=False, - help='use sentinel-2 RGB bands') - -# band (channel) +# band (channel) name +# S1 - VV, VH +# S2 - B01, B02, B03, B04, B05, B06, B07, B08, B09, B11, B12, B8A parser.add_argument('--band', type=str, default=None, help='band(channel) name from BigEarthNet dataset') - -### new check vh, vv def calc_mean_std(loader): + """calc band image dataset mean and standard deviation + """ # var --> std channel_sum, channel_sq_sum, num_batch = 0, 0, 0 for data in loader: @@ -53,7 +59,6 @@ def calc_mean_std(loader): return mean, std -# util function for reading s1 data def load_s1(path, imgTransform): """util to load s1 data """ @@ -68,7 +73,7 @@ def load_s1(path, imgTransform): s1 = s1.astype(np.float32) return s1 -# util function for reading s2 data + def load_s2(path, imgTransform, s2_band): """wip """ @@ -83,9 +88,8 @@ def load_s2(path, imgTransform, s2_band): # return s2 -# util function for reading data from single sample def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): - """loading sample data + """util to load sample data """ # # load s2 data # if use_s2: @@ -160,7 +164,7 @@ def __call__(self, rt_sample): return rt_sample -### write a class +### custom BigEarthNet dataset class class bigearthnet(Dataset): """pytorch dataset class custom for BigEarthNet """ @@ -175,8 +179,7 @@ def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=Fal # make sure input parameters are okay if not (use_s2 or use_s1 or use_RGB): - raise ValueError("No input specified, set at least one of " - + "use_[s2, s1, RGB] to True!") + raise ValueError("No input specified, set at least one of " + "use_[s2, s1, RGB] to True!") self.use_s2 = use_s2 self.use_s1 = use_s1 self.use_RGB = use_RGB From 7f42f00d4d461dec0a5185106f057231be883d8c Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Tue, 6 Apr 2021 19:36:59 -0400 Subject: [PATCH 47/59] Update README.md --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 492d5dd..aa4ca53 100644 --- a/README.md +++ b/README.md @@ -301,3 +301,22 @@ python tools/train.py configs/hpt-pretrain/resisc/moco_v2_800ep_basetrain/500-it ``` +## (Other) BigEarthNet bands mean and standard deviation + +For S-1 data, band name {'VV', 'VH'} +```bash +python dataset_band_info_BigEarthNet.py\ + --path \ + --data_index_dir \ + --numworkers 30\ + --batchsize 256\ + --use_s1\ + --band + +``` + + + + + + From 31bc8d9b85285a42b31f314d69365749129d0bd4 Mon Sep 17 
00:00:00 2001 From: TsungChinHanKen <> Date: Tue, 6 Apr 2021 19:48:06 -0400 Subject: [PATCH 48/59] rm rgb s2, re-work on s2 for bigearthnet --- src/data/dataset_band_info_BigEarthNet.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/data/dataset_band_info_BigEarthNet.py b/src/data/dataset_band_info_BigEarthNet.py index 3c62964..64a470c 100644 --- a/src/data/dataset_band_info_BigEarthNet.py +++ b/src/data/dataset_band_info_BigEarthNet.py @@ -89,7 +89,7 @@ def load_s2(path, imgTransform, s2_band): def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): - """util to load sample data + """util to load sample data (wip) """ # # load s2 data # if use_s2: @@ -142,14 +142,12 @@ def load_sample(sample, imgTransform, use_s1, use_s2, use_RGB): # calculate number of input channels -def get_ninputs(use_s1, use_s2, use_RGB): +def get_ninputs(use_s1, use_s2): n_inputs = 0 - if use_s2: - n_inputs += len(S2_BANDS_LD) + # if use_s2: + # n_inputs += len(S2_BANDS_LD) if use_s1: n_inputs += 2 - if use_RGB and use_s2 == False: - n_inputs += 3 return n_inputs @@ -169,7 +167,7 @@ class bigearthnet(Dataset): """pytorch dataset class custom for BigEarthNet """ - def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=False, use_s1=False, use_RGB=False, band=None): + def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=False, use_s1=False, band=None): """Initialize the dataset """ @@ -178,14 +176,13 @@ def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=Fal self.imgTransform = imgTransform # make sure input parameters are okay - if not (use_s2 or use_s1 or use_RGB): + if not (use_s2 or use_s1): raise ValueError("No input specified, set at least one of " + "use_[s2, s1, RGB] to True!") self.use_s2 = use_s2 self.use_s1 = use_s1 - self.use_RGB = use_RGB # provide number of input channels - self.n_inputs = get_ninputs(use_s1, use_s2, use_RGB) + self.n_inputs = get_ninputs(use_s1, use_s2) # make sure parent dir exists # assert os.path.exists(path) @@ -229,7 +226,7 @@ def __getitem__(self, index): # get and load sample from index file sample = self.samples[index] # labels = self.labels - return load_sample(sample, self.imgTransform, self.use_s1, self.use_s2, self.use_RGB) + return load_sample(sample, self.imgTransform, self.use_s1, self.use_s2) def __len__(self): """Get number of samples in the dataset""" @@ -245,7 +242,7 @@ def main(args): ]) dataset = bigearthnet(path=args.path, data_index_dir=args.data_index_dir, imgTransform=data_transforms,\ - use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, band=args.band) + use_s1=args.use_s1, use_s2=args.use_s2, band=args.band) data_loader = DataLoader(dataset=dataset, batch_size=args.batchsize, num_workers=args.numworkers, shuffle=True) From de5d415b8ad8c6834b097b371bdc97a2624a9d9e Mon Sep 17 00:00:00 2001 From: TsungChinHanKen <> Date: Tue, 6 Apr 2021 20:05:51 -0400 Subject: [PATCH 49/59] rm lines not used --- src/data/dataset_band_info_BigEarthNet.py | 41 ++--------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/src/data/dataset_band_info_BigEarthNet.py b/src/data/dataset_band_info_BigEarthNet.py index 5cbe71d..4b7911b 100644 --- a/src/data/dataset_band_info_BigEarthNet.py +++ b/src/data/dataset_band_info_BigEarthNet.py @@ -107,36 +107,12 @@ def load_sample(sample, imgTransform, use_s1, use_s2): if use_s1: img = load_s1(sample["s1"], imgTransform) - # print(sample['id']) - # print(img) - # load label - # lc = 
labels[sample["id"]]
-
-    # covert label to IGBP simplified scheme
-    # if IGBP_s:
-    #     cls1 = sum(lc[0:5]);
-    #     cls2 = sum(lc[5:7]);
-    #     cls3 = sum(lc[7:9]);
-    #     cls6 = lc[11] + lc[13];
-    #     lc = np.asarray([cls1, cls2, cls3, lc[9], lc[10], cls6, lc[12], lc[14], lc[15], lc[16]])
-
-    # if label_type == "multi_label":
-    #     lc_hot = (lc >= threshold).astype(np.float32)
-    # else:
-    #     loc = np.argmax(lc, axis=-1)
-    #     lc_hot = np.zeros_like(lc).astype(np.float32)
-    #     lc_hot[loc] = 1
-
     # rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]}

     rt_sample = {'image': img, 'id': sample["id"]}
-    # print(rt_sample['image'])
-    # print(rt_sample)

-    # if imgTransform is not None:
-    #     rt_sample = imgTransform(rt_sample)

     return rt_sample['image']

@@ -184,10 +160,6 @@ def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=Fal
         # provide number of input channels
         self.n_inputs = get_ninputs(use_s1, use_s2)

-        # make sure parent dir exists
-        # assert os.path.exists(path)
-        # assert os.path.exists(ls_dir)
-
         self.samples = []

         # file = os.path.join(data_index_dir, 's1_list.pkl')
@@ -206,19 +178,9 @@ def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=Fal
             pbar.update()

         pbar.close()
-        # ----------------------------------------------------------------------
-
-        # sort list of samples
-        # self.samples = sorted(self.samples, key=lambda i: i['id'])
-        #
-        # print(f"loaded {len(self.samples)} from {path}")

-        # import lables as a dictionary
-        # label_file = os.path.join(ls_dir,'IGBP_probability_labels.pkl')
+        return

-        # a_file = open(label_file, "rb")
-        # self.labels = pkl.load(a_file)
-        # a_file.close()

     def __getitem__(self, index):
         """Get a single example from the dataset"""
@@ -233,6 +195,7 @@ def __len__(self):
         return len(self.samples)


+
 def main(args):
     # path = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/'
     # data_index_dir = '/home/taeil/hpt_k/src/data'
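
One caveat worth recording about `calc_mean_std` as it is used throughout these patches: it averages per-batch means and per-batch second moments, so its result is exact only when every batch has the same size (a smaller final batch skews the estimate slightly). A self-contained sketch of the identity it relies on, checked on synthetic equal-sized batches standing in for the BigEarthNet loader:

```python
import torch

def calc_mean_std(loader):
    # accumulate per-channel E[X] and E[X^2] batch by batch, then average
    channel_sum, channel_sq_sum, num_batch = 0, 0, 0
    for data in loader:
        # data: B x C x H x W
        channel_sum += torch.mean(data, dim=[0, 2, 3])
        channel_sq_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batch += 1
    mean = channel_sum / num_batch
    std = (channel_sq_sum / num_batch - mean ** 2) ** 0.5  # sqrt(E[X^2] - E[X]^2)
    return mean, std

# hypothetical stand-in for the BigEarthNet loader: 8 equal batches of 2-channel tiles
fake_loader = [torch.randn(16, 2, 64, 64) for _ in range(8)]
mean, std = calc_mean_std(fake_loader)

# with equal batch sizes this matches the exact global per-channel statistics
flat = torch.cat(fake_loader).permute(1, 0, 2, 3).reshape(2, -1)
assert torch.allclose(mean, flat.mean(dim=1), atol=1e-3)
assert torch.allclose(std, flat.std(dim=1, unbiased=False), atol=1e-3)
```

From 0e6d9fdbe4aa2caec9417ee44aaf83be541a99a5 Mon Sep 17 00:00:00 2001
From: TsungChinHanKen <>
Date: Wed, 7 Apr 2021 20:51:10 -0400
Subject: [PATCH 50/59] update BigEarthNet channel data stats calc. include also the s-2 data band

---
 ...arthNet.py => dataset_calc_BigEarthNet.py} | 154 +++++++++++-------
 src/data/dataset_calc_BigEarthNet.sh          |  39 +++++
 2 files changed, 131 insertions(+), 62 deletions(-)
 rename src/data/{dataset_band_info_BigEarthNet.py => dataset_calc_BigEarthNet.py} (58%)
 create mode 100644 src/data/dataset_calc_BigEarthNet.sh

diff --git a/src/data/dataset_band_info_BigEarthNet.py b/src/data/dataset_calc_BigEarthNet.py
index 4b7911b..23dbe1e 100644
--- a/src/data/dataset_band_info_BigEarthNet.py
+++ b/src/data/dataset_calc_BigEarthNet.py
@@ -1,11 +1,19 @@
 """
-- Tsung-Chin Han (Ken)-
-
-- Descritions -
-This script is to help calculate for each of the band mean and standard deviation for
+### Tsung-Chin Han
+### Descriptions -
+This script helps calculate the mean and standard deviation for "each" band of the
 BigEarthNet dataset. Note that this custom load does not stack all channels together.
-Instead, by specifying the channel name, it helps compute the band mean and standard deviation.
+Instead, by specifying the channel name, it computes the mean and standard deviation for that channel's data. 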
+ + +### BigEarthNet channels for S-1 and S-2 +# S-1 - VV, VH +# S-2 - B01, B02, B03, B04, B05, B06, B07, B08, B09, B11, B12, B8A + +Note that the calc is based on raw pixel values to reflect the original data stats, +It does not do any pixel "clip" or normalization. + """ import argparse @@ -22,6 +30,7 @@ from torch.utils.data import DataLoader, Dataset parser = argparse.ArgumentParser(description='Compute band image mean and stdev from BigEarthNet class') +# batch parser.add_argument('--numworkers', type=int, default=30) parser.add_argument('--batchsize', type=int, default=1) @@ -31,15 +40,21 @@ parser.add_argument('--data_index_dir', type=str, default=None, help="path to generated data list") # data modality -parser.add_argument('--use_s1', action='store_true', default=True, - help='use sentinel-1 data') +parser.add_argument('--use_s1', action='store_true', default=False, + help='use Sentinel-1 data') parser.add_argument('--use_s2', action='store_true', default=False, - help='use sentinel-2 bands') + help='use Sentinel-2 bands') +# channel name +parser.add_argument('--band', type=str, default=None, + help='band(channel) name from BigEarthNet dataset') + # band (channel) name # S1 - VV, VH # S2 - B01, B02, B03, B04, B05, B06, B07, B08, B09, B11, B12, B8A -parser.add_argument('--band', type=str, default=None, - help='band(channel) name from BigEarthNet dataset') +# band info +s1_band = ['VV', 'VW'] +s2_band = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'] + def calc_mean_std(loader): """calc band image dataset mean and standard deviation @@ -60,81 +75,60 @@ def calc_mean_std(loader): def load_s1(path, imgTransform): - """util to load s1 data + """load s1 band data raw """ with rasterio.open(path) as data: s1 = data.read() s1 = s1.astype(np.float32) s1 = np.nan_to_num(s1) - s1 = np.clip(s1, -25, 0) - if not imgTransform: - s1 /= 25 - s1 += 1 - s1 = s1.astype(np.float32) return s1 -def load_s2(path, imgTransform, s2_band): - """wip +def load_s2(path, imgTransform): + """load s2 band data raw """ - # bands_selected = s2_band - # with rasterio.open(path) as data: - # s2 = data.read(bands_selected) - # s2 = s2.astype(np.float32) - # if not imgTransform: - # s2 = np.clip(s2, 0, 10000) - # s2 /= 10000 - # s2 = s2.astype(np.float32) - # return s2 + with rasterio.open(path) as data: + s2 = data.read() + s2 = s2.astype(np.float32) + s2 = np.nan_to_num(s2) + return s2 def load_sample(sample, imgTransform, use_s1, use_s2): """util to load sample data (wip) + to do --> stacked data """ - # # load s2 data - # if use_s2: - # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) - # # load only RGB - # if use_RGB and use_s2 == False: - # img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) - # - # # load s1 data - # if use_s1: - # if use_s2 or use_RGB: - # img = np.concatenate((img, load_s1(sample["s1"], imgTransform)), axis=0) - # else: - # img = load_s1(sample["s1"], imgTransform) - if use_s1: img = load_s1(sample["s1"], imgTransform) - - - # rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} + if use_s2: + img = load_s2(sample["s2"], imgTransform) rt_sample = {'image': img, 'id': sample["id"]} - return rt_sample['image'] -# calculate number of input channels def get_ninputs(use_s1, use_s2): + """return number of input channels + wip - to do - work on stacked the bands + """ n_inputs = 0 - # if use_s2: - # n_inputs += len(S2_BANDS_LD) if use_s1: - n_inputs += 2 + n_inputs += len(s1_band) + if use_s2: + n_inputs += len(s2_band) 
return n_inputs


 class ToTensor(object):
-    """Convert ndarrays in sample to Tensors."""
+    """convert sample ndarrays to input Tensors."""

     def __call__(self, rt_sample):
         img, sample_id = rt_sample['image'], rt_sample['id']

         rt_sample = {'image': torch.tensor(img), 'id': sample_id}
+
         return rt_sample


@@ -153,28 +147,29 @@ def __init__(self, path=None, data_index_dir=None, imgTransform=None, use_s2=Fal

         # make sure input parameters are okay
         if not (use_s2 or use_s1):
-            raise ValueError("No input specified, set at least one of " + "use_[s2, s1, RGB] to True!")
-        self.use_s2 = use_s2
+            raise ValueError("input error, please check the data modality")
         self.use_s1 = use_s1
+        self.use_s2 = use_s2

         # provide number of input channels
         self.n_inputs = get_ninputs(use_s1, use_s2)

+        # get sample images
         self.samples = []

-        # file = os.path.join(data_index_dir, 's1_list.pkl')
+        if use_s1 and band in s1_band:
+            file = os.path.join(data_index_dir, 's1_'+band+'_list.pkl')

-        if band == 'VV':
-            file = os.path.join(data_index_dir, 's1_vv_list.pkl')
-        elif band == 'VH':
-            file = os.path.join(data_index_dir, 's1_vh_list.pkl')
+        if use_s2 and band in s2_band:
+            file = os.path.join(data_index_dir, 's2_'+band+'_list.pkl')

         sample_list = pkl.load(open(file, "rb"))
         pbar = tqdm(total=len(sample_list))
-        pbar.set_description("[Load]")
-
+        pbar.set_description("[Loading Images]")
         for i, name in enumerate(sample_list):
-            self.samples.append({"id": i, "s1": name})
+            if use_s1:
+                self.samples.append({"id": i, "s1": name})
+            if use_s2:
+                self.samples.append({"id": i, "s2": name})
             pbar.update()

         pbar.close()
@@ -195,11 +190,43 @@ def __len__(self):
         return len(self.samples)


+def file_list_generator(path, band_list, use_s1=False, use_s2=False):
+    """generate band file list from s-1 or s-2
+    """
+    # band info
+    # s1_band = ['VV', 'VW']
+    # s2_band = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A']
+
+    import glob
+    import os
+    import pickle
+
+    for band in band_list:
+        tmp = []
+        # s1 - "/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0
+        # s2 - "/home/cjrd/data/bigearthnet/BigEarthNet-v1.0
+        for file in glob.glob(path+'/*'):
+            for img in glob.glob(file+'/*_'+band+'.tif'):
+                print(img)
+                tmp.append(img)
+        if use_s1:
+            with open("s1_"+band+"_list.pkl", 'wb') as f:
+                pickle.dump(tmp, f)
+        if use_s2:
+            with open("s2_"+band+"_list.pkl", 'wb') as f:
+                pickle.dump(tmp, f)
+        del tmp
+
+    return

 def main(args):
     # path = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/'
     # data_index_dir = '/home/taeil/hpt_k/src/data'

+    # band info
+    # s1_band = ['VV', 'VW']
+    # s2_band = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A']
+
     data_transforms = transforms.Compose([
         ToTensor()
     ])
@@ -211,8 +238,11 @@ def main(args):

     # calc mean and std for the dataset
     mean, std = calc_mean_std(data_loader)
-    print('mean:{}, std:{}'.format(mean[0], std[0]))
+    print('band:{} -- mean:{}, std:{}'.format(args.band, mean[0], std[0]))
+
+
+    ####### to-do later--->
+    # 1. stacked the bands dataset -- if needed to load all at once


 if __name__ == "__main__":
diff --git a/src/data/dataset_calc_BigEarthNet.sh b/src/data/dataset_calc_BigEarthNet.sh
new file mode 100644
index 0000000..aa0c2e7
--- /dev/null
+++ b/src/data/dataset_calc_BigEarthNet.sh
@@ -0,0 +1,39 @@
+
+### S-1 data
+declare -a s1_band=("VV" "VH")
+path="/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0/"
+data_index_dir="/home/taeil/hpt_k/src/data"
+declare -i numworkers=30
+declare -i batchsize=256
+
+for band in ${s1_band[@]}
+do
+    python dataset_calc_BigEarthNet.py\
+        --path $path\
+        --data_index_dir $data_index_dir\
+        --numworkers $numworkers\
+        --batchsize $batchsize\
+        --use_s1\
+        --band $band
+
+done
+
+### S-2 data
+declare -a s2_band=("B01" "B02" "B03" "B04" "B05" "B06" "B07" "B08" "B09" "B11" "B12" "B8A")
+path="/home/cjrd/data/bigearthnet/BigEarthNet-v1.0/"
+data_index_dir="/home/taeil/hpt_k/src/data"
+declare -i numworkers=30
+declare -i batchsize=256
+
+for band in ${s2_band[@]}
+do
+    python dataset_calc_BigEarthNet.py\
+        --path $path\
+        --data_index_dir $data_index_dir\
+        --numworkers $numworkers\
+        --batchsize $batchsize\
+        --use_s2\
+        --band $band
+
+done
+

From 4801489b5083ffdfd4f6754cc741b370d6b4f944 Mon Sep 17 00:00:00 2001
From: TsungChinHanKen <>
Date: Wed, 7 Apr 2021 20:59:05 -0400
Subject: [PATCH 51/59] correct s1 band name 'VH'

---
 src/data/dataset_calc_BigEarthNet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data/dataset_calc_BigEarthNet.py b/src/data/dataset_calc_BigEarthNet.py
index 23dbe1e..6643772 100644
--- a/src/data/dataset_calc_BigEarthNet.py
+++ b/src/data/dataset_calc_BigEarthNet.py
@@ -52,7 +52,7 @@
 # S1 - VV, VH
 # S2 - B01, B02, B03, B04, B05, B06, B07, B08, B09, B11, B12, B8A
 # band info
-s1_band = ['VV', 'VW']
+s1_band = ['VV', 'VH']
 s2_band = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A']
 

From 0350894b7a1ef320c23b27c43d31df98eb226d04 Mon Sep 17 00:00:00 2001
From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com>
Date: Wed, 7 Apr 2021 21:02:22 -0400
Subject: [PATCH 52/59] Update README.md

update doc for running the bigearthnet band stats
---
 README.md | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index aa4ca53..073617c 100644
--- a/README.md
+++ b/README.md
@@ -304,15 +304,13 @@ python tools/train.py configs/hpt-pretrain/resisc/moco_v2_800ep_basetrain/500-it
 ## (Other) BigEarthNet bands mean and standard deviation
 
 For S-1 data, band name {'VV', 'VH'}
+For S-2 data, band name {'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'}
+```
+cd into hpt/src/data/
+```
+Calc band stats by running
 ```bash
-python dataset_band_info_BigEarthNet.py\
-    --path \
-    --data_index_dir \
-    --numworkers 30\
-    --batchsize 256\
-    --use_s1\
-    --band 
-
+bash dataset_calc_BigEarthNet.sh
 ```
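
The same per-band statistics can also be driven from Python instead of the shell loop. A sketch reusing the helpers defined in `dataset_calc_BigEarthNet.py` (the dataset root is the default hard-coded in the scripts above, and the band index files are assumed to be written to and read from the working directory):

```python
# sketch: per-band S-1 stats in one process, reusing the module's own helpers
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from dataset_calc_BigEarthNet import (ToTensor, bigearthnet, calc_mean_std,
                                      file_list_generator)

S1_ROOT = '/home/cjrd/data/bigearthnet/BigEarthNet-S1-v1.0'  # default from the script

# one-time step: writes s1_VV_list.pkl / s1_VH_list.pkl into the working directory
file_list_generator(S1_ROOT, ['VV', 'VH'], use_s1=True)

for band in ['VV', 'VH']:
    dataset = bigearthnet(path=S1_ROOT, data_index_dir='.',
                          imgTransform=transforms.Compose([ToTensor()]),
                          use_s1=True, band=band)
    loader = DataLoader(dataset, batch_size=256, num_workers=30, shuffle=True)
    mean, std = calc_mean_std(loader)
    print('band:{} -- mean:{}, std:{}'.format(band, mean[0], std[0]))
```

From 9c25985108a2002b79175fda308aa4e0dadcacf0 Mon Sep 17 00:00:00 2001
From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com>
Date: Wed, 7 Apr 2021 21:03:00 -0400
Subject: [PATCH 53/59] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 073617c..38e5c58 100644
--- a/README.md
+++ b/README.md
@@ -303,8 +303,8 @@ python tools/train.py configs/hpt-pretrain/resisc/moco_v2_800ep_basetrain/500-it
 
 ## (Other) BigEarthNet bands mean and standard deviation
 
-For S-1 data, band name {'VV', 'VH'}
-For S-2 data, band name {'B01', 'B02', 'B03', 'B04', 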
'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'} +```For S-1 data, band name {'VV', 'VH'}``` +```For S-2 data, band name {'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'}``` ``` cd into hpt/src/data/ ``` From 3aae7bb191259dff7b8d15e674c92d21b8302483 Mon Sep 17 00:00:00 2001 From: Tsung-Chin Han <39634122+TsungChinHanKen@users.noreply.github.com> Date: Wed, 7 Apr 2021 21:03:31 -0400 Subject: [PATCH 54/59] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 38e5c58..3592d79 100644 --- a/README.md +++ b/README.md @@ -303,8 +303,10 @@ python tools/train.py configs/hpt-pretrain/resisc/moco_v2_800ep_basetrain/500-it ## (Other) BigEarthNet bands mean and standard deviation -```For S-1 data, band name {'VV', 'VH'}``` -```For S-2 data, band name {'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'}``` +For S-1 data, band name {'VV', 'VH'}. + +For S-2 data, band name {'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A'}. + ``` cd into hpt/src/data/ ``` From 92c6232059d6da8865830c1585f87a3813129128 Mon Sep 17 00:00:00 2001 From: taeil Date: Sat, 10 Apr 2021 23:08:51 -0700 Subject: [PATCH 55/59] added project file for hpt --- .idea/.gitignore | 8 +++++ .idea/deployment.xml | 22 ++++++++++++++ .idea/hpt.iml | 19 ++++++++++++ .../inspectionProfiles/profiles_settings.xml | 6 ++++ .idea/misc.xml | 7 +++++ .idea/modules.xml | 8 +++++ .idea/other.xml | 6 ++++ .idea/runConfigurations/single_train.xml | 30 +++++++++++++++++++ .idea/vcs.xml | 8 +++++ OpenSelfSup | 2 +- README.md | 7 ++--- SEN12MS | 2 +- references/model_architectures.md | 18 +++++++++++ 13 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/deployment.xml create mode 100644 .idea/hpt.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/other.xml create mode 100644 .idea/runConfigurations/single_train.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..6a6d49b --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/hpt.iml b/.idea/hpt.iml new file mode 100644 index 0000000..3e2e3fe --- /dev/null +++ b/.idea/hpt.iml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..8598883 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..b0929b6 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at 
end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..a708ec7 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/runConfigurations/single_train.xml b/.idea/runConfigurations/single_train.xml new file mode 100644 index 0000000..c310ab9 --- /dev/null +++ b/.idea/runConfigurations/single_train.xml @@ -0,0 +1,30 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..b120a90 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/OpenSelfSup b/OpenSelfSup index c28dd45..add4f80 160000 --- a/OpenSelfSup +++ b/OpenSelfSup @@ -1 +1 @@ -Subproject commit c28dd4505e143cf4c89fe737adfb43826e2fc266 +Subproject commit add4f806fa4ca9128975e9688d81e621def47334 diff --git a/README.md b/README.md index c6d9322..c43f900 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ For initial setup, refer to [setup instructions](setup_pretraining.md). ```bash export WANDB_API_KEY= export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=hpt +export WANDB_PROJECT=scene_classification #export WANDB_MODE=dryrun ``` @@ -83,14 +83,13 @@ cd OpenSelfSup /tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_aug_20ep.py --debug # (sanity check) Single GPU training on samll dataset on sen12ms fusion -./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_12ch_in_smoketrain_aug_2ep.py --debug - +./tools/single_train.sh configs/selfsup/moco/r50_v2_sen12ms_fusion_in_smoke_aug.py --debug # (sanity check) 4 GPUs training on samll dataset ./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_basetrain_aug_20ep.py 4 # (sanity check) 4 GPUs training on samll fusion dataset -./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_12ch_in_smoketrain_aug_2ep.py 4 +./tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_fusion_in_smoke_aug.py 4 # distributed full training /tools/dist_train.sh configs/selfsup/moco/r50_v2_sen12ms_in_fulltrain_20ep.py 4 diff --git a/SEN12MS b/SEN12MS index a8a7760..f824eeb 160000 --- a/SEN12MS +++ b/SEN12MS @@ -1 +1 @@ -Subproject commit a8a7760d81f83b015d341bd3f3ebc87741b0658e +Subproject commit f824eebb94e6e08b211cf963d6dc768c82c3bd08 diff --git a/references/model_architectures.md b/references/model_architectures.md index c687de9..1300689 100644 --- a/references/model_architectures.md +++ b/references/model_architectures.md @@ -10,3 +10,21 @@ - Moco_1x1Rnd: adding conv1x1 block to the ResNet50 used by Sen12ms. ResNet50 layers are initialized with the weight from Moco but input module is initialized with random weights - Finetune v2 (1k dataset) - freezing ResNet50 fully or partially does not seem to help with accuracy. We will continue explore and share the results once we are sure there is no issue with implementation. 
+
+
+#### Key pretrained models
+
+Some pretrained models:
+
+**Sensor Augmentation**
+- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p)
+- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e)
+- sen12_crossaugment_epoch_1000.pth: 1000 epochs
+
+**Data Fusion - Augmentation Set 2**
+- [(optional fusion) crimson-pyramid-70](https://wandb.ai/cal-capstone/hpt4/runs/2iu8yfs6): 200 epochs
+- [(partial fusion) laced-water-61](https://wandb.ai/cal-capstone/hpt4/runs/367tz8vs): 200 epochs, 32K
+- [(partial fusion) visionary-lake-62](https://wandb.ai/cal-capstone/hpt4/runs/1srlc7jr/overview?workspace=user-taeil): should be deprecated; it was trained for a different number of epochs than the other pretrained models
+- [(full fusion) electric-mountain-33](https://wandb.ai/cal-capstone/hpt4/runs/ak0xdbfu)
+
+**Data Fusion - Augmentation Set 1**
\ No newline at end of file
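
As a usage note for the checkpoints listed above: a file such as `sen12_crossaugment_epoch_1000.pth` can be pulled into a plain torchvision ResNet-50 for downstream experiments. The key layout assumed below (a dict with a `state_dict` whose backbone weights carry a `backbone.` prefix) is the usual OpenSelfSup convention, so verify it against the actual file; the sensor-fusion checkpoints also use a multi-band first conv, which the shape filter below simply skips:

```python
import torch
import torchvision.models as models

# assumption: OpenSelfSup-style checkpoint, i.e. a dict with a 'state_dict'
# whose backbone weights are stored under a 'backbone.' prefix
ckpt = torch.load('sen12_crossaugment_epoch_1000.pth', map_location='cpu')
state = ckpt.get('state_dict', ckpt)

backbone_state = {k[len('backbone.'):]: v
                  for k, v in state.items()
                  if k.startswith('backbone.')}

model = models.resnet50()
target = model.state_dict()
# keep only tensors whose name and shape match the torchvision layout;
# e.g. a 2- or 12-channel conv1 from the fusion models is skipped here
loadable = {k: v for k, v in backbone_state.items()
            if k in target and target[k].shape == v.shape}

missing, unexpected = model.load_state_dict(loadable, strict=False)
print('loaded {} / {} backbone tensors'.format(len(loadable), len(backbone_state)))
print('missing keys:', missing)  # fc.* plus any shape-mismatched layers
```

From d4eb4a8037752de0036df15fa3fc236acad041fc Mon Sep 17 00:00:00 2001
From: Colorado Reed
Date: Tue, 20 Apr 2021 09:20:50 -0700
Subject: [PATCH 56/59] merge

---
 data/small_sample.pkl                         | Bin 0 -> 19520 bytes
 python                                        |   4 +
 .../imagenet/base-imagenet-config.py          |  84 ++++++++++
 .../2500-iter-0_001-lr-finetune.py            | 109 ++++++++++++++++
 .../1000-labels/2500-iter-0_01-lr-finetune.py | 109 ++++++++++++++++
 .../1000-labels/90-epoch-0_001-lr-finetune.py | 109 ++++++++++++++++
 .../1000-labels/90-epoch-0_01-lr-finetune.py  | 109 ++++++++++++++++
 .../1000-labels/finetune-eval-base.py         |  30 +++++
 .../all-labels/2500-iter-0_001-lr-finetune.py | 109 ++++++++++++++++
 .../all-labels/2500-iter-0_01-lr-finetune.py  | 109 ++++++++++++++++
 .../all-labels/90-epoch-0_001-lr-finetune.py  | 109 ++++++++++++++++
 .../all-labels/90-epoch-0_01-lr-finetune.py   | 109 ++++++++++++++++
 .../finetune/all-labels/finetune-eval-base.py |  30 +++++
 .../50-iters.py                               |   8 ++
 .../500-iters.py                              |   8 ++
 .../5000-iters.py                             |   8 ++
 .../50000-iters.py                            |   8 ++
 .../imagenet/linear-eval/linear-eval-base.py  | 120 ++++++++++++++++++
 .../imagenet/linear-eval/linear-eval-lr-s0.py |   4 +
 .../imagenet/linear-eval/linear-eval-lr-s1.py |   4 +
 .../imagenet/linear-eval/linear-eval-lr-s2.py |   4 +
 .../moco_v2_800ep_basetrain/50-iters.py       |   8 ++
 .../moco_v2_800ep_basetrain/500-iters.py      |   8 ++
 .../moco_v2_800ep_basetrain/5000-iters.py     |   8 ++
 .../moco_v2_800ep_basetrain/50000-iters.py    |   8 ++
 .../imagenet/no_basetrain/100000-iters.py     |   8 ++
 .../imagenet/no_basetrain/200000-iters.py     |   8 ++
 .../imagenet/no_basetrain/5000-iters.py       |   8 ++
 .../imagenet/no_basetrain/50000-iters.py      |   8 ++
 src/data/__pycache__/dataset.cpython-37.pyc   | Bin 0 -> 6054 bytes
 src/utils/pretrain-configs/sen12ms-small.sh   | 118 +++++++++++++++++
 31 files changed, 1366 insertions(+)
 create mode 100644 data/small_sample.pkl
 create mode 100755 python
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/base-imagenet-config.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_001-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_01-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_001-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_01-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/finetune-eval-base.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_001-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_01-lr-finetune.py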
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_001-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_01-lr-finetune.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/finetune-eval-base.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/500-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/5000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-base.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s0.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s1.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s2.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/500-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/5000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/100000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/200000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/5000-iters.py
 create mode 100644 src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/50000-iters.py
 create mode 100644 src/data/__pycache__/dataset.cpython-37.pyc
 create mode 100644 src/utils/pretrain-configs/sen12ms-small.sh

diff --git a/data/small_sample.pkl b/data/small_sample.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..aebd3e1822f8bf5e536b1a90b4753bb811c790af
GIT binary patch
literal 19520
[base85 binary payload omitted: 19520 bytes of pickled sample data, not human-readable]

diff --git a/python b/python
new file mode 100755
index 0000000..17b6a5f
--- /dev/null
+++ b/python
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+source /scratch/crguest/miniconda3/etc/profile.d/conda.sh
+conda activate hp120
+python "$@"
\ No newline at end of file
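
One structural note before the config tree: the small per-schedule files listed above (the 8-line `*-iters.py` configs) appear to build on the `base-imagenet-config.py` shown below through the `_base_` inheritance that OpenSelfSup/mmcv configs use. Their exact contents are not reproduced here, so the child config below is a hypothetical sketch of that shape:

```python
# hypothetical child config, e.g. moco_v2_800ep_basetrain/5000-iters.py;
# everything not set here (model, data, optimizer) is inherited from _base_
_base_ = "../base-imagenet-config.py"

# override only the schedule
total_iters = 5000
checkpoint_config = dict(interval=5000)
```

diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/base-imagenet-config.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/base-imagenet-config.py
new file mode 100644
index 0000000..4059a92
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/base-imagenet-config.py
@@ -0,0 +1,84 @@
+_base_ = '../../base.py'
+# model settings
+model = dict(
+    type='MOCO',
+    pretrained=None,
+    queue_len=65536,
+    feat_dim=128,
+    momentum=0.999,
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        in_channels=3,
+        out_indices=[4],  # 0: conv-1, x: stage-x
+        norm_cfg=dict(type='BN')),
+    neck=dict(
+        type='NonLinearNeckV1',
+        in_channels=2048,
+        hid_channels=2048,
+        out_channels=128,
+        with_avg_pool=True),
+    head=dict(type='ContrastiveHead', temperature=0.2))
+# dataset settings
+data_source_cfg = dict(
+    type='ImageNet',
+    memcached=False,
+    mclient_path='/not/used')
+
+data_train_list = "data/imagenet/meta/train+val.txt"
+data_train_root = "data/imagenet"
+
+dataset_type = 'ContrastiveDataset'
+img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3])
+train_pipeline = [
+    dict(type='RandomResizedCrop', size=224, scale=(0.2, 1.)),
+    dict(
+        type='RandomAppliedTrans',
+        transforms=[
+            dict(
+                type='ColorJitter',
+                brightness=0.4,
+                contrast=0.4,
+                saturation=0.4,
+                hue=0.4)
+        ],
+        p=0.8),
+    dict(type='RandomGrayscale', p=0.2),
+    dict(
+        type='RandomAppliedTrans',
+        transforms=[
+            dict(
+                type='GaussianBlur',
+                sigma_min=0.1,
+                sigma_max=2.0)
+        ],
+        p=0.5),
+    dict(type='RandomHorizontalFlip'),
+    dict(type='ToTensor'),
+    dict(type='Normalize', **img_norm_cfg),
+]
+data = dict(
+    batch_size=256,
+    workers_per_gpu=2,
+    drop_last=True,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            list_file=data_train_list, root=data_train_root,
+            **data_source_cfg),
+        pipeline=train_pipeline))
+# optimizer
+optimizer = dict(type='SGD', lr=0.03,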
weight_decay=0.0001, momentum=0.9) +# learning policy +lr_config = dict(policy='CosineAnnealing', min_lr=0.) + +# cjrd added this flag, since OSS didn't support training by iters(?) +by_iter = True + +log_config = dict( + interval=25, + by_epoch=False, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_001-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_001-lr-finetune.py new file mode 100644 index 0000000..bf2a5cb --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_001-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train-1000.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =True + +# learning policy +lr_config = dict( + by_epoch=False, + policy='step', + step=[833,1667], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_iters=2500 +checkpoint_config = dict(interval=2500) + +log_config = dict( + interval=1, + by_epoch=False, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_01-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_01-lr-finetune.py new file mode 100644 index 0000000..46dd647 --- /dev/null +++ 
b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/2500-iter-0_01-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train-1000.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =True + +# learning policy +lr_config = dict( + by_epoch=False, + policy='step', + step=[833,1667], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_iters=2500 +checkpoint_config = dict(interval=2500) + +log_config = dict( + interval=1, + by_epoch=False, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_001-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_001-lr-finetune.py new file mode 100644 index 0000000..3e84fed --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_001-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train-1000.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = 
dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =False + +# learning policy +lr_config = dict( + by_epoch=True, + policy='step', + step=[30,60], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_epochs=90 +checkpoint_config = dict(interval=90) + +log_config = dict( + interval=1, + by_epoch=True, + hooks=[ + dict(type='TextLoggerHook', by_epoch=True), + dict(type='TensorboardLoggerHook', by_epoch=True) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_01-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_01-lr-finetune.py new file mode 100644 index 0000000..b093dcf --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/90-epoch-0_01-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train-1000.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + 
list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =False + +# learning policy +lr_config = dict( + by_epoch=True, + policy='step', + step=[30,60], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_epochs=90 +checkpoint_config = dict(interval=90) + +log_config = dict( + interval=1, + by_epoch=True, + hooks=[ + dict(type='TextLoggerHook', by_epoch=True), + dict(type='TensorboardLoggerHook', by_epoch=True) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/finetune-eval-base.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/finetune-eval-base.py new file mode 100644 index 0000000..78b99d2 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/1000-labels/finetune-eval-base.py @@ -0,0 +1,30 @@ +train_cfg = {} +test_cfg = {} +optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb +# yapf:disable +# yapf:enable +# runtime settings +dist_params = dict(backend='nccl') +cudnn_benchmark = True +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] + +# model settings +model = dict( + type='Classification', + pretrained=None, + backbone=dict( + type='ResNet', + depth=50, + in_channels=3, + out_indices=[4], # 0: conv-1, x: stage-x + norm_cfg=dict(type='BN')), + head=dict( + type='ClsHead', with_avg_pool=True, in_channels=2048, + num_classes=10, + + ) +) +prefetch = False diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_001-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_001-lr-finetune.py new file mode 100644 index 0000000..1f82c17 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_001-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', 
**img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =True + +# learning policy +lr_config = dict( + by_epoch=False, + policy='step', + step=[833,1667], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_iters=2500 +checkpoint_config = dict(interval=2500) + +log_config = dict( + interval=1, + by_epoch=False, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_01-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_01-lr-finetune.py new file mode 100644 index 0000000..2801f70 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/2500-iter-0_01-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=False, + 
initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=False, + initial=False, + interval=25, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =True + +# learning policy +lr_config = dict( + by_epoch=False, + policy='step', + step=[833,1667], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_iters=2500 +checkpoint_config = dict(interval=2500) + +log_config = dict( + interval=1, + by_epoch=False, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + dict(type='TensorboardLoggerHook', by_epoch=False) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_001-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_001-lr-finetune.py new file mode 100644 index 0000000..54e7cf5 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_001-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =False + +# learning policy +lr_config = dict( + by_epoch=True, + policy='step', + step=[30,60], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total 
iters or total epochs +total_epochs=90 +checkpoint_config = dict(interval=90) + +log_config = dict( + interval=1, + by_epoch=True, + hooks=[ + dict(type='TextLoggerHook', by_epoch=True), + dict(type='TensorboardLoggerHook', by_epoch=True) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_01-lr-finetune.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_01-lr-finetune.py new file mode 100644 index 0000000..b5b8809 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/90-epoch-0_01-lr-finetune.py @@ -0,0 +1,109 @@ +_base_ = "finetune-eval-base.py" + +# dataset settings +data_source_cfg = dict( + type="ImageNet", + memcached=False, + mclient_path='/no/matter', + # this will be ignored if type != ImageListMultihead + +) + + +data_train_list = "data/imagenet/meta/train.txt" +data_train_root = 'data/imagenet' + +data_val_list = "data/imagenet/meta/val.txt" +data_val_root = 'data/imagenet' + +data_test_list = "data/imagenet/meta/test.txt" +data_test_root = 'data/imagenet' + +dataset_type = "ClassificationDataset" +img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3]) + +train_pipeline = [ + dict(type='RandomResizedCrop', size=224), + dict(type='RandomHorizontalFlip'), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +test_pipeline = [ + dict(type='Resize', size=256), + dict(type='CenterCrop', size=224), + dict(type='ToTensor'), + dict(type='Normalize', **img_norm_cfg), +] +data = dict( + batch_size=64, # x4 from update_interval + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_source=dict( + list_file=data_train_list, root=data_train_root, + **data_source_cfg), + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_source=dict( + list_file=data_val_list, root=data_val_root, **data_source_cfg), + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_source=dict( + list_file=data_test_list, root=data_test_root, **data_source_cfg), + pipeline=test_pipeline)) + + +custom_hooks = [ + dict( + name="val", + type='ValidateHook', + dataset=data['val'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), + dict( + name="test", + type='ValidateHook', + dataset=data['test'], + by_epoch=True, + initial=False, + interval=1, + imgs_per_gpu=32, + workers_per_gpu=2, + eval_param=dict(topk=(1,5))), +] + +by_iter =False + +# learning policy +lr_config = dict( + by_epoch=True, + policy='step', + step=[30,60], + gamma=0.1 # multiply LR by this number at each step +) + +# momentum and weight decay from VTAB and IDRL +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0., + paramwise_options={'\Ahead.': dict(lr_mult=100)}) + + +# runtime settings +# total iters or total epochs +total_epochs=90 +checkpoint_config = dict(interval=90) + +log_config = dict( + interval=1, + by_epoch=True, + hooks=[ + dict(type='TextLoggerHook', by_epoch=True), + dict(type='TensorboardLoggerHook', by_epoch=True) + ]) + +optimizer_config = dict(update_interval=4) diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/finetune-eval-base.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/finetune-eval-base.py new file mode 100644 index 0000000..78b99d2 --- /dev/null +++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/finetune/all-labels/finetune-eval-base.py @@ -0,0 +1,30 @@ +train_cfg = {} +test_cfg = {} 
+optimizer_config = dict() # grad_clip, coalesce, bucket_size_mb
+# yapf:disable
+# yapf:enable
+# runtime settings
+dist_params = dict(backend='nccl')
+cudnn_benchmark = True
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+
+# model settings
+model = dict(
+    type='Classification',
+    pretrained=None,
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        in_channels=3,
+        out_indices=[4],  # 0: conv-1, x: stage-x
+        norm_cfg=dict(type='BN')),
+    head=dict(
+        type='ClsHead', with_avg_pool=True, in_channels=2048,
+        num_classes=10,
+
+    )
+)
+prefetch = False
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50-iters.py
new file mode 100644
index 0000000..b8a01db
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/imagenet_r50_supervised.pth')
+
+# iteration budget
+total_iters=50
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/500-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/500-iters.py
new file mode 100644
index 0000000..3548ce6
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/500-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/imagenet_r50_supervised.pth')
+
+# iteration budget
+total_iters=500
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/5000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/5000-iters.py
new file mode 100644
index 0000000..399a69d
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/5000-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/imagenet_r50_supervised.pth')
+
+# iteration budget
+total_iters=5000
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50000-iters.py
new file mode 100644
index 0000000..1a1a739
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/imagenet_r50_supervised_basetrain/50000-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/imagenet_r50_supervised.pth')
+
+# iteration budget
+total_iters=50000
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-base.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-base.py
new file mode 100644
index 0000000..3857484
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-base.py
@@ -0,0 +1,119 @@
+_base_ = '../../../base.py'
+# model settings
+model = dict(
+    type='Classification',
+    pretrained=None,
+    with_sobel=False,
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        in_channels=3,
+        out_indices=[4],  # 0: conv-1, x: stage-x
+        norm_cfg=dict(type='BN'),
+        frozen_stages=4),
+    head=dict(
+        type='ClsHead', with_avg_pool=True, in_channels=2048,
+        num_classes=10,
+
+    )
+)
+
+# dataset settings
+data_source_cfg = dict(
+    type="ImageNet",
+    memcached=False,
+    mclient_path='/not/used',
+    # this will be ignored if type != ImageListMultihead
+
+)
+
+# used to train the linear classifier
+data_train_list = "data/imagenet/meta/train.txt"
+data_train_root = "data/imagenet"
+
+# used for val (i.e. picking the final model)
+data_val_list = "data/imagenet/meta/val.txt"
+data_val_root = "data/imagenet"
+
+# used for the test evaluation: we've never seen this data before (not even during pretraining)
+data_test_list = "data/imagenet/meta/test.txt"
+data_test_root = "data/imagenet"
+
+dataset_type = "ClassificationDataset"
+img_norm_cfg = dict(mean=[0.5,0.6,0.7], std=[0.1,0.2,0.3])
+train_pipeline = [
+    dict(type='RandomResizedCrop', size=224),
+    dict(type='RandomHorizontalFlip'),
+    dict(type='ToTensor'),
+    dict(type='Normalize', **img_norm_cfg),
+]
+test_pipeline = [
+    dict(type='Resize', size=256),
+    dict(type='CenterCrop', size=224),
+    dict(type='ToTensor'),
+    dict(type='Normalize', **img_norm_cfg),
+]
+data = dict(
+    batch_size=512,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_source=dict(
+            list_file=data_train_list, root=data_train_root,
+            **data_source_cfg),
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_source=dict(
+            list_file=data_val_list, root=data_val_root, **data_source_cfg),
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_source=dict(
+            list_file=data_test_list, root=data_test_root, **data_source_cfg),
+        pipeline=test_pipeline))
+
+# additional hooks
+custom_hooks = [
+    dict(
+        name="val",
+        type='ValidateHook',
+        dataset=data['val'],
+        by_epoch=False,
+        initial=False,
+        interval=100,
+        imgs_per_gpu=128,
+        workers_per_gpu=2,
+        eval_param=dict(topk=(1,5))),
+    dict(
+        name="test",
+        type='ValidateHook',
+        by_epoch=False,
+        dataset=data['test'],
+        initial=False,
+        interval=100,
+        imgs_per_gpu=128,
+        workers_per_gpu=2,
+        eval_param=dict(topk=(1,5)))
+]
+
+# learning policy
+lr_config = dict(
+    by_epoch=False,
+    policy='step',
+    step=[1651,3333])
+
+# runtime settings
+total_iters = 5000
+checkpoint_config = dict(interval=total_iters)
+
+# cjrd added this flag, since OSS didn't support training by iters(?)
+by_iter = True
+
+log_config = dict(
+    interval=10,
+    by_epoch=False,
+    hooks=[
+        dict(type='TextLoggerHook', by_epoch=False),
+        dict(type='TensorboardLoggerHook', by_epoch=False)
+    ])
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s0.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s0.py
new file mode 100644
index 0000000..730e7fa
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s0.py
@@ -0,0 +1,4 @@
+_base_="linear-eval-base.py"
+
+# optimizer
+optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s1.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s1.py
new file mode 100644
index 0000000..730e7fa
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s1.py
@@ -0,0 +1,4 @@
+_base_="linear-eval-base.py"
+
+# optimizer
+optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
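These one-line override files, and the *-iters.py files below, lean entirely on mmcv-style `_base_` inheritance: a child config restates only the keys it changes, and nested dicts merge key-by-key with the parent instead of being replaced wholesale. A minimal sketch of those merge semantics follows -- the parent values shown are illustrative assumptions (base-imagenet-config.py is not part of this patch), and in the repo the real merging is done by mmcv's Config machinery, not this helper:

# merge_cfg is a hypothetical stand-in for mmcv's config inheritance.
def merge_cfg(base, child):
    """Recursively merge `child` into `base`; child values win on conflicts."""
    merged = dict(base)
    for key, value in child.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_cfg(merged[key], value)  # descend into nested dicts
        else:
            merged[key] = value  # scalars and lists are replaced outright
    return merged

# roughly what a parent like base-imagenet-config.py might hold (assumed values):
base = dict(model=dict(type='MOCO', pretrained=None,
                       backbone=dict(type='ResNet', depth=50)),
            total_iters=100)
# what a child like moco_v2_800ep_basetrain/5000-iters.py restates:
child = dict(model=dict(pretrained='data/basetrain_chkpts/moco_v2_800ep.pth'),
             total_iters=5000)

merged = merge_cfg(base, child)
assert merged['model']['backbone'] == dict(type='ResNet', depth=50)  # inherited
assert merged['model']['pretrained'].endswith('moco_v2_800ep.pth')   # overridden

This is why each basetrain variant only needs to name its checkpoint and iteration budget; everything else comes from the shared parent config.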
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s2.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s2.py
new file mode 100644
index 0000000..730e7fa
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/linear-eval/linear-eval-lr-s2.py
@@ -0,0 +1,4 @@
+_base_="linear-eval-base.py"
+
+# optimizer
+optimizer = dict(type='SGD', lr=30., momentum=0.9, weight_decay=0.)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50-iters.py
new file mode 100644
index 0000000..94798de
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/moco_v2_800ep.pth')
+
+# iteration budget
+total_iters=50
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/500-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/500-iters.py
new file mode 100644
index 0000000..0cba86c
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/500-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/moco_v2_800ep.pth')
+
+# iteration budget
+total_iters=500
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/5000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/5000-iters.py
new file mode 100644
index 0000000..2b29885
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/5000-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/moco_v2_800ep.pth')
+
+# iteration budget
+total_iters=5000
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50000-iters.py
new file mode 100644
index 0000000..6403853
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/moco_v2_800ep_basetrain/50000-iters.py
@@ -0,0 +1,8 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+model=dict(pretrained='data/basetrain_chkpts/moco_v2_800ep.pth')
+
+# iteration budget
+total_iters=50000
+checkpoint_config = dict(interval=total_iters)
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/100000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/100000-iters.py
new file mode 100644
index 0000000..19e030f
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/100000-iters.py
@@ -0,0 +1,7 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+
+# iteration budget
+total_iters=100000
+checkpoint_config = dict(interval=total_iters//2)  # checkpoints at the halfway point and at the end
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/200000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/200000-iters.py
new file mode 100644
index 0000000..d013f98
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/200000-iters.py
@@ -0,0 +1,7 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+
+# iteration budget
+total_iters=200000
+checkpoint_config = dict(interval=total_iters//2)  # checkpoints at the halfway point and at the end
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/5000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/5000-iters.py
new file mode 100644
index 0000000..0026346
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/5000-iters.py
@@ -0,0 +1,7 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+
+# iteration budget
+total_iters=5000
+checkpoint_config = dict(interval=total_iters//2)  # checkpoints at the halfway point and at the end
diff --git a/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/50000-iters.py b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/50000-iters.py
new file mode 100644
index 0000000..5085eea
--- /dev/null
+++ b/src/OpenSelfSup/configs/hpt-pretrain/imagenet/no_basetrain/50000-iters.py
@@ -0,0 +1,7 @@
+_base_="../base-imagenet-config.py"
+
+# this will merge with the parent
+
+# iteration budget
+total_iters=50000
+checkpoint_config = dict(interval=total_iters//2)  # checkpoints at the halfway point and at the end
diff --git a/src/data/__pycache__/dataset.cpython-37.pyc b/src/data/__pycache__/dataset.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f83ec2fc40eb62cc18836604fbf343084d7869c2
GIT binary patch
literal 6054
(base85-encoded compiled-bytecode payload omitted -- binary, not human-readable)

literal 0
HcmV?d00001
diff --git a/src/utils/pretrain-configs/sen12ms-small.sh b/src/utils/pretrain-configs/sen12ms-small.sh
new file mode 100644
index 0000000..a2778c9
--- /dev/null
+++ b/src/utils/pretrain-configs/sen12ms-small.sh
@@ -0,0 +1,118 @@
+#!/bin/bash

+# COPY THIS SCRIPT AND SET ALL OF THE VARIABLES
+
+
+###############
+# PRETRAINING #
+###############
+
+# shortname: the short name used to reference the dataset
+export shortname="imagenet"
+
+# the RGB pixel means/std of the dataset,
+# DON'T just use the default if you don't know it!
+# Compute using: ./compute-dataset-pixel-mean-std.py
+export pixel_means="0.5,0.6,0.7"
+export pixel_stds="0.1,0.2,0.3"
+
+# how many iterations of pretraining? (each iter is one minibatch of 256)
+# with basetraining
+export bt_iters="50,500,5000,50000"
+# without basetraining
+export no_bt_iters="5000,50000,100000,200000"
+
+# We pretrain on both the training and validation sets,
+# BUT NOT THE TEST SET!!!
+# This file should have a list of all images for pretraining,
+# i.e. the train and val set
+# it should NOT have labels (you'll get an error if it does)
+export train_val_combined_list_path="data/imagenet/meta/train+val.txt"
+
+# the list of images just for training (used for the linear evaluation)
+# NEEDS labels
+export train_list_path="data/imagenet/meta/train.txt"
+# the list of images just for validation (used for model selection in the linear evaluation)
+# NEEDS labels
+export val_list_path="data/imagenet/meta/val.txt"
+# the list of images just for testing (used for the test pass of the linear evaluation)
+# NEEDS labels
+export test_list_path="data/imagenet/meta/test.txt"
+
+# the base data path that the image lists reference
+export base_data_path="data/imagenet"
+
+#
+# OPTIONAL - only change if you know what you're doing ;)
+#
+
+# 224 is a standard, but may not be appropriate for your data
+export crop_size="224"
+
+# basetrain weights, update this array with experiments you're running
+export basetrain_weights=(
+    # standard moco basetrain
+    "data/basetrain_chkpts/moco_v2_800ep.pth"
+
+    # supervised basetrain
+    "data/basetrain_chkpts/imagenet_r50_supervised.pth"
+
+    # no basetrain
+    ""
+)
+
+########
+# Eval #
+########
+
+# COMMENT OUT THIS SECTION IF YOU DO NOT WANT TO GEN EVAL CONFIGS
+
+# assuming the linear/semi-sup eval is an image classification problem
+# (need to create your own config for other eval problems)
+export num_classes="10" # e.g. 10 for cifar-10
+
+# resize images to this size before taking a center crop of crop_size (defined above)
+export test_precrop_size="256"
+
+## NOTE: only change these if you know what you're doing
+export linear_iters="5000"
+
+export linear_lr_drop_iters="1651,3333" # when we drop the LR by 10 (at 1/3 and 2/3 of training)
+export linear_lr='30' # linear layer learning rate
+
+# number of times to run the linear eval
+export linear_reruns=3
+
+
+############
+# Finetune #
+############
+
+export ft_num_train_labels="1000,all" # run the finetune evaluation with 1000 labels and with all labels
+# TODO(cjrd) add number of finetune reruns (use different datasets of 100 and 1k labels)
+
+# learning rates
+export ft_lrs="0.01,0.001"
+
+# finetuning amount when done by epochs
+export ft_by_epoch="90"
+export ft_by_epoch_lr_steps="30,60"
+
+# finetuning amount when done by iters
+export ft_by_iter="2500"
+export ft_by_iter_lr_steps="833,1667"
+
+##########
+# Extras #
+##########
+
+# you may need to reduce this number if your cpu load is too high
+export workers_per_gpu=2
+
+# Uncomment if using a multi-label problem
+# export dataset_type="AUROCDataset"
+# export image_head_class_type="ImageListMultihead"
+# export bce_string="use_bce_loss=True"
+# # map the input values to classes (see chexpert.sh)
+# export class_map="class_map={'0.0': '0', '-1.0': '0', '': '0', '1.0': '1'}"
+# export eval_params="dict()"

From de661c0c7a21e4f1a7a35eda2ce0663556351531 Mon Sep 17 00:00:00 2001
From: Colorado Reed
Date: Tue, 20 Apr 2021 10:11:20 -0700
Subject: [PATCH 57/59] submodule branch

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 742d048..004240f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,7 +1,7 @@
 [submodule "SEN12MS"]
 	path = SEN12MS
 	url = https://github.com/Berkeley-Data/SEN12MS.git
-	branch = taeil
+	branch = ken
 [submodule "OpenSelfSup"]
 	path = OpenSelfSup
 	url = https://github.com/Berkeley-Data/OpenSelfSup.git

From 368aa0a2b60c5b0cf27092cbf51c9e8d061548b0 Mon Sep 17 00:00:00 2001
From: Colorado Reed
Date: Tue, 20 Apr 2021 10:33:18 -0700
Subject: [PATCH 58/59] removed submodules

---
 .gitmodules | 7 -------
 1
file changed, 7 deletions(-) diff --git a/.gitmodules b/.gitmodules index 004240f..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +0,0 @@ -[submodule "SEN12MS"] - path = SEN12MS - url = https://github.com/Berkeley-Data/SEN12MS.git - branch = ken -[submodule "OpenSelfSup"] - path = OpenSelfSup - url = https://github.com/Berkeley-Data/OpenSelfSup.git From 741f6b225c8d62f7e9b2f8aa94eb85323d3bb5a2 Mon Sep 17 00:00:00 2001 From: Colorado Reed Date: Tue, 20 Apr 2021 10:34:48 -0700 Subject: [PATCH 59/59] rm submod --- OpenSelfSup | 1 - SEN12MS | 1 - 2 files changed, 2 deletions(-) delete mode 160000 OpenSelfSup delete mode 160000 SEN12MS diff --git a/OpenSelfSup b/OpenSelfSup deleted file mode 160000 index add4f80..0000000 --- a/OpenSelfSup +++ /dev/null @@ -1 +0,0 @@ -Subproject commit add4f806fa4ca9128975e9688d81e621def47334 diff --git a/SEN12MS b/SEN12MS deleted file mode 160000 index f824eeb..0000000 --- a/SEN12MS +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f824eebb94e6e08b211cf963d6dc768c82c3bd08
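A closing note on the pixel statistics used throughout these configs: the pixel_means/pixel_stds exported in sen12ms-small.sh are produced by ./compute-dataset-pixel-mean-std.py, which is not included in this patch series. A minimal sketch of that computation, assuming RGB images and an image-list file with one relative path per line (extra label columns ignored) -- the helper name and exact I/O here are assumptions, not the repo script itself:

import os

import numpy as np
from PIL import Image

def channel_mean_std(list_file, root):
    """Per-channel mean/std over every image named in an image-list file.

    Streams running sums and squared sums so the dataset never has to fit
    in memory; pixels are scaled to [0, 1] to match the config convention.
    """
    px_sum = np.zeros(3)
    px_sqsum = np.zeros(3)
    n_pixels = 0
    with open(list_file) as f:
        for line in f:
            if not line.strip():
                continue
            path = line.split()[0]  # list files may also carry a label column
            img = np.asarray(Image.open(os.path.join(root, path)).convert('RGB'),
                             dtype=np.float64) / 255.0
            px_sum += img.sum(axis=(0, 1))
            px_sqsum += (img ** 2).sum(axis=(0, 1))
            n_pixels += img.shape[0] * img.shape[1]
    mean = px_sum / n_pixels
    std = np.sqrt(px_sqsum / n_pixels - mean ** 2)  # Var[x] = E[x^2] - E[x]^2
    return mean, std

# e.g. to produce the values exported as pixel_means / pixel_stds in the script:
# mean, std = channel_mean_std('data/imagenet/meta/train+val.txt', 'data/imagenet')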