Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4861210
minimal environment with up2date packages
floschne Dec 8, 2020
5c6eeb7
updated readme
floschne Dec 8, 2020
fbbdb81
ignored data and models
floschne Dec 9, 2020
646f2b3
ignored jupyter stuff
floschne Dec 9, 2020
3a0fd5f
- commented out ndcg and i2t stuff
floschne Dec 10, 2020
121bf54
added flags for i2t t2i and gpu
floschne Dec 14, 2020
0ed0730
added timing outputs for evaluation
floschne Dec 14, 2020
cc9f94d
fixed flags for i2t and t2i
floschne Dec 14, 2020
6f562b1
improved code readability by renaming some variable names and adding …
floschne Dec 28, 2020
d0a175d
exiting program if CUDA_VISIBLE_DEVICES is not set
floschne Dec 28, 2020
fb63f91
improved code readability by renaming some variable names and adding …
floschne Dec 30, 2020
4237815
first working (but not fully optimized) IR Inference
floschne Dec 30, 2020
cbbc327
further optimized computation time by only computing the query embedd…
floschne Dec 30, 2020
5798289
CocoImageRetrievalDataset is now inheriting from torch.data.Dataset
floschne Dec 30, 2020
372f588
Split computation of img and txt embeddings in TERAN
floschne Dec 31, 2020
deb7cd0
implemented pre-computation of img embeddings
floschne Dec 31, 2020
8b9e3ff
using pre-computed image embeddings
floschne Dec 31, 2020
6c697dc
optimized loading of pre-computed embeddings
floschne Dec 31, 2020
3bf88b5
minor bugfix
floschne Dec 31, 2020
9f17dfe
modularized code a bit
floschne Jan 3, 2021
915dcbd
own fn to load teran
floschne Jan 3, 2021
60a3644
Merge pull request #1 from floschne/first_ir_experiments
floschne Jan 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@
*.ipynb_checkpoints
*.json
*.pth.tar


.idea
data
pretrained_models
*.tar
*.ipynb
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ conda activate teran
export PYTHONPATH=.
```

2.1 Set up a minimal Python environment for CUDA 10.1 using conda:
```
conda env create --file environment_min.yml
conda activate teran
export PYTHONPATH=.
```
## Get the data
1. Download and extract the data folder, containing annotations, the splits by Karpathy et al. and ROUGEL - SPICE precomputed relevances for both COCO and Flickr30K datasets:

Expand Down
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .data import d
63 changes: 63 additions & 0 deletions configs/teran_coco_MrSw_IR.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# TERAN configuration for COCO ('MrSw' alignment) with an additional
# `image-retrieval` section.
#
# Every key below a section header must stay indented under it: `name`,
# `dropout`, `layers` and `fine-tune` occur in more than one section and would
# collide as duplicate top-level keys if the nesting were flattened.
dataset:
  name: 'coco'
  images-path: 'data/coco/images'  # not needed if using pre-extracted bottom-up features
  data: 'data'
  restval: True
  pre-extracted-features: False

# NOTE(review): keys in this section use snake_case while every other section
# uses kebab-case; do not rename them without updating the code that reads them.
image-retrieval:
  dataset: 'coco'  # for now only coco support
  split: 'test'  # we can remove this in later versions
  num_imgs: 5000
  batch_size: 100  # 100 takes ~10s; 1000 takes ~14s to encode the data (compute the TE outputs)
  # Same value as image-model.pre-extracted-features-root below.
  # NOTE(review): presumably the two must be kept in sync - confirm.
  pre_extracted_img_features_root: 'data/coco/features_36'
  create_query_batch: False
  # Same value as training.alignment-mode below.
  # NOTE(review): presumably the two must be kept in sync - confirm.
  alignment_mode: 'MrSw'
  use_precomputed_img_embeddings: False
  pre_computed_img_embeddings_root: 'data/coco/pre_computed_embeddings'

text-model:
  name: 'bert'
  pretrain: 'bert-base-uncased'
  word-dim: 768
  extraction-hidden-layer: 6
  fine-tune: True
  pre-extracted: False
  layers: 0
  dropout: 0.1

image-model:
  name: 'bottomup'
  pre-extracted-features-root: 'data/coco/features_36'
  transformer-layers: 4
  dropout: 0.1
  pos-encoding: 'concat-and-process'
  crop-size: 224  # not used
  fine-tune: False
  feat-dim: 2048
  norm: True

model:
  name: 'teran'
  embed-size: 1024
  text-aggregation: 'first'
  image-aggregation: 'first'
  layers: 2
  exclude-stopwords: False
  shared-transformer: False
  dropout: 0.1

training:
  lr: 0.00001  # 0.000006
  grad-clip: 2.0
  max-violation: True
  loss-type: 'alignment'
  alignment-mode: 'MrSw'
  measure: 'dot'
  margin: 0.2
  bs: 40
  scheduler: 'steplr'
  gamma: 0.1
  step-size: 20
  warmup: null
  warmup-period: 1000
63 changes: 63 additions & 0 deletions configs/teran_coco_MrSw_IR_PreComp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# TERAN configuration for COCO ('MrSw' alignment) with an additional
# `image-retrieval` section that has `use_precomputed_img_embeddings` enabled.
#
# Every key below a section header must stay indented under it: `name`,
# `dropout`, `layers` and `fine-tune` occur in more than one section and would
# collide as duplicate top-level keys if the nesting were flattened.
dataset:
  name: 'coco'
  images-path: 'data/coco/images'  # not needed if using pre-extracted bottom-up features
  data: 'data'
  restval: True
  pre-extracted-features: False

# NOTE(review): keys in this section use snake_case while every other section
# uses kebab-case; do not rename them without updating the code that reads them.
image-retrieval:
  dataset: 'coco'  # for now only coco support
  split: 'test'  # we can remove this in later versions
  num_imgs: 5000
  batch_size: 100  # 100 takes ~10s; 1000 takes ~14s to encode the data (compute the TE outputs)
  # Same value as image-model.pre-extracted-features-root below.
  # NOTE(review): presumably the two must be kept in sync - confirm.
  pre_extracted_img_features_root: 'data/coco/features_36'
  create_query_batch: False
  # Same value as training.alignment-mode below.
  # NOTE(review): presumably the two must be kept in sync - confirm.
  alignment_mode: 'MrSw'
  # NOTE(review): presumably the embeddings are read from
  # pre_computed_img_embeddings_root when this is True - confirm against the
  # loading code.
  use_precomputed_img_embeddings: True
  pre_computed_img_embeddings_root: 'data/coco/pre_computed_embeddings'

text-model:
  name: 'bert'
  pretrain: 'bert-base-uncased'
  word-dim: 768
  extraction-hidden-layer: 6
  fine-tune: True
  pre-extracted: False
  layers: 0
  dropout: 0.1

image-model:
  name: 'bottomup'
  pre-extracted-features-root: 'data/coco/features_36'
  transformer-layers: 4
  dropout: 0.1
  pos-encoding: 'concat-and-process'
  crop-size: 224  # not used
  fine-tune: False
  feat-dim: 2048
  norm: True

model:
  name: 'teran'
  embed-size: 1024
  text-aggregation: 'first'
  image-aggregation: 'first'
  layers: 2
  exclude-stopwords: False
  shared-transformer: False
  dropout: 0.1

training:
  lr: 0.00001  # 0.000006
  grad-clip: 2.0
  max-violation: True
  loss-type: 'alignment'
  alignment-mode: 'MrSw'
  measure: 'dot'
  margin: 0.2
  bs: 40
  scheduler: 'steplr'
  gamma: 0.1
  step-size: 20
  warmup: null
  warmup-period: 1000
59 changes: 59 additions & 0 deletions configs/teran_inf_coco_MrSw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# TERAN configuration for COCO ('MrSw' alignment) without an `image-retrieval`
# section.
#
# Every key below a section header must stay indented under it: `name`,
# `dropout`, `layers` and `fine-tune` occur in more than one section and would
# collide as duplicate top-level keys if the nesting were flattened.
dataset:
  name: 'coco'
  images-path: 'data/coco/images'  # not needed if using pre-extracted bottom-up features
  data: 'data'
  restval: True
  pre-extracted-features: False

text-model:
  name: 'bert'
  pretrain: 'bert-base-uncased'
  word-dim: 768
  extraction-hidden-layer: 6
  fine-tune: True
  pre-extracted: False
  layers: 0
  dropout: 0.1

# Disabled alternative `text-model` section (name: 'gru'). Only one
# `text-model` mapping may be active at a time; enabling this one requires
# commenting out the section above, otherwise the key is duplicated.
# text-model:
#   name: 'gru'
#   word-dim: 300
#   fine-tune: True
#   pre-extracted: False
#   layers: 1

image-model:
  name: 'bottomup'
  pre-extracted-features-root: 'data/coco/features_36'
  transformer-layers: 4
  dropout: 0.1
  pos-encoding: 'concat-and-process'
  crop-size: 224  # not used
  fine-tune: False
  feat-dim: 2048
  norm: True

model:
  name: 'teran'
  embed-size: 1024
  text-aggregation: 'first'
  image-aggregation: 'first'
  layers: 2
  exclude-stopwords: False
  shared-transformer: False
  dropout: 0.1

training:
  lr: 0.00001  # 0.000006
  grad-clip: 2.0
  max-violation: True
  loss-type: 'alignment'
  alignment-mode: 'MrSw'
  measure: 'dot'
  margin: 0.2
  bs: 40
  scheduler: 'steplr'
  gamma: 0.1
  step-size: 20
  warmup: null
  warmup-period: 1000
Loading