Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
184 commits
Select commit Hold shift + click to select a range
c61fa71
rqvae & k-core research
peterochek Dec 15, 2024
173944c
setup python env for dev
peterochek Dec 15, 2024
7e11d9a
use faiss as init codebook
peterochek Dec 18, 2024
d1c2a9d
use conda env
peterochek Dec 18, 2024
900d647
handle collisions
peterochek Dec 18, 2024
7ff12a0
add review md
peterochek Dec 18, 2024
7c827a3
add rqvae dataset
peterochek Dec 22, 2024
51410ae
rqvae training implemented
peterochek Dec 23, 2024
7e17fd6
revert unneeded changes
peterochek Dec 23, 2024
1186e4f
add eval pass for rqvae (from checkpoint)
peterochek Dec 23, 2024
fba00ba
e2e model
peterochek Dec 23, 2024
8ec8ea5
add todo
peterochek Dec 23, 2024
a659767
fix collapse (correct infer)
peterochek Dec 24, 2024
36773ec
Merge pull request #11 from NonameUntitled/peterochek/end2end_tiger
peterochek Dec 24, 2024
2cf409e
add tiger draft, todospks, review marks
peterochek Dec 24, 2024
f63d617
fix item_id mapping
peterochek Dec 24, 2024
55fc204
use idx instead of mapping in dataset
peterochek Dec 24, 2024
f401727
fix tiger & get autoregression
peterochek Dec 24, 2024
155b224
update reviews
peterochek Dec 28, 2024
0ab69dd
add codebook embeddings
peterochek Dec 30, 2024
e24e0a9
draft seq2seq model run
peterochek Dec 30, 2024
411e52f
speed up semantic id fetching
peterochek Dec 31, 2024
28904f7
fix infer (20 items) & logits
peterochek Dec 31, 2024
fa42428
tweak config
peterochek Dec 31, 2024
9a95b95
clear outputs from main.ipynb
peterochek Jan 1, 2025
1561d05
Merge remote-tracking branch 'origin/master' into peterochek/reproduc…
peterochek Jan 2, 2025
438560d
tiger next_item_predictions
peterochek Jan 2, 2025
3a59be8
add coverage & sequence dataset
peterochek Jan 2, 2025
e78a5ae
unify datasets
peterochek Jan 3, 2025
b0809f5
small fixes
peterochek Jan 3, 2025
2f70ea7
remove multi domain scientific dataset
peterochek Jan 3, 2025
d35f291
add item<->embedding mapping
peterochek Jan 3, 2025
a097a0e
broken grid config (no eval / validation callbacks)
peterochek Jan 4, 2025
93064c6
tiger_new model
peterochek Jan 4, 2025
eb88dfa
fix tiger (separate layers for decoder)
peterochek Jan 4, 2025
b45bf39
add bos to tiger
peterochek Jan 4, 2025
6e3d32e
use mps & fix encoder pos embeddings
peterochek Jan 5, 2025
e40c321
fixed position embeddings
peterochek Jan 5, 2025
fee9aaf
remove todos
peterochek Jan 5, 2025
92f584b
add remark
peterochek Jan 5, 2025
c339731
fix item_embeddings (now take from rqvae)
peterochek Jan 14, 2025
b6ddb0f
remove todo
peterochek Jan 15, 2025
e13947f
add collision solver
peterochek Jan 15, 2025
37a22d8
add draft code (nan breaking)
peterochek Jan 19, 2025
c2e7ee2
fix NaNs
peterochek Jan 19, 2025
465702f
add new collision solver
peterochek Jan 22, 2025
9cd9b8c
update tiger
peterochek Jan 22, 2025
5a913d9
fix solver init
peterochek Jan 24, 2025
5d15b32
sasrec compare
peterochek Jan 24, 2025
b835293
update collision solver & use dedup loss
peterochek Jan 26, 2025
dde3835
optimize item embs
peterochek Jan 26, 2025
83e99a2
add caching in trie
peterochek Jan 26, 2025
48172f5
add autoregressive
peterochek Jan 26, 2025
eec5530
use collision solver
peterochek Jan 26, 2025
d155767
fix remarks
peterochek Jan 30, 2025
10aa188
fixed flattened
peterochek Jan 30, 2025
ad3e0cc
add trie draft
peterochek Jan 30, 2025
a699b2b
fix sasrec baseline
peterochek Feb 2, 2025
fe18a56
create init tree structure
peterochek Feb 2, 2025
06f2af3
draft trie
peterochek Feb 3, 2025
db3aaf0
working trie
peterochek Feb 8, 2025
3bbafa9
working tiger (1 min / eval)
peterochek Feb 8, 2025
8ddf8f2
add sasrec freezed model
peterochek Feb 8, 2025
705bad6
small_fixes + todo
peterochek Feb 8, 2025
098982b
TODO: add projector to tiger
peterochek Feb 9, 2025
cebe016
TODO ask about last / next item prediction
peterochek Feb 13, 2025
e5fe978
use last item prediction
peterochek Feb 13, 2025
91b5649
fix collision solver
peterochek Feb 13, 2025
5943aa5
fix if inner tree is empty
peterochek Feb 13, 2025
11937a3
use int in infer after training (why?)
peterochek Feb 13, 2025
032b5ba
add sasrec for last
peterochek Feb 14, 2025
31449be
fix for last
peterochek Feb 14, 2025
e40c8ae
return to next_item_pred for sasrec
peterochek Feb 14, 2025
0137046
remove last_pred sasrec
peterochek Feb 14, 2025
c25e49d
fix for sasrec weight only
peterochek Feb 14, 2025
b62e11d
fix trie device
peterochek Feb 14, 2025
a70fc78
fix tiger weights only
peterochek Feb 14, 2025
e21e517
revert trie device
peterochek Feb 14, 2025
6c69111
return collision solver
peterochek Feb 14, 2025
0ad4046
add todos & remove projector
peterochek Feb 15, 2025
8c07876
gpu
peterochek Feb 15, 2025
30ce91c
move trie
peterochek Feb 15, 2025
9591183
debug
peterochek Feb 15, 2025
640dada
fix for sasrec last_item
peterochek Feb 15, 2025
b461af9
move trie to device
peterochek Feb 15, 2025
e31a976
add sequence full dataset
peterochek Feb 16, 2025
a30432f
sasrec semantic
peterochek Feb 16, 2025
5da9423
fix sasrec
peterochek Feb 16, 2025
83a97c2
sasrec semantic draft
peterochek Feb 16, 2025
926e0c4
fix sasrec
peterochek Feb 16, 2025
491a940
fix sasrec native
peterochek Feb 16, 2025
d6e9138
better trie query
peterochek Feb 16, 2025
62924ec
rename todopk
peterochek Feb 16, 2025
de106df
fix decoder codebook
peterochek Feb 16, 2025
e1975a2
тест rqvae
iskbaga Feb 15, 2025
f568d3c
вынес в разные места версии деревьев
iskbaga Feb 16, 2025
c97e0a9
add sasrec_semantic
peterochek Feb 16, 2025
dd77cdc
unfreeze embs
peterochek Feb 16, 2025
ac5f123
попытка ускорить SimplifiedTree (_get_ids оказался медленней)
iskbaga Feb 16, 2025
228edbd
убрал лишний assert
iskbaga Feb 16, 2025
5a5d039
привел к общему виду деревья плюс описание к солверу
iskbaga Feb 19, 2025
c9a0960
поменял trie на simplified tree
iskbaga Feb 19, 2025
407be2b
возможность не учитывать остатки
iskbaga Feb 19, 2025
fd89947
теперь в ините деревьев rqvae а не emb table
iskbaga Feb 19, 2025
1e13ae3
тест с другим деревом
iskbaga Feb 20, 2025
6250cbe
CollisionSolver теперь на cuda
iskbaga Feb 20, 2025
0ff42ae
Merge remote-tracking branch 'origin/iskbaga' into peterochek/reprodu…
peterochek Feb 21, 2025
ec80d50
use single codebook for training & query
peterochek Feb 21, 2025
6f8d757
move init
peterochek Feb 21, 2025
42cbf1c
unfreeze
peterochek Feb 21, 2025
b2e0520
add parameters to semantic sasrec
peterochek Feb 22, 2025
3fd32b6
fix sasrec
peterochek Feb 22, 2025
59c760f
fix sasrec semantic
peterochek Feb 22, 2025
b0ac4d1
add debugs
peterochek Feb 22, 2025
5f5adeb
remove debugging
peterochek Feb 22, 2025
26030d1
debug
peterochek Feb 22, 2025
df7a445
debug
peterochek Feb 22, 2025
cca0d7c
fixes
peterochek Feb 22, 2025
a97acc2
semantic last refactors (cat passes grads)
peterochek Feb 22, 2025
cf30973
TODOPK + loss fixes
peterochek Feb 22, 2025
769c514
cross entropy loss in Tiger, remove mps
peterochek Feb 22, 2025
b3193ea
fix decoder
peterochek Feb 22, 2025
c7a3e93
honest sasrec
peterochek Feb 23, 2025
3c86115
fix sasrec freezed
peterochek Feb 23, 2025
632045f
sasrec freezed learnable
peterochek Feb 23, 2025
8e96a9b
empty cuda
peterochek Feb 23, 2025
95e816c
freeze sasrec
peterochek Feb 24, 2025
61b5c54
fix cuda ordinal
peterochek Mar 1, 2025
6bad6ce
remove laplas logs
peterochek Mar 1, 2025
7083792
Merge remote-tracking branch 'origin/master' into peterochek/reproduc…
peterochek Mar 1, 2025
4077b05
ruff format
peterochek Mar 1, 2025
e96ee1f
refactor sasrec semantic
peterochek Mar 1, 2025
aa01179
unify semantic sasrec
peterochek Mar 1, 2025
930e2af
fixes for fast exps
peterochek Mar 1, 2025
181817b
refactor tiger init_embeddings
peterochek Mar 1, 2025
083176c
move decoder pos embs
peterochek Mar 1, 2025
0e878ee
small refactors
peterochek Mar 1, 2025
af08f7d
test sasrec-like eval in tiger
peterochek Mar 1, 2025
870f3b4
remove unneeded codebook in semantic
peterochek Mar 2, 2025
7887d20
Merge pull request #20 from CTLab-ITMO/peterochek/reproduce_tiger_tmp
peterochek Mar 2, 2025
532dde0
add causal mask
peterochek Mar 2, 2025
cf6bdbe
sasrec real draft
peterochek Mar 5, 2025
91e49ff
add sasrec real / semantic
peterochek Mar 5, 2025
b0b893c
add uid embeds
peterochek Mar 5, 2025
b25b001
fix random sampler
peterochek Mar 5, 2025
7b6b3d0
scientific full
peterochek Mar 8, 2025
2f58166
semantic learnable uid loo
peterochek Mar 8, 2025
49dcb96
freezed sasrec semantic
peterochek Mar 8, 2025
4853002
freezed loo
peterochek Mar 8, 2025
f3c3c48
learnable loo
peterochek Mar 8, 2025
785b1e3
loo & sequence dataset fixes
peterochek Mar 8, 2025
5fc4317
add validation size logging
peterochek Mar 8, 2025
2f484cb
uid
peterochek Mar 8, 2025
d3d05d6
sequence dfs
peterochek Mar 8, 2025
83f07a6
use second cuda
peterochek Mar 8, 2025
521fe86
revert cuda & remove freezed
peterochek Mar 8, 2025
3c8a999
update all sasrec model (last item left TODO)
peterochek Mar 12, 2025
d021676
adapt full & in batch to last item
peterochek Mar 12, 2025
9182a55
fix configs for sasrec's last item
peterochek Mar 12, 2025
98b45d5
rename sasrec semantic config
peterochek Mar 12, 2025
63f20ea
loo configs
peterochek Mar 12, 2025
37a8dbc
add loss fix (1e-9)
peterochek Mar 15, 2025
ecb2113
sequence on fixed sasrec real
peterochek Mar 15, 2025
c429951
correct sasrec (use codebook only)
peterochek Mar 23, 2025
0eb0b13
more frequent checks
peterochek Mar 23, 2025
1ebb5b9
fix event id indexing
peterochek Mar 23, 2025
162745b
faster semantic train
peterochek Mar 23, 2025
38c6fd3
add uv deps
peterochek Apr 6, 2025
4cf39c8
Merge pull request #18 from CTLab-ITMO/peterochek/reproduce_tiger
peterochek Jun 8, 2025
92f28b3
Revert "ruff format"
peterochek Jun 8, 2025
a1e6f10
Revert "ruff format"
peterochek Jun 8, 2025
ef7f50c
Merge remote-tracking branch 'origin/peterochek/rqvae' into peteroche…
peterochek Jun 8, 2025
91b3f69
bump deps
peterochek Jun 8, 2025
d5c4abb
add conditional torch cuda deps
peterochek Jun 8, 2025
ad67f7f
fix small issues after merge
peterochek Jun 8, 2025
a1b5b45
add debug lines for scientific dataset
peterochek Jun 8, 2025
26d7247
add letter dataset
peterochek Jun 9, 2025
356c072
add letter batch processor
peterochek Jun 9, 2025
488a94c
comment out dev thing
peterochek Jun 9, 2025
7ecb8d9
remove uv.lock file
peterochek Jun 9, 2025
f69db6b
Merge remote-tracking branch 'origin/peterochek/rqvae' into peteroche…
peterochek Jun 9, 2025
165a996
remove unneeded +1 call in range
peterochek Jun 9, 2025
5e6e325
Merge pull request #23 from CTLab-ITMO/peterochek/letter-rqvae-data
peterochek Jun 9, 2025
b0cd74d
add mapped tensor
peterochek Jun 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
.idea
__pycache__
data/*
tensorboard_logs/*
*tensorboard_logs*/*
saved_logs/*
.venv
papers
checkpoints/*
*.prof
uv.lock
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
186 changes: 186 additions & 0 deletions configs/train/letter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
{
"experiment_name": "letter_data",
"best_metric": "validation/ndcg@20",
"train_epochs_num": 100,
"dataset": {
"type": "letter_full",
"path_to_data_dir": "../data",
"name": "Beauty_letter",
"max_sequence_length": 50,
"samplers": {
"type": "last_item_prediction",
"negative_sampler_type": "random"
},
"beauty_inter_json": "../../LETTER/data/Beauty/Beauty.inter.json"
},
"dataloader": {
"train": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "letter",
"beauty_index_json": "../../LETTER/data/Beauty/Beauty.index.json",
"semantic_length": 4
},
"drop_last": true,
"shuffle": true
},
"validation": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "letter",
"beauty_index_json": "../../LETTER/data/Beauty/Beauty.index.json",
"semantic_length": 4
},
"drop_last": false,
"shuffle": false
}
},
"model": {
"type": "tiger",
"rqvae_train_config_path": "../configs/train/rqvae_train_config.json",
"rqvae_checkpoint_path": "../checkpoints/rqvae_beauty_final_state.pth",
"embs_extractor_path": "../data/Beauty/rqvae/data_full.pt",
"sequence_prefix": "item",
"predictions_prefix": "logits",
"positive_prefix": "labels",
"labels_prefix": "labels",
"embedding_dim": 64,
"num_heads": 2,
"num_encoder_layers": 2,
"num_decoder_layers": 2,
"dim_feedforward": 256,
"dropout": 0.3,
"activation": "gelu",
"layer_norm_eps": 1e-9,
"initializer_range": 0.02
},
"optimizer": {
"type": "basic",
"optimizer": {
"type": "adam",
"lr": 0.001
},
"clip_grad_threshold": 5.0
},
"loss": {
"type": "composite",
"losses": [
{
"type": "ce",
"predictions_prefix": "logits",
"labels_prefix": "semantic.labels",
"weight": 1.0,
"output_prefix": "semantic_loss"
},
{
"type": "ce",
"predictions_prefix": "dedup.logits",
"labels_prefix": "dedup.labels",
"weight": 1.0,
"output_prefix": "dedup_loss"
}
],
"output_prefix": "loss"
},
"callback": {
"type": "composite",
"callbacks": [
{
"type": "metric",
"on_step": 1,
"loss_prefix": "loss"
},
{
"type": "validation",
"on_step": 1024,
"pred_prefix": "logits",
"labels_prefix": "labels",
"metrics": {
"ndcg@5": {
"type": "ndcg",
"k": 5
},
"ndcg@10": {
"type": "ndcg",
"k": 10
},
"ndcg@20": {
"type": "ndcg",
"k": 20
},
"recall@5": {
"type": "recall",
"k": 5
},
"recall@10": {
"type": "recall",
"k": 10
},
"recall@20": {
"type": "recall",
"k": 20
},
"coverage@5": {
"type": "coverage",
"k": 5
},
"coverage@10": {
"type": "coverage",
"k": 10
},
"coverage@20": {
"type": "coverage",
"k": 20
}
}
},
{
"type": "eval",
"on_step": 2048,
"pred_prefix": "logits",
"labels_prefix": "labels",
"metrics": {
"ndcg@5": {
"type": "ndcg",
"k": 5
},
"ndcg@10": {
"type": "ndcg",
"k": 10
},
"ndcg@20": {
"type": "ndcg",
"k": 20
},
"recall@5": {
"type": "recall",
"k": 5
},
"recall@10": {
"type": "recall",
"k": 10
},
"recall@20": {
"type": "recall",
"k": 20
},
"coverage@5": {
"type": "coverage",
"k": 5
},
"coverage@10": {
"type": "coverage",
"k": 10
},
"coverage@20": {
"type": "coverage",
"k": 20
}
}
}
]
}
}

65 changes: 65 additions & 0 deletions configs/train/rqvae_train_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"experiment_name": "rqvae_beauty",
"train_epochs_num": 200,
"dataset": {
"type": "rqvae",
"path_to_data_dir": "../data",
"name": "Beauty",
"samplers": {
"type": "identity"
}
},
"dataloader": {
"train": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "embed"
},
"drop_last": false,
"shuffle": true
},
"validation": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "embed"
},
"drop_last": false,
"shuffle": false
}
},
"model": {
"type": "rqvae",
"embedding_dim": 512,
"hidden_dim": 64,
"n_iter": 100,
"codebook_sizes": [256, 256, 256],
"should_init_codebooks": true,
"should_reinit_unused_clusters": true,
"initializer_range": 0.02
},
"optimizer": {
"type": "basic",
"optimizer": {
"type": "adam",
"lr": 5e-5
},
"clip_grad_threshold": 5.0
},
"loss": {
"type": "rqvae_loss",
"beta": 0.25,
"output_prefix": "loss"
},
"callback": {
"type": "composite",
"callbacks": [
{
"type": "metric",
"on_step": 1,
"loss_prefix": "loss"
}
]
}
}
86 changes: 86 additions & 0 deletions configs/train/rqvae_train_grid_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"experiment_name": "rqvae_beauty_grid",
"train_epochs_num": 50,
"dataset": {
"type": "rqvae",
"path_to_data_dir": "../data",
"name": "Beauty",
"samplers": {
"type": "identity"
}
},
"dataset_params": {
},
"dataloader": {
"train": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "embed"
},
"drop_last": false,
"shuffle": true
},
"validation": {
"type": "torch",
"batch_size": 256,
"batch_processor": {
"type": "embed"
},
"drop_last": false,
"shuffle": false
}
},
"model": {
"type": "rqvae",
"input_dim": 512,
"codebook_sizes": [256, 256, 256, 256],
"should_init_codebooks": true,
"should_reinit_unused_clusters": true,
"initializer_range": 0.02
},
"model_params": {
"n_iter": [
100,
500,
2000
],
"hidden_dim": [
128,
512,
2048
]
},
"optimizer": {
"type": "basic",
"optimizer": {
"type": "adam",
"lr": 1e-4
},
"clip_grad_threshold": 5.0,
"scheduler": {
"type": "step",
"step_size": 100,
"gamma": 0.96
}
},
"optimizer_params": {
},
"loss": {
"type": "rqvae_loss",
"beta": 0.25,
"output_prefix": "loss"
},
"loss_params": {
},
"callback": {
"type": "composite",
"callbacks": [
{
"type": "metric",
"on_step": 1,
"loss_prefix": "loss"
}
]
}
}
Loading