From 187b45b8474e939974590467cb78520429a19356 Mon Sep 17 00:00:00 2001 From: Sugam Date: Sun, 18 Jan 2026 15:24:11 +0400 Subject: [PATCH 1/2] add letter count easy task --- .../experiments/easy-tasks/README.md | 7 ++ .../experiments/easy-tasks/letter-count.yaml | 66 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 welt_training/experiments/easy-tasks/letter-count.yaml diff --git a/welt_training/experiments/easy-tasks/README.md b/welt_training/experiments/easy-tasks/README.md index 340b1e1..2393eae 100644 --- a/welt_training/experiments/easy-tasks/README.md +++ b/welt_training/experiments/easy-tasks/README.md @@ -32,3 +32,10 @@ docker run -it --rm --gpus all \ export WANDB_PROJECT="ocr" welt-train welt_training/experiments/easy-tasks/ocr.yaml ``` + +### Letter Count + +```bash +export WANDB_PROJECT="letter-count" +welt-train welt_training/experiments/easy-tasks/letter-count.yaml +``` diff --git a/welt_training/experiments/easy-tasks/letter-count.yaml b/welt_training/experiments/easy-tasks/letter-count.yaml new file mode 100644 index 0000000..5ffc703 --- /dev/null +++ b/welt_training/experiments/easy-tasks/letter-count.yaml @@ -0,0 +1,66 @@ +# Model Setup +image_encoder_model_name_or_path: null +bytes_encoder_model_name_or_path: prajjwal1/bert-tiny +latent_transformer_model_name_or_path: sbintuitions/tiny-lm +bytes_decoder_model_name_or_path: sign/utf8-lm-tiny +load_pretrained: false + +# Dataset setup +dataset_name: magus4450/english-words-small-letter-count +dataset_text_template: + - "\x0E{text}\x0F " + - "{count}" +max_sequence_length: 128 +max_word_length: 16 + +max_eval_samples: 32 + +# Data Loader +dataloader_num_workers: 8 +dataloader_prefetch_factor: 4 +dataloader_pin_memory: true +dataloader_persistent_workers: true + +# Training setup +remove_unused_columns: false # Necessary +per_device_train_batch_size: 32 +per_device_eval_batch_size: 32 +auto_find_batch_size: true +output_dir: ./output/letter-count +overwrite_output_dir: true +do_train: true + +# Optimizer setup +optim: dion +weight_decay: 0.001 +max_steps: 10000 +warmup_ratio: 0.05 +learning_rate: 6.0e-4 + +# Evaluation +do_eval: true +eval_on_start: true +eval_strategy: steps +eval_steps: 100 +metric_for_best_model: chrf # Using generation-based metric +eval_metrics: [sacrebleu, chrf, wer] # Generation-based evaluation metrics +predict_with_generate: true +generation_max_length: 50 # Max tokens/words to generate during evaluation +log_samples: 5 # Number of sample predictions to log + +# Logging +logging_steps: 10 +logging_strategy: steps +include_tokens_per_second: true +include_num_input_tokens_seen: true +report_to: wandb + +# FLOPS profiling +profile_flops: true +flops_profile_steps: 500 +flops_warmup_steps: 10 +flops_active_steps: 10 + +# Dtype +bf16: true +dtype: bfloat16 From fc6355522f270b1d4b1592694bab6dd8310447d5 Mon Sep 17 00:00:00 2001 From: Sugam Karki <71115441+Magus4450@users.noreply.github.com> Date: Mon, 19 Jan 2026 09:32:51 +0400 Subject: [PATCH 2/2] Update welt_training/experiments/easy-tasks/letter-count.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- welt_training/experiments/easy-tasks/letter-count.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/welt_training/experiments/easy-tasks/letter-count.yaml b/welt_training/experiments/easy-tasks/letter-count.yaml index 5ffc703..7d2d344 100644 --- a/welt_training/experiments/easy-tasks/letter-count.yaml +++ b/welt_training/experiments/easy-tasks/letter-count.yaml @@ -3,7 +3,7 @@ image_encoder_model_name_or_path: null bytes_encoder_model_name_or_path: prajjwal1/bert-tiny latent_transformer_model_name_or_path: sbintuitions/tiny-lm bytes_decoder_model_name_or_path: sign/utf8-lm-tiny -load_pretrained: false +load_pretrained: true # Dataset setup dataset_name: magus4450/english-words-small-letter-count