# Tokenize the training split in a single bulk pass and replace its columns
# with the tokenizer's output.
# NOTE(review): the mapped function is named `preprocess_function_test` but is
# applied to the *train* split — confirm this is intentional and not a
# copy-paste from the test-split pipeline.
_train_map_options = dict(
    batched=True,
    batch_size=100000,  # very large batches: the whole split is processed in few calls
    num_proc=1,  # single worker process
    remove_columns=dataset["train"].column_names,  # drop raw columns, keep only tokenizer output
    load_from_cache_file=False,  # always recompute instead of reusing the Arrow cache
    desc="Running tokenizer on dataset",
    keep_in_memory=True,  # hold the processed split in RAM rather than writing to disk
)
processed_dataset['train'] = dataset['train'].map(
    preprocess_function_test, **_train_map_options
)