# ---------------------------------------------------------------------------
# diff --git a/examples/neuron-mapping/activation_observations.py b/examples/neuron-mapping/activation_observations.py
# new file mode 100644
# index 0000000..ff11d7d
# ---------------------------------------------------------------------------
import torch
import os
import numpy as np
import random
from datetime import datetime
from taker.eval import run_evaluation
from taker.data_classes import PruningConfig, EvalConfig, RunDataHistory
from taker.activations import get_top_frac, get_midlayer_activations
from taker.model import Model
from taker.texts import infer_dataset_config
from taker.eval import evaluate_all

print("starting activation observations")

c = PruningConfig(
    wandb_project = "testing", # repo to push results to
    # model_repo = "nickypro/tinyllama-15M",
    # model_repo = "facebook/opt-1.3b",
    model_repo = "nickypro/llama-7b-hf-rand",
    token_limit = 1000, # trim the input to this max length
    run_pre_test = False, # evaluate the unpruned model
    eval_sample_size = 1e3,
    collection_sample_size = 1e3,
    # Removals parameters
    ff_frac = 0.01, # % of feed forward neurons to prune
    attn_frac = 0.00, # % of attention neurons to prune
    focus = "pile", # the “reference” dataset
    cripple = "physics", # the “unlearned” dataset
    additional_datasets=tuple(), # any extra datasets to evaluate on
    recalculate_activations = False, # iterative vs non-iterative
    dtype = "int4",
    n_steps = 1,
)


def set_seed(seed):
    """Seed every random number generator this script relies on.

    Args:
        seed: integer seed applied to ``random``, NumPy and PyTorch.
    """
    random.seed(seed)                 # Python random module.
    np.random.seed(seed)              # Numpy module.
    torch.manual_seed(seed)           # PyTorch random number generator for CPU.
    torch.cuda.manual_seed(seed)      # PyTorch random number generator for the current CUDA device.
    # Also seed the remaining CUDA devices so multi-GPU runs are reproducible.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True  # Ask cuDNN for deterministic algorithms.
    torch.backends.cudnn.benchmark = False


def save_data_dict(model_size: str,
                   data: object,
                   name: str):
    """Save ``data`` twice with ``torch.save``: as "-recent" and as a timestamped copy.

    Files are written below ``saved_tensors/<model_size>/`` relative to the
    current working directory; the directory is created when missing.

    Args:
        model_size: sub-directory name, also embedded in the file name.
        data: any object accepted by ``torch.save``.
        name: file name prefix.

    Returns:
        The path of the timestamped file (the last one written).
    """
    now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    out_dir = f'saved_tensors/{model_size}'
    os.makedirs(out_dir, exist_ok=True)
    filepath = ""
    # "recent" is overwritten on every run; the timestamped copy is kept.
    for tag in ("recent", now):
        filepath = f'{out_dir}/{name}-{model_size}-{tag}.pt'
        torch.save(data, filepath)
        print(f'Saved {filepath} to {model_size}')
    return filepath


def load_pt_file(filepath: str):
    """Load a ``.pt`` file with ``torch.load``, print its top-level keys, return it.

    NOTE(review): ``torch.load`` unpickles the file; only use it on files you
    produced yourself.
    """
    data = torch.load(filepath)
    for key in data:
        print(key)
    return data


def get_activations(c: PruningConfig, datasets: list[str]):
    """Collect mid-layer activations for each dataset and save them to disk.

    One model is built from ``c`` and reused for every dataset. For each
    dataset the "criteria", "attn", "input_ids" and "expected_ids" entries of
    the collected ``raw`` data are saved through ``save_data_dict``.

    Args:
        c: pruning configuration providing the model and collection settings.
        datasets: dataset names passed to ``get_midlayer_activations``.

    Returns:
        List of the timestamped file paths, one per dataset, in input order.
    """
    # Initialise the model once; it is shared by all datasets.
    opt = Model(
        c.model_size,
        limit=c.token_limit,
        dtype=c.dtype,
        svd_attn=c.svd_attn,
        use_accelerator=c.use_accelerator,
        model_device=c.model_device,
        mask_fn=c.mask_fn,
    )
    saved_files = []
    for dataset in datasets:
        midlayer_activations = get_midlayer_activations(
            opt=opt,
            dataset_name=dataset,
            sample_size=c.collection_sample_size,
            attn_mode=c.attn_mode,
            collect_attn=True,
            collect_ids=True,
            random_subset_frac=0.01
        )
        raw = midlayer_activations.raw

        results = {"dataset": dataset}
        for key in ("criteria", "attn", "input_ids", "expected_ids"):
            results[key] = raw[key]

        # NOTE(review): the output folder is fixed to "llama-7b" regardless of
        # c.model_repo - confirm this is intended before changing the model.
        filepath = save_data_dict("llama-7b", results, f"{dataset}_activations")
        print(f"file for {dataset} activations saved to: ", filepath)
        saved_files.append(filepath)

    return saved_files


all_datasets = ["biology",
                "chemistry",
                "civil",
                "code",
                "emotion",
                "math",
                "physics",
                "pile_ArXiv",
                "pile_Enron_Emails",
                "pile_EuroParl",
                "pile_FreeLaw",
                "pile_Github",
                "pile_Gutenberg",
                "pile_HackerNews",
                "pile_NIH_ExPorter",
                "pile_PhilPapers",
                "pile_PubMed_Abstracts",
                "pile_PubMed_Central",
                "pile_StackExchange",
                "pile_Ubuntu_IRC",
                "pile_USPTO_Backgrounds",
                "pile_Wikipedia",
                "poems"]

test_datasets = ["physics", "math", "code", "pile_Github", "pile_HackerNews"]

data = get_activations(c, all_datasets)

# filepath = save_data_dict("llama-7b", data, "test_activations")
# print("file saved to: ", filepath)

# filepath = "saved_tensors/llama-7b/test_activations-llama-7b-recent.pt"
# loaded_data = load_pt_file(filepath)["physics"]

# print(loaded_data["attn"])

# ---------------------------------------------------------------------------
# diff --git a/examples/neuron-mapping/compare.py b/examples/neuron-mapping/compare.py
# index 47a0f40..8449f5a 100644
# (post-patch contents)
# ---------------------------------------------------------------------------
import torch
import datetime
import os
from taker.activations import get_top_frac


def load_tensors_for_repo(repo, model_size="hf", timestamp="recent",
                          base_dir="/home/ubuntu/taker-rashid/examples/neuron-mapping/saved_tensors/"):
    """Load the saved pruning tensors for one repo.

    The file ``<base_dir>/<model_size>/<repo>-pile-<model_size>-<timestamp>.pt``
    is read with ``torch.load``.

    Args:
        repo: dataset/repo name used as the file name prefix.
        model_size: sub-directory and file name component.
        timestamp: file name suffix, "recent" by default.
        base_dir: directory containing the per-model folders; defaults to the
            location used on the original machine.

    Returns:
        Tuple ``(ff_scores, ff_criteria)`` taken from the loaded dict.
    """
    filename = repo+"-pile-"+model_size+"-"+timestamp+".pt"
    data = torch.load(os.path.join(base_dir, model_size, filename))
    return data["ff_scores"], data["ff_criteria"]


def get_ff_criteria_for_ff_frac(repo, ff_frac, model_size="hf"):
    """Return the first element of ``get_top_frac(ff_scores, ff_frac)`` for ``repo``."""
    ff_scores, _ = load_tensors_for_repo(repo, model_size)
    criteria, _ = get_top_frac(ff_scores, ff_frac)
    return criteria


def compare_pruned_ff_criteria(repos, ff_fracs, model_size="hf"):
    """Measure how much the pruning selections of different repos overlap.

    For every fraction in ``ff_fracs`` and every ordered pair of distinct
    repos, the ratio ``sum(criteria1 AND criteria2) / sum(criteria1)`` is
    stored at ``ratios[ff_frac][repo1][repo2]``.

    Args:
        repos: repo names; repeated names are processed only once.
        ff_fracs: fractions passed to ``get_top_frac``.
        model_size: which saved tensors to load.

    Returns:
        Nested dict ``{ff_frac: {repo1: {repo2: ratio}}}``.
    """
    # Preserve order but drop repeats: they would only recompute identical entries.
    unique_repos = list(dict.fromkeys(repos))
    ratios = {}
    for ff_frac in ff_fracs:
        # Load each repo once per fraction instead of once per pair.
        criteria = {
            repo: get_ff_criteria_for_ff_frac(repo, ff_frac, model_size)
            for repo in unique_repos
        }
        per_frac = {}
        for repo1 in unique_repos:
            selected = torch.sum(criteria[repo1])
            per_frac[repo1] = {}
            for repo2 in unique_repos:
                if repo1 == repo2:
                    continue
                matches = torch.logical_and(criteria[repo1], criteria[repo2])
                per_frac[repo1][repo2] = torch.sum(matches)/selected
        ratios[ff_frac] = per_frac

    return ratios


def load_pt_file(directory: str, filename: str):
    """Load ``directory + filename`` with ``torch.load``, print its keys, return it."""
    data = torch.load(directory+filename)
    for key in data:
        print(key)
    return data


def save_pruning_data_dict(model_size: str,
                           data: object,
                           name: str):
    """Save ``data`` as a "-recent" file and as a timestamped copy.

    Files go to ``saved_tensors/<model_size>/`` relative to the current
    working directory.

    Returns:
        The path of the timestamped file (the last one written).
    """
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    out_dir = f'saved_tensors/{model_size}'
    os.makedirs(out_dir, exist_ok=True)
    filename = ""
    for tag in ("recent", now):
        filename = f'{out_dir}/{name}-{model_size}-{tag}.pt'
        torch.save(data, filename)
        print(f'Saved {filename} to {model_size}')
    return filename


ff_fracs = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
cripple_repos = ["biology", "chemistry", "physics", "math", "code", "poems", "civil",
                 "emotion", "pile_FreeLaw", "pile_PubMed_Abstracts", "pile_PubMed_Central",
                 "pile_NIH_ExPorter", "pile_Enron_Emails", "pile_Github", "pile_StackExchange",
                 "pile_HackerNews", "pile_ArXiv", "pile_Wikipedia", "pile_Ubuntu_IRC",
                 "pile_USPTO_Backgrounds", "pile_PhilPapers", "pile_EuroParl", "pile_Gutenberg"]

ratios = compare_pruned_ff_criteria(cripple_repos, ff_fracs)
filename = save_pruning_data_dict("hf", ratios, "pruning_ratios")
print("saved pruning ratios to: ", filename)

# ---------------------------------------------------------------------------
# diff --git a/examples/neuron-mapping/crossevaluate-mistral.py b/examples/neuron-mapping/crossevaluate-mistral.py
# new file mode 100644
# index 0000000..085ef11
# ---------------------------------------------------------------------------
import torch
import os
from datetime import datetime
from taker.eval import run_evaluation
from taker.data_classes import PruningConfig, EvalConfig
from taker.activations import get_top_frac
from taker.model import Model
from taker.texts import infer_dataset_config


# Most of PruningConfig is not used, but some eval functions use this as copied from elsewhere.
c = PruningConfig(
    wandb_project = "testing", # repo to push results to
    # model_repo = "nickypro/tinyllama-15M",
    # model_repo = "facebook/opt-1.3b",
    model_repo = "mistralai/Mistral-7b-instruct-v0.2",
    token_limit = 1000, # trim the input to this max length
    run_pre_test = False, # evaluate the unpruned model
    eval_sample_size = 1e4,
    collection_sample_size = 1e4,
    # Removals parameters
    ff_frac = 0.01, # % of feed forward neurons to prune
    attn_frac = 0.00, # % of attention neurons to prune
    focus = "pile", # the “reference” dataset
    cripple = "physics", # the “unlearned” dataset
    additional_datasets=tuple(), # any extra datasets to evaluate on
    recalculate_activations = False, # iterative vs non-iterative
    dtype = "int4",
    n_steps = 1,
)

# Single shared model; find_accuracy_fn re-initialises it before every evaluation.
opt = Model(
    c.model_size,
    limit=c.token_limit,
    dtype=c.dtype,
    svd_attn=c.svd_attn,
    use_accelerator=c.use_accelerator,
    model_device=c.model_device,
    mask_fn=c.mask_fn,
)

all_datasets = ["biology",
                "chemistry",
                "civil",
                "code",
                "emotion",
                "math",
                "physics",
                "pile_ArXiv",
                "pile_Enron_Emails",
                "pile_EuroParl",
                "pile_FreeLaw",
                "pile_Github",
                "pile_Gutenberg",
                "pile_HackerNews",
                "pile_NIH_ExPorter",
                "pile_PhilPapers",
                "pile_PubMed_Abstracts",
                "pile_PubMed_Central",
                "pile_StackExchange",
                "pile_Ubuntu_IRC",
                "pile_USPTO_Backgrounds",
                "pile_Wikipedia",
                "poems"]

test_datasets = ["biology", "chemistry", "physics", "code", "pile_Github"]

pile_datasets = ["pile_ArXiv",
                 "pile_Enron_Emails",
                 "pile_EuroParl",
                 "pile_FreeLaw",
                 "pile_Github",
                 "pile_Gutenberg",
                 "pile_HackerNews",
                 "pile_NIH_ExPorter",
                 "pile_PhilPapers",
                 "pile_PubMed_Abstracts",
                 "pile_PubMed_Central",
                 "pile_StackExchange",
                 "pile_Ubuntu_IRC",
                 "pile_USPTO_Backgrounds",
                 "pile_Wikipedia"]

mmlu_subsets = ['abstract_algebra', 'anatomy', 'astronomy', 'business_ethics',
                'clinical_knowledge', 'college_biology', 'college_chemistry',
                'college_computer_science', 'college_mathematics', 'college_medicine',
                'college_physics', 'computer_security', 'conceptual_physics',
                'econometrics', 'electrical_engineering', 'elementary_mathematics',
                'formal_logic', 'global_facts', 'high_school_biology',
                'high_school_chemistry', 'high_school_computer_science',
                'high_school_european_history', 'high_school_geography',
                'high_school_government_and_politics', 'high_school_macroeconomics',
                'high_school_mathematics', 'high_school_microeconomics',
                'high_school_physics', 'high_school_psychology',
                'high_school_statistics', 'high_school_us_history',
                'high_school_world_history', 'human_aging', 'human_sexuality',
                'international_law', 'jurisprudence', 'logical_fallacies',
                'machine_learning', 'management', 'marketing', 'medical_genetics',
                'miscellaneous', 'moral_disputes', 'moral_scenarios', 'nutrition',
                'philosophy', 'prehistory', 'professional_accounting',
                'professional_law', 'professional_medicine',
                'professional_psychology', 'public_relations', 'security_studies',
                'sociology', 'us_foreign_policy', 'virology', 'world_religions']

# MMLU targets are addressed as "mmlu:<subset>"; find_accuracy_fn splits the prefix off.
mmlu_datasets = ["mmlu:"+x for x in mmlu_subsets]

# Sentinel returned by find_accuracy when every attempt raised.
FAILED_ACCURACY = -1


# The default directory is hard-coded; pass base_dir when the repository is
# cloned somewhere else.
def load_tensors_for_repo(repo, model_size="v0.2", timestamp="recent",
                          base_dir="/home/ubuntu/taker-rashid/examples/neuron-mapping/saved_tensors/"):
    """Load the saved pruning tensors for one repo.

    Reads ``<base_dir>/<model_size>/<repo>-pile-<model_size>-<timestamp>.pt``
    with ``torch.load``.

    Returns:
        Tuple ``(ff_scores, ff_criteria)`` taken from the loaded dict.
    """
    filename = repo+"-pile-"+model_size+"-"+timestamp+".pt"
    data = torch.load(os.path.join(base_dir, model_size, filename))
    return data["ff_scores"], data["ff_criteria"]


def save_data_dict(model_size: str,
                   data: object,
                   name: str):
    """Save ``data`` as a "-recent" file and as a timestamped copy.

    Files go to ``saved_tensors/<model_size>/`` relative to the current
    working directory.

    Returns:
        The path of the timestamped file (the last one written).
    """
    now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    out_dir = f'saved_tensors/{model_size}'
    os.makedirs(out_dir, exist_ok=True)
    filename = ""
    for tag in ("recent", now):
        filename = f'{out_dir}/{name}-{model_size}-{tag}.pt'
        torch.save(data, filename)
        print(f'Saved {filename} to {model_size}')
    return filename


def get_ff_criteria_for_ff_frac(repo, ff_frac):
    """Return the first element of ``get_top_frac(ff_scores, ff_frac)`` for ``repo``."""
    ff_scores, _ = load_tensors_for_repo(repo)
    criteria, _ = get_top_frac(ff_scores, ff_frac)
    return criteria


def find_accuracy(pruning_dataset, target_dataset, ff_frac, attempts=3):
    """Evaluate accuracy with retries.

    Args:
        pruning_dataset: dataset whose scores decide which neurons are pruned.
        target_dataset: dataset the accuracy is measured on.
        ff_frac: fraction of feed-forward neurons to prune; 0 means unpruned.
        attempts: how many times to try before giving up.

    Returns:
        The accuracy from the first successful ``find_accuracy_fn`` call, or
        ``FAILED_ACCURACY`` (-1) when every attempt raised.
    """
    for attempt in range(1, attempts + 1):
        try:
            # Return the measured value; it must not be thrown away.
            return find_accuracy_fn(pruning_dataset, target_dataset, ff_frac)
        except Exception as err:
            # Best-effort retry, but report the failure instead of hiding it.
            print(f"find_accuracy attempt {attempt}/{attempts} failed for "
                  f"pruning dataset: {pruning_dataset}, target dataset: "
                  f"{target_dataset}, ff_frac: {ff_frac}: {err!r}")
    return FAILED_ACCURACY


def find_accuracy_fn(pruning_dataset, target_dataset, ff_frac):
    """Run one evaluation on the shared model and return ``percent["base"]``.

    The shared model is re-initialised first, then (for ``ff_frac != 0``) the
    feed-forward keys selected from ``pruning_dataset`` are deleted before
    evaluating on ``target_dataset``.
    """
    opt.init_model(do_model_import=False)

    mmlu_prefix = "mmlu:"
    if target_dataset.startswith("mmlu"):
        eval_config: EvalConfig = infer_dataset_config("mmlu")
        eval_config.dataset_subset = target_dataset[len(mmlu_prefix):]
    else:
        eval_config: EvalConfig = infer_dataset_config(target_dataset)
    # Skip the tokens that were used when collecting activations.
    eval_config.num_tokens_to_skip = c.collection_sample_size
    eval_config.sample_size = c.eval_sample_size

    if ff_frac != 0:
        ff_criteria = get_ff_criteria_for_ff_frac(pruning_dataset, ff_frac)
        opt.delete_ff_keys(ff_criteria)
    eval_data = run_evaluation(opt, eval_config)
    return eval_data.percent["base"]


def find_correct_ff_frac(dataset: str, target_accuracy: float, accuracy_precision: float, ff_frac_precision=1e-6, lower=0, upper=1):
    """Binary-search the pruning fraction that drops accuracy just below a target.

    The search stops as soon as the accuracy at the midpoint lies in
    ``[target_accuracy - accuracy_precision, target_accuracy]``.

    Args:
        dataset: used both for pruning and for measuring accuracy.
        target_accuracy: accuracy to reach (result must not exceed it).
        accuracy_precision: accepted distance below the target.
        ff_frac_precision: minimum width of the search interval.
        lower: initial lower bound; reset to 0 if its accuracy is already
            below the target.
        upper: initial upper bound; reset to 1 if its accuracy is still above
            the target.

    Returns:
        The midpoint that met the target, or ``lower`` when the interval
        became narrower than ``ff_frac_precision`` first.
    """
    # TODO: also check if should compare base accuracy or topk accuracy.
    print(f"trying to find correct ff_frac for {dataset} with target accuracy {target_accuracy}")
    # Check that upper and lower actually bracket the target; otherwise fall back to 1 and 0.
    if upper < 1:
        acc_upper = find_accuracy(dataset, dataset, upper)
        if acc_upper > target_accuracy:
            upper = 1
    if lower > 0:
        acc_lower = find_accuracy(dataset, dataset, lower)
        if acc_lower < target_accuracy:
            lower = 0

    while upper >= lower + ff_frac_precision:
        mid = (lower + upper)/2
        acc_mid = find_accuracy(dataset, dataset, mid)
        if target_accuracy - accuracy_precision <= acc_mid <= target_accuracy:
            return mid
        if acc_mid < target_accuracy:
            upper = mid   # pruned too much
        else:
            lower = mid   # pruned too little
    print("reached limit of ff_frac_precision without reaching target accuracy, likely due to evaluation not being deterministic")
    return lower


def _unpruned_accuracy(dataset, cache):
    """Return the unpruned accuracy of ``dataset``, evaluating it at most once."""
    if dataset not in cache:
        accuracy = find_accuracy(dataset, dataset, 0)
        if accuracy == FAILED_ACCURACY:
            return accuracy   # do not remember failures; retry next time
        cache[dataset] = accuracy
    return cache[dataset]


def compareEvaluations(datasets):
    """Cross-evaluate pruning: prune for one dataset, measure on all others.

    For each pruning dataset the fraction that lowers its own accuracy to 80%
    of the unpruned value is searched, then every dataset in ``datasets`` and
    every MMLU subset is evaluated with that pruning.

    Returns:
        Dict with "sample_size" plus, per pruning dataset, its
        "unpruned_accuracy", "target_accuracy", "ff_frac" and one entry per
        target dataset holding "unpruned_accuracy", "pruned_accuracy",
        "accuracy_difference" and "accuracy_ratio".
    """
    final_data = {}
    final_data["sample_size"] = c.eval_sample_size
    # With ff_frac == 0 nothing is pruned, so the baseline depends only on the
    # target dataset; evaluate it once instead of once per pruning dataset.
    baselines = {}
    for dataset1 in datasets:
        dataset_start = datetime.now()
        unpruned_accuracy = _unpruned_accuracy(dataset1, baselines)
        target_accuracy = 0.8*unpruned_accuracy
        ff_frac = find_correct_ff_frac(dataset1, target_accuracy=target_accuracy, accuracy_precision=2, upper=0.128)
        final_data[dataset1] = {
            "unpruned_accuracy": unpruned_accuracy,
            "target_accuracy": target_accuracy,
            "ff_frac": ff_frac,
        }

        for dataset2 in [*datasets, *mmlu_datasets]:
            unpruned_accuracy = _unpruned_accuracy(dataset2, baselines)
            pruned_accuracy = find_accuracy(dataset1, dataset2, ff_frac)
            final_data[dataset1][dataset2] = {
                "unpruned_accuracy": unpruned_accuracy,
                "pruned_accuracy": pruned_accuracy,
                "accuracy_difference": unpruned_accuracy - pruned_accuracy,
                # A baseline of 0 would otherwise abort the whole run.
                "accuracy_ratio": (pruned_accuracy/unpruned_accuracy
                                   if unpruned_accuracy else float("nan")),
            }
        dataset_end = datetime.now()
        print("time to get data for dataset: ", dataset1, "is ", dataset_end - dataset_start, "results: ", final_data[dataset1])

    return final_data


startTime = datetime.now()
print("run started at: ", startTime)
answer = compareEvaluations([ *all_datasets[1:], all_datasets[0]])
saved_file_name = save_data_dict("v0.2", answer, "cross_pruning_accuracy")
print("saved to: ", saved_file_name, "data: ", answer)
endTime = datetime.now()
print("run ended at: ", endTime, "time elapsed: ", endTime - startTime)

# ---------------------------------------------------------------------------
# diff --git a/examples/neuron-mapping/crossevaluate.py b/examples/neuron-mapping/crossevaluate.py
# new file mode 100644
# index 0000000..5d0fb9a
# ---------------------------------------------------------------------------
import torch
import os
from datetime import datetime
from taker.eval import run_evaluation
from taker.data_classes import PruningConfig, EvalConfig
from taker.activations import get_top_frac
from taker.model import Model
from taker.texts import infer_dataset_config


# Most of PruningConfig is not used, but some eval functions use this as copied from elsewhere.
c = PruningConfig(
    wandb_project = "testing", # repo to push results to
    # model_repo = "nickypro/tinyllama-15M",
    # model_repo = "facebook/opt-1.3b",
    model_repo = "NousResearch/Llama-2-7b-hf",
    token_limit = 1000, # trim the input to this max length
    run_pre_test = False, # evaluate the unpruned model
    eval_sample_size = 1e4,
    collection_sample_size = 1e4,
    # Removals parameters
    ff_frac = 0.01, # % of feed forward neurons to prune
    attn_frac = 0.00, # % of attention neurons to prune
    focus = "pile", # the “reference” dataset
    cripple = "physics", # the “unlearned” dataset
    additional_datasets=tuple(), # any extra datasets to evaluate on
    recalculate_activations = False, # iterative vs non-iterative
    dtype = "int8",
    n_steps = 1,
)

all_datasets = ["biology",
                "chemistry",
                "civil",
                "code",
                "emotion",
                "math",
                "physics",
                "pile_ArXiv",
                "pile_Enron_Emails",
                "pile_EuroParl",
                "pile_FreeLaw",
                "pile_Github",
                "pile_Gutenberg",
                "pile_HackerNews",
                "pile_NIH_ExPorter",
                "pile_PhilPapers",
                "pile_PubMed_Abstracts",
                "pile_PubMed_Central",
                "pile_StackExchange",
                "pile_Ubuntu_IRC",
                "pile_USPTO_Backgrounds",
                "pile_Wikipedia",
                "poems"]

test_datasets = ["biology", "chemistry", "physics", "code", "pile_Github"]

pile_datasets = ["pile_ArXiv",
                 "pile_Enron_Emails",
                 "pile_EuroParl",
                 "pile_FreeLaw",
                 "pile_Github",
                 "pile_Gutenberg",
                 "pile_HackerNews",
                 "pile_NIH_ExPorter",
                 "pile_PhilPapers",
                 "pile_PubMed_Abstracts",
                 "pile_PubMed_Central",
                 "pile_StackExchange",
                 "pile_Ubuntu_IRC",
                 "pile_USPTO_Backgrounds",
                 "pile_Wikipedia"]


# The default directory is hard-coded; pass base_dir when the repository is
# cloned somewhere else.
def load_tensors_for_repo(repo, model_size="hf", timestamp="recent",
                          base_dir="/home/ubuntu/taker-rashid/examples/neuron-mapping/saved_tensors/"):
    """Load the saved pruning tensors for one repo.

    Reads ``<base_dir>/<model_size>/<repo>-pile-<model_size>-<timestamp>.pt``
    with ``torch.load``.

    Returns:
        Tuple ``(ff_scores, ff_criteria)`` taken from the loaded dict.
    """
    filename = repo+"-pile-"+model_size+"-"+timestamp+".pt"
    data = torch.load(os.path.join(base_dir, model_size, filename))
    return data["ff_scores"], data["ff_criteria"]


def save_data_dict(model_size: str,
                   data: object,
                   name: str):
    """Save ``data`` as a "-recent" file and as a timestamped copy.

    Files go to ``saved_tensors/<model_size>/`` relative to the current
    working directory.

    Returns:
        The path of the timestamped file (the last one written).
    """
    now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    out_dir = f'saved_tensors/{model_size}'
    os.makedirs(out_dir, exist_ok=True)
    filename = ""
    for tag in ("recent", now):
        filename = f'{out_dir}/{name}-{model_size}-{tag}.pt'
        torch.save(data, filename)
        print(f'Saved {filename} to {model_size}')
    return filename


def get_ff_criteria_for_ff_frac(repo, ff_frac, model_size="hf"):
    """Return the first element of ``get_top_frac(ff_scores, ff_frac)`` for ``repo``."""
    ff_scores, _ = load_tensors_for_repo(repo, model_size)
    criteria, _ = get_top_frac(ff_scores, ff_frac)
    return criteria


def find_accuracy(pruning_dataset, target_dataset, ff_frac):
    """Build a fresh model, optionally prune it, and return ``percent["base"]``.

    Args:
        pruning_dataset: dataset whose scores decide which neurons are pruned.
        target_dataset: dataset the accuracy is measured on.
        ff_frac: fraction of feed-forward neurons to prune; 0 means unpruned.
    """
    # A new model per call guarantees earlier pruning never leaks into this
    # evaluation. NOTE(review): this is expensive - consider re-initialising a
    # shared model instead once that is confirmed to restore pruned weights.
    opt = Model(
        c.model_size,
        limit=c.token_limit,
        dtype=c.dtype,
        svd_attn=c.svd_attn,
        use_accelerator=c.use_accelerator,
        model_device=c.model_device,
        mask_fn=c.mask_fn,
    )

    eval_config: EvalConfig = infer_dataset_config(target_dataset)
    # Skip the tokens that were used when collecting activations.
    eval_config.num_tokens_to_skip = c.collection_sample_size
    eval_config.sample_size = c.eval_sample_size

    if ff_frac != 0:
        ff_criteria = get_ff_criteria_for_ff_frac(pruning_dataset, ff_frac)
        opt.delete_ff_keys(ff_criteria)
    eval_data = run_evaluation(opt, eval_config)
    return eval_data.percent["base"]


def find_correct_ff_frac(dataset: str, target_accuracy: float, accuracy_precision: float, ff_frac_precision=1e-6, lower=0, upper=1):
    """Binary-search the pruning fraction that drops accuracy just below a target.

    The search stops as soon as the accuracy at the midpoint lies in
    ``[target_accuracy - accuracy_precision, target_accuracy]``.

    Args:
        dataset: used both for pruning and for measuring accuracy.
        target_accuracy: accuracy to reach (result must not exceed it).
        accuracy_precision: accepted distance below the target.
        ff_frac_precision: minimum width of the search interval.
        lower: initial lower bound; reset to 0 if its accuracy is already
            below the target.
        upper: initial upper bound; reset to 1 if its accuracy is still above
            the target.

    Returns:
        The midpoint that met the target, or ``lower`` when the interval
        became narrower than ``ff_frac_precision`` first.
    """
    # TODO: also check if should compare base accuracy or topk accuracy.
    print(f"trying to find correct ff_frac for {dataset} with target accuracy {target_accuracy}")
    # Check that upper and lower actually bracket the target; otherwise fall back to 1 and 0.
    if upper < 1:
        acc_upper = find_accuracy(dataset, dataset, upper)
        if acc_upper > target_accuracy:
            upper = 1
    if lower > 0:
        acc_lower = find_accuracy(dataset, dataset, lower)
        if acc_lower < target_accuracy:
            lower = 0

    while upper >= lower + ff_frac_precision:
        mid = (lower + upper)/2
        acc_mid = find_accuracy(dataset, dataset, mid)
        if target_accuracy - accuracy_precision <= acc_mid <= target_accuracy:
            return mid
        if acc_mid < target_accuracy:
            upper = mid   # pruned too much
        else:
            lower = mid   # pruned too little
    print("reached limit of ff_frac_precision without reaching target accuracy, likely due to evaluation not being deterministic")
    return lower


def _unpruned_accuracy(dataset, cache):
    """Return the unpruned accuracy of ``dataset``, evaluating it at most once."""
    if dataset not in cache:
        cache[dataset] = find_accuracy(dataset, dataset, 0)
    return cache[dataset]


def compareEvaluations(datasets):
    """Cross-evaluate pruning: prune for one dataset, measure on all others.

    For each pruning dataset the fraction that lowers its own accuracy to 80%
    of the unpruned value is searched, then every dataset in ``datasets`` is
    evaluated with that pruning.

    Returns:
        Dict with "sample_size" plus, per pruning dataset, its
        "unpruned_accuracy", "target_accuracy", "ff_frac" and one entry per
        target dataset holding "unpruned_accuracy", "pruned_accuracy",
        "accuracy_difference" and "accuracy_ratio".
    """
    final_data = {}
    final_data["sample_size"] = c.eval_sample_size
    # With ff_frac == 0 nothing is pruned, so the baseline depends only on the
    # target dataset; evaluate it once instead of once per pruning dataset
    # (each evaluation builds a whole new model).
    baselines = {}
    for dataset1 in datasets:
        dataset_start = datetime.now()
        unpruned_accuracy = _unpruned_accuracy(dataset1, baselines)
        target_accuracy = 0.8*unpruned_accuracy
        ff_frac = find_correct_ff_frac(dataset1, target_accuracy=target_accuracy, accuracy_precision=2, upper=0.128)
        final_data[dataset1] = {
            "unpruned_accuracy": unpruned_accuracy,
            "target_accuracy": target_accuracy,
            "ff_frac": ff_frac,
        }

        for dataset2 in datasets:
            unpruned_accuracy = _unpruned_accuracy(dataset2, baselines)
            pruned_accuracy = find_accuracy(dataset1, dataset2, ff_frac)
            final_data[dataset1][dataset2] = {
                "unpruned_accuracy": unpruned_accuracy,
                "pruned_accuracy": pruned_accuracy,
                "accuracy_difference": unpruned_accuracy - pruned_accuracy,
                # A baseline of 0 would otherwise abort the whole run.
                "accuracy_ratio": (pruned_accuracy/unpruned_accuracy
                                   if unpruned_accuracy else float("nan")),
            }
        dataset_end = datetime.now()
        print("time to get data for dataset: ", dataset1, "is ", dataset_end - dataset_start, "results: ", final_data[dataset1])

    return final_data


startTime = datetime.now()
print("run started at: ", startTime)
answer = compareEvaluations(pile_datasets)
saved_file_name = save_data_dict("hf", answer, "cross_pruning_accuracy")
print("saved to: ", saved_file_name, "data: ", answer)
endTime = datetime.now()
print("run ended at: ", endTime, "time elapsed: ", endTime - startTime)

# ---------------------------------------------------------------------------
# diff --git a/examples/neuron-mapping/crossevaluation_results.py b/examples/neuron-mapping/crossevaluation_results.py
# new file mode 100644
# index 0000000..fd99b9e
# ---------------------------------------------------------------------------
# Raw results pasted from the console (the data was not saved to a file during
# the run). Sample of 10k on llama-2-7b, targeting an accuracy of 80% of the
# unpruned value for each pile-related dataset.
+ +from datetime import datetime +import torch +import os + +answer = { + 'pile_ArXiv': + {'unpruned_accuracy': 56.328302259051, 'target_accuracy': 45.0626418072408, 'ff_frac': 0.096, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 44.963993291900955, 'accuracy_difference': 11.364308967150045, 'accuracy_ratio': 0.7982486865148861}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 53.551803376960734, 'accuracy_difference': 2.48776101508642, 'accuracy_ratio': 0.9556070600820111}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 57.87143779396846, 'accuracy_difference': 5.754864889790646, 'accuracy_ratio': 0.9095521090013046}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 64.13249810174639, 'accuracy_difference': 3.6256643887623454, 'accuracy_ratio': 0.9464911051968061}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 72.91837291837292, 'accuracy_difference': 5.642005642005643, 'accuracy_ratio': 0.9281825553110159}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 55.91955591955592, 'accuracy_difference': 4.531804531804532, 'accuracy_ratio': 0.9250338702393497}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 50.0, 'accuracy_difference': 3.5806902054657854, 'accuracy_ratio': 0.9331720029784066}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 54.818401937046005, 'accuracy_difference': 4.145278450363193, 'accuracy_ratio': 0.9296977660972405}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 51.223951223951225, 'accuracy_difference': 5.487305487305484, 'accuracy_ratio': 0.9032413350449294}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 57.221115537848604, 'accuracy_difference': 4.482071713147413, 'accuracy_ratio': 0.927360774818402}, + 
'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 57.47565747565748, 'accuracy_difference': 7.1799071799071825, 'accuracy_ratio': 0.8889514426460239}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 58.18770861967406, 'accuracy_difference': 4.800706852542703, 'accuracy_ratio': 0.923784289276808}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 56.627625952779326, 'accuracy_difference': 2.900167317345236, 'accuracy_ratio': 0.9512804497189256}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 61.83728552149605, 'accuracy_difference': 4.37632542895701, 'accuracy_ratio': 0.9339059542873781}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 61.09181141439206, 'accuracy_difference': 3.7717121588089313, 'accuracy_ratio': 0.9418515684774292} + }, + 'pile_Enron_Emails': + {'unpruned_accuracy': 56.039564392047154, 'target_accuracy': 44.83165151363772, 'ff_frac': 0.00396875, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 56.19019433757522, 'accuracy_difference': 0.138107921475779, 'accuracy_ratio': 0.9975481611208407}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 52.64262164052353, 'accuracy_difference': 3.396942751523625, 'accuracy_ratio': 0.9393831342485293}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 63.22973346859725, 'accuracy_difference': 0.39656921516185406, 'accuracy_ratio': 0.993767212639513}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 66.85649202733485, 'accuracy_difference': 0.9016704631738861, 'accuracy_ratio': 0.9866928141196245}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 77.66857766857767, 'accuracy_difference': 0.8918008918008979, 'accuracy_ratio': 0.9886482103556121}, + 'pile_Gutenberg': {'unpruned_accuracy': 
60.45136045136045, 'pruned_accuracy': 57.2026572026572, 'accuracy_difference': 3.248703248703251, 'accuracy_ratio': 0.946259220231823}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 51.58587672052663, 'accuracy_difference': 1.9948134849391579, 'accuracy_ratio': 0.9627699180938198}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.76029055690073, 'accuracy_difference': 0.20338983050847048, 'accuracy_ratio': 0.9965505913272011}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 56.21985621985622, 'accuracy_difference': 0.4914004914004906, 'accuracy_ratio': 0.9913350449293967}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 61.41434262948207, 'accuracy_difference': 0.2888446215139453, 'accuracy_ratio': 0.995318805488297}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 64.4007644007644, 'accuracy_difference': 0.2548002548002586, 'accuracy_ratio': 0.9960591133004926}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 62.909876300805024, 'accuracy_difference': 0.07853917141174094, 'accuracy_ratio': 0.9987531172069826}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 59.19315857966165, 'accuracy_difference': 0.33463469046291294, 'accuracy_ratio': 0.9943785134291068}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 65.97262386736071, 'accuracy_difference': 0.2409870830923495, 'accuracy_ratio': 0.9963604600378512}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 64.1985111662531, 'accuracy_difference': 0.6650124069478949, 'accuracy_ratio': 0.9897475133894414} + }, + 'pile_EuroParl': + {'unpruned_accuracy': 63.62630268375911, 'target_accuracy': 50.90104214700729, 'ff_frac': 0.007, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 
56.10141067376936, 'accuracy_difference': 0.226891585281642, 'accuracy_ratio': 0.9959719789842382}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 56.05954640823259, 'accuracy_difference': -0.0199820161854376, 'accuracy_ratio': 1.0003565697985382}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 49.24836299916997, 'accuracy_difference': 14.377939684589137, 'accuracy_ratio': 0.7740252210465285}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 67.77714502657555, 'accuracy_difference': -0.018982536066815214, 'accuracy_ratio': 1.0002801512816921}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 78.43297843297843, 'accuracy_difference': 0.1274001274001364, 'accuracy_ratio': 0.9983783157650873}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 60.42406042406042, 'accuracy_difference': 0.027300027300029228, 'accuracy_ratio': 0.9995483968086708}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 53.381208856971874, 'accuracy_difference': 0.19948134849391153, 'accuracy_ratio': 0.996276991809382}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.91525423728814, 'accuracy_difference': 0.04842615012105966, 'accuracy_ratio': 0.9991787122207623}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 54.663754663754666, 'accuracy_difference': 2.047502047502043, 'accuracy_ratio': 0.9638960205391528}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 61.41434262948207, 'accuracy_difference': 0.2888446215139453, 'accuracy_ratio': 0.995318805488297}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 64.64646464646465, 'accuracy_difference': 0.009100009100009743, 'accuracy_ratio': 0.9998592540464462}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 
'pruned_accuracy': 62.74298056155508, 'accuracy_difference': 0.245434910661686, 'accuracy_ratio': 0.9961034912718205}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 58.263617772820226, 'accuracy_difference': 1.2641754973043362, 'accuracy_ratio': 0.9787632729544035}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 66.20397146712936, 'accuracy_difference': 0.009639483323695686, 'accuracy_ratio': 0.999854418401514}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 64.53598014888337, 'accuracy_difference': 0.32754342431762495, 'accuracy_ratio': 0.9949502677888292} + }, + 'pile_FreeLaw': + {'unpruned_accuracy': 67.75816249050874, 'target_accuracy': 54.20652999240699, 'ff_frac': 0.024, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 56.17046463450725, 'accuracy_difference': 0.1578376245437525, 'accuracy_ratio': 0.9971978984238178}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 55.30022979318613, 'accuracy_difference': 0.7393345988610207, 'accuracy_ratio': 0.9868069174540918}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 62.372037259061145, 'accuracy_difference': 1.2542654246979623, 'accuracy_ratio': 0.9802869981156689}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 53.76803340926348, 'accuracy_difference': 13.990129081245257, 'accuracy_ratio': 0.7935285053929122}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 77.38647738647738, 'accuracy_difference': 1.1739011739011858, 'accuracy_ratio': 0.9850573381211628}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 56.05605605605606, 'accuracy_difference': 4.395304395304393, 'accuracy_ratio': 0.9272918861959958}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 52.80271294633952, 'accuracy_difference': 
0.777977259126267, 'accuracy_ratio': 0.9854802680565898}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.42130750605327, 'accuracy_difference': 0.5423728813559308, 'accuracy_ratio': 0.9908015768725361}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 55.03685503685504, 'accuracy_difference': 1.6744016744016719, 'accuracy_ratio': 0.9704749679075738}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 61.45418326693227, 'accuracy_difference': 0.2490039840637479, 'accuracy_ratio': 0.9959644874899112}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 64.22786422786423, 'accuracy_difference': 0.4277004277004295, 'accuracy_ratio': 0.9933849401829697}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 62.3699194973493, 'accuracy_difference': 0.6184959748674643, 'accuracy_ratio': 0.9901807980049876}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 59.295408068414204, 'accuracy_difference': 0.2323852017103576, 'accuracy_ratio': 0.9960961898813241}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 65.46173125120494, 'accuracy_difference': 0.7518796992481214, 'accuracy_ratio': 0.9886446353180958}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 63.87096774193548, 'accuracy_difference': 0.9925558312655127, 'accuracy_ratio': 0.9846977811782708} + }, + 'pile_Github': + {'unpruned_accuracy': 78.56037856037857, 'target_accuracy': 62.84830284830286, 'ff_frac': 0.016, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 54.562493834467794, 'accuracy_difference': 1.7658084245832057, 'accuracy_ratio': 0.9686514886164624}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 53.37196523129184, 'accuracy_difference': 2.667599160755316, 'accuracy_ratio': 
0.9523979318951685}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 62.74093885456055, 'accuracy_difference': 0.8853638291985604, 'accuracy_ratio': 0.9860849398463546}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 65.61313591495824, 'accuracy_difference': 2.145026575550503, 'accuracy_ratio': 0.968342905168791}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 61.08836108836109, 'accuracy_difference': 17.472017472017477, 'accuracy_ratio': 0.7775975906405652}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 53.535353535353536, 'accuracy_difference': 6.916006916006914, 'accuracy_ratio': 0.885593858196598}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 50.578495910632356, 'accuracy_difference': 3.00219429483343, 'accuracy_ratio': 0.9439687267311989}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.2953995157385, 'accuracy_difference': 0.6682808716706958, 'accuracy_ratio': 0.9886662286465179}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 55.5009555009555, 'accuracy_difference': 1.2103012103012105, 'accuracy_ratio': 0.9786585365853658}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 60.99601593625498, 'accuracy_difference': 0.7071713147410392, 'accuracy_ratio': 0.9885391444713478}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 62.31686231686232, 'accuracy_difference': 2.3387023387023405, 'accuracy_ratio': 0.9638282899366643}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 54.33928922049872, 'accuracy_difference': 8.649126251718045, 'accuracy_ratio': 0.8626870324189526}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 57.7244841048522, 'accuracy_difference': 1.8033091652723598, 'accuracy_ratio': 
0.9697064334790755}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 65.5195681511471, 'accuracy_difference': 0.6940427993059615, 'accuracy_ratio': 0.9895181249090115}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 63.84119106699752, 'accuracy_difference': 1.022332506203476, 'accuracy_ratio': 0.9842387146136189} + }, + 'pile_Gutenberg': + {'unpruned_accuracy': 60.45136045136045, 'target_accuracy': 48.36108836108836, 'ff_frac': 0.032, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 55.67722205780803, 'accuracy_difference': 0.6510802012429693, 'accuracy_ratio': 0.9884413309982487}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 53.152163053252075, 'accuracy_difference': 2.8874013387950797, 'accuracy_ratio': 0.9484756641112498}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 62.427372498386056, 'accuracy_difference': 1.198930185373051, 'accuracy_ratio': 0.9811566893752718}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 62.38610478359909, 'accuracy_difference': 5.372057706909651, 'accuracy_ratio': 0.9207171872811317}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 71.56247156247156, 'accuracy_difference': 6.997906997907009, 'accuracy_ratio': 0.910923201668018}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 46.89234689234689, 'accuracy_difference': 13.559013559013557, 'accuracy_ratio': 0.775703748306488}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 50.0, 'accuracy_difference': 3.5806902054657854, 'accuracy_ratio': 0.9331720029784066}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.101694915254235, 'accuracy_difference': 0.8619854721549629, 'accuracy_ratio': 0.9853810775295664}, + 'pile_PhilPapers': {'unpruned_accuracy': 
56.71125671125671, 'pruned_accuracy': 54.13595413595414, 'accuracy_difference': 2.5753025753025724, 'accuracy_ratio': 0.954589216944801}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 61.06573705179283, 'accuracy_difference': 0.6374501992031867, 'accuracy_ratio': 0.9896690879741727}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 63.5999635999636, 'accuracy_difference': 1.055601055601059, 'accuracy_ratio': 0.983673469387755}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 61.92813665815826, 'accuracy_difference': 1.0602788140585062, 'accuracy_ratio': 0.9831670822942644}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 59.04443205056702, 'accuracy_difference': 0.4833612195575441, 'accuracy_ratio': 0.9918800749531542}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 65.25930210140737, 'accuracy_difference': 0.9543088490456881, 'accuracy_ratio': 0.9855874217498909}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 62.759305210918114, 'accuracy_difference': 2.1042183622828787, 'accuracy_ratio': 0.9675592960979342} + }, + 'pile_HackerNews': + {'unpruned_accuracy': 53.580690205465785, 'target_accuracy': 42.86455216437263, 'ff_frac': 0.064, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 51.62276807734044, 'accuracy_difference': 4.705534181710561, 'accuracy_ratio': 0.9164623467600701}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 46.75791787391348, 'accuracy_difference': 9.281646518133677, 'accuracy_ratio': 0.8343733285790694}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 59.00581020012912, 'accuracy_difference': 4.620492483629988, 'accuracy_ratio': 0.9273807798231628}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 
62.11085801063022, 'accuracy_difference': 5.647304479878521, 'accuracy_ratio': 0.9166549936965961}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 72.12667212667213, 'accuracy_difference': 6.433706433706433, 'accuracy_ratio': 0.9181049461369165}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 48.958048958048956, 'accuracy_difference': 11.493311493311495, 'accuracy_ratio': 0.8098750564503989}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 41.701575902653104, 'accuracy_difference': 11.879114302812681, 'accuracy_ratio': 0.778294862248697}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 54.94430992736078, 'accuracy_difference': 4.019370460048421, 'accuracy_ratio': 0.931833114323259}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 50.887250887250886, 'accuracy_difference': 5.824005824005823, 'accuracy_ratio': 0.8973042362002568}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 58.54581673306773, 'accuracy_difference': 3.1573705179282854, 'accuracy_ratio': 0.9488297013720742}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 60.07826007826008, 'accuracy_difference': 4.577304577304581, 'accuracy_ratio': 0.9292047853624208}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 56.086785784409976, 'accuracy_difference': 6.901629687806789, 'accuracy_ratio': 0.8904301745635911}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 51.319947945714816, 'accuracy_difference': 8.207845324409746, 'accuracy_ratio': 0.8621174266083697}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 62.58916522074417, 'accuracy_difference': 3.6244457297088886, 'accuracy_ratio': 0.9452613189692822}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 
'pruned_accuracy': 59.89081885856079, 'accuracy_difference': 4.9727047146402015, 'accuracy_ratio': 0.9233358837031369} + }, + 'pile_NIH_ExPorter': + {'unpruned_accuracy': 58.9636803874092, 'target_accuracy': 47.170944309927364, 'ff_frac': 0.08, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 51.7707408503502, 'accuracy_difference': 4.557561408700799, 'accuracy_ratio': 0.9190893169877408}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 53.74163253072235, 'accuracy_difference': 2.2979318613248054, 'accuracy_ratio': 0.9589944731681227}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 59.12570321866642, 'accuracy_difference': 4.500599465092684, 'accuracy_ratio': 0.9292651108856357}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 64.77790432801822, 'accuracy_difference': 2.9802581624905145, 'accuracy_ratio': 0.9560162487743381}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 76.88597688597689, 'accuracy_difference': 1.674401674401679, 'accuracy_ratio': 0.9786864357697208}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 57.11165711165711, 'accuracy_difference': 3.339703339703341, 'accuracy_ratio': 0.9447538762607256}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 51.276680630361064, 'accuracy_difference': 2.304009575104722, 'accuracy_ratio': 0.956999255398362}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 46.56658595641647, 'accuracy_difference': 12.39709443099273, 'accuracy_ratio': 0.7897503285151117}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 53.42615342615343, 'accuracy_difference': 3.2851032851032826, 'accuracy_ratio': 0.9420731707317074}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 49.133466135458164, 'accuracy_difference': 
12.569721115537853, 'accuracy_ratio': 0.7962873284907183}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 55.94685594685595, 'accuracy_difference': 8.708708708708713, 'accuracy_ratio': 0.8653061224489795}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 61.466719026114276, 'accuracy_difference': 1.5216964461024887, 'accuracy_ratio': 0.9758416458852869}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 57.87321063394683, 'accuracy_difference': 1.6545826361777287, 'accuracy_ratio': 0.9722048719550281}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 61.72161172161172, 'accuracy_difference': 4.491999228841337, 'accuracy_ratio': 0.9321589751055466}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 62.15384615384615, 'accuracy_difference': 2.70967741935484, 'accuracy_ratio': 0.9582249426166795} + }, + 'pile_PhilPapers': + {'unpruned_accuracy': 56.71125671125671, 'target_accuracy': 45.36900536900537, 'ff_frac': 0.112, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 50.66587747854395, 'accuracy_difference': 5.6624247805070524, 'accuracy_ratio': 0.8994746059544658}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 52.65261264861625, 'accuracy_difference': 3.386951743430906, 'accuracy_ratio': 0.9395614191477983}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 35.774232223554364, 'accuracy_difference': 27.852070460204743, 'accuracy_ratio': 0.5622553993332366}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 62.699316628701595, 'accuracy_difference': 5.058845861807143, 'accuracy_ratio': 0.9253396834290516}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 75.87587587587588, 'accuracy_difference': 2.6845026845026894, 'accuracy_ratio': 0.9658287964786284}, 
+ 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 53.52625352625353, 'accuracy_difference': 6.925106925106924, 'accuracy_ratio': 0.8854433237994882}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 49.66088170756034, 'accuracy_difference': 3.919808497905443, 'accuracy_ratio': 0.926842889054356}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 53.0363196125908, 'accuracy_difference': 5.927360774818396, 'accuracy_ratio': 0.8994743758212879}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 45.11784511784512, 'accuracy_difference': 11.593411593411588, 'accuracy_ratio': 0.7955712451861362}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 56.19521912350598, 'accuracy_difference': 5.507968127490038, 'accuracy_ratio': 0.9107344632768362}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 59.98725998725999, 'accuracy_difference': 4.668304668304671, 'accuracy_ratio': 0.9277973258268825}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 60.11191831926173, 'accuracy_difference': 2.8764971529550323, 'accuracy_ratio': 0.9543329177057357}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 53.541550474065815, 'accuracy_difference': 5.986242796058747, 'accuracy_ratio': 0.8994378513429107}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 62.40601503759399, 'accuracy_difference': 3.807595912859071, 'accuracy_ratio': 0.9424952685980492}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 59.831265508684865, 'accuracy_difference': 5.032258064516128, 'accuracy_ratio': 0.9224177505738332} + }, + 'pile_PubMed_Abstracts': + {'unpruned_accuracy': 61.70318725099602, 'target_accuracy': 49.36254980079681, 'ff_frac': 0.008812500000000001, + 'pile_ArXiv': 
{'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 30.876985301371214, 'accuracy_difference': 25.451316957679786, 'accuracy_ratio': 0.5481611208406305}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 43.60075931661505, 'accuracy_difference': 12.438805075432107, 'accuracy_ratio': 0.7780353004100553}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 46.2787051553998, 'accuracy_difference': 17.34759752835931, 'accuracy_ratio': 0.7273517901145093}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 53.4832953682612, 'accuracy_difference': 14.274867122247535, 'accuracy_ratio': 0.7893262361675304}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 52.50705250705251, 'accuracy_difference': 26.05332605332606, 'accuracy_ratio': 0.6683655739603845}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 44.83574483574483, 'accuracy_difference': 15.615615615615617, 'accuracy_ratio': 0.7416829745596869}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 44.604029523239575, 'accuracy_difference': 8.97666068222621, 'accuracy_ratio': 0.8324646314221892}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 49.4818401937046, 'accuracy_difference': 9.481840193704599, 'accuracy_ratio': 0.83919185282523}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 49.149149149149146, 'accuracy_difference': 7.562107562107563, 'accuracy_ratio': 0.8666559691912709}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 50.54780876494024, 'accuracy_difference': 11.155378486055774, 'accuracy_ratio': 0.8192090395480226}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 44.58094458094458, 'accuracy_difference': 20.07462007462008, 'accuracy_ratio': 0.6895144264602392}, + 'pile_StackExchange': 
{'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 46.81916355782447, 'accuracy_difference': 16.169251914392298, 'accuracy_ratio': 0.7432980049875313}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 45.38018218999814, 'accuracy_difference': 14.147611080126424, 'accuracy_ratio': 0.7623360399750155}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 57.54771544245229, 'accuracy_difference': 8.665895508000773, 'accuracy_ratio': 0.8691221429611297}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 51.44416873449131, 'accuracy_difference': 13.41935483870968, 'accuracy_ratio': 0.7931140015302218} + }, + 'pile_PubMed_Central': + {'unpruned_accuracy': 64.65556465556466, 'target_accuracy': 51.72445172445173, 'ff_frac': 0.064, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 50.32060767485449, 'accuracy_difference': 6.007694584196507, 'accuracy_ratio': 0.8933450087565674}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 53.72165051453692, 'accuracy_difference': 2.317913877510236, 'accuracy_ratio': 0.9586379033695847}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 58.839804482154385, 'accuracy_difference': 4.786498201604722, 'accuracy_ratio': 0.9247717060443542}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 63.202353834472284, 'accuracy_difference': 4.555808656036454, 'accuracy_ratio': 0.9327636923938926}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 75.2934752934753, 'accuracy_difference': 3.2669032669032703, 'accuracy_ratio': 0.958415382833314}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 56.88415688415689, 'accuracy_difference': 3.5672035672035634, 'accuracy_ratio': 0.9409905163329821}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 
51.08717334929184, 'accuracy_difference': 2.4935168561739474, 'accuracy_ratio': 0.9534623976172748}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 48.67796610169491, 'accuracy_difference': 10.285714285714285, 'accuracy_ratio': 0.8255584756898817}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 52.534352534352536, 'accuracy_difference': 4.1769041769041735, 'accuracy_ratio': 0.9263478818998717}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 49.97011952191235, 'accuracy_difference': 11.733067729083665, 'accuracy_ratio': 0.8098466505246167}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 51.506051506051506, 'accuracy_difference': 13.149513149513155, 'accuracy_ratio': 0.7966220971147079}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 60.91694482623208, 'accuracy_difference': 2.0714706459846823, 'accuracy_ratio': 0.9671134663341646}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 57.17605502881577, 'accuracy_difference': 2.3517382413087944, 'accuracy_ratio': 0.9604934415990006}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 61.64449585502217, 'accuracy_difference': 4.569115095430888, 'accuracy_ratio': 0.930994322317659}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 61.90570719602977, 'accuracy_difference': 2.95781637717122, 'accuracy_ratio': 0.9543993879112471} + }, + 'pile_StackExchange': + {'unpruned_accuracy': 62.988415472216765, 'target_accuracy': 50.39073237777342, 'ff_frac': 0.032, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 51.859524514156064, 'accuracy_difference': 4.468777744894936, 'accuracy_ratio': 0.9206654991243434}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 52.31291837346388, 
'accuracy_difference': 3.726646018583274, 'accuracy_ratio': 0.9334997325726511}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 61.98469058378677, 'accuracy_difference': 1.6416120999723347, 'accuracy_ratio': 0.974199159298449}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 65.04365983295368, 'accuracy_difference': 2.714502657555059, 'accuracy_ratio': 0.9599383667180276}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 61.41596141596142, 'accuracy_difference': 17.144417144417147, 'accuracy_ratio': 0.7817676358160547}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 55.47365547365548, 'accuracy_difference': 4.977704977704974, 'accuracy_ratio': 0.9176576847809725}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 49.371633752244165, 'accuracy_difference': 4.209056453221621, 'accuracy_ratio': 0.9214445271779599}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 57.365617433414045, 'accuracy_difference': 1.5980629539951536, 'accuracy_ratio': 0.9728975032851512}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 53.544453544453546, 'accuracy_difference': 3.166803166803163, 'accuracy_ratio': 0.9441591784338896}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 60.059760956175296, 'accuracy_difference': 1.6434262948207206, 'accuracy_ratio': 0.973365617433414}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 59.65055965055965, 'accuracy_difference': 5.00500500500501, 'accuracy_ratio': 0.9225897255453905}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 48.95935597879443, 'accuracy_difference': 14.029059493422338, 'accuracy_ratio': 0.7772755610972569}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 
56.16285554935862, 'accuracy_difference': 3.364937720765944, 'accuracy_ratio': 0.943472829481574}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 64.91228070175438, 'accuracy_difference': 1.301330248698676, 'accuracy_ratio': 0.9803464842043965}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 61.77667493796526, 'accuracy_difference': 3.0868486352357323, 'accuracy_ratio': 0.9524100994644223} + }, + 'pile_Ubuntu_IRC': + {'unpruned_accuracy': 59.52779327012456, 'target_accuracy': 47.622234616099654, 'ff_frac': 0.028, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 55.05573641116701, 'accuracy_difference': 1.272565847883989, 'accuracy_ratio': 0.9774080560420315}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 54.10130882206015, 'accuracy_difference': 1.9382555699870068, 'accuracy_ratio': 0.9654127295418079}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 55.621138061422116, 'accuracy_difference': 8.005164622336991, 'accuracy_ratio': 0.8741846644441224}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 66.76157934700076, 'accuracy_difference': 0.9965831435079764, 'accuracy_ratio': 0.985292057711164}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 76.13067613067614, 'accuracy_difference': 2.4297024297024308, 'accuracy_ratio': 0.9690721649484536}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 58.87705887705888, 'accuracy_difference': 1.5743015743015718, 'accuracy_ratio': 0.9739575493000151}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 51.35647316975863, 'accuracy_difference': 2.224217035707156, 'accuracy_ratio': 0.9584884586746092}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 58.1501210653753, 'accuracy_difference': 0.8135593220338961, 
'accuracy_ratio': 0.9862023653088042}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 53.362453362453365, 'accuracy_difference': 3.3488033488033437, 'accuracy_ratio': 0.9409499358151477}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 61.09561752988048, 'accuracy_difference': 0.6075697211155386, 'accuracy_ratio': 0.9901533494753834}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 63.57266357266357, 'accuracy_difference': 1.0829010829010883, 'accuracy_ratio': 0.9832512315270935}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 61.309640683290795, 'accuracy_difference': 1.6787747889259705, 'accuracy_ratio': 0.973347880299252}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 46.477040342071014, 'accuracy_difference': 13.050752928053548, 'accuracy_ratio': 0.7807620237351655}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 65.21110468478889, 'accuracy_difference': 1.0025062656641666, 'accuracy_ratio': 0.984859513757461}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 63.116625310173696, 'accuracy_difference': 1.7468982630272976, 'accuracy_ratio': 0.9730680948737567} + }, + 'pile_USPTO_Backgrounds': + {'unpruned_accuracy': 66.21361095045306, 'target_accuracy': 52.97088876036245, 'ff_frac': 0.126, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 49.29466311532011, 'accuracy_difference': 7.033639143730888, 'accuracy_ratio': 0.8751313485113835}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 51.9632330902188, 'accuracy_difference': 4.076331301828354, 'accuracy_ratio': 0.9272597610982349}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 57.078299363644746, 'accuracy_difference': 6.548003320114361, 'accuracy_ratio': 
0.8970865342803305}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 62.88914198936978, 'accuracy_difference': 4.869020501138955, 'accuracy_ratio': 0.9281411962459728}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 74.72927472927474, 'accuracy_difference': 3.8311038311038317, 'accuracy_ratio': 0.9512336383644157}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 52.68905268905269, 'accuracy_difference': 7.762307762307763, 'accuracy_ratio': 0.8715941592653921}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 48.68342309994016, 'accuracy_difference': 4.8972671055256285, 'accuracy_ratio': 0.9086001489203277}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 51.59322033898305, 'accuracy_difference': 7.370460048426146, 'accuracy_ratio': 0.8750000000000001}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 50.923650923650925, 'accuracy_difference': 5.787605787605784, 'accuracy_ratio': 0.8979460847240052}, + 'pile_PubMed_Abstracts': {'unpruned_accuracy': 61.70318725099602, 'pruned_accuracy': 54.62151394422311, 'accuracy_difference': 7.081673306772906, 'accuracy_ratio': 0.8852300242130751}, + 'pile_PubMed_Central': {'unpruned_accuracy': 64.65556465556466, 'pruned_accuracy': 59.35025935025935, 'accuracy_difference': 5.3053053053053105, 'accuracy_ratio': 0.9179451090781139}, + 'pile_StackExchange': {'unpruned_accuracy': 62.988415472216765, 'pruned_accuracy': 58.30551737679168, 'accuracy_difference': 4.682898095425088, 'accuracy_ratio': 0.9256546134663343}, + 'pile_Ubuntu_IRC': {'unpruned_accuracy': 59.52779327012456, 'pruned_accuracy': 55.85610708310095, 'accuracy_difference': 3.67168618702361, 'accuracy_ratio': 0.9383198001249219}, + 'pile_USPTO_Backgrounds': {'unpruned_accuracy': 66.21361095045306, 'pruned_accuracy': 52.91112396375554, 'accuracy_difference': 13.302486986697517, 
'accuracy_ratio': 0.7990973940893871}, + 'pile_Wikipedia': {'unpruned_accuracy': 64.863523573201, 'pruned_accuracy': 59.265508684863526, 'accuracy_difference': 5.598014888337467, 'accuracy_ratio': 0.9136954858454476} + }, + 'pile_Wikipedia': + {'unpruned_accuracy': 64.863523573201, 'target_accuracy': 51.8908188585608, 'ff_frac': 0.064, + 'pile_ArXiv': {'unpruned_accuracy': 56.328302259051, 'pruned_accuracy': 53.98046759396271, 'accuracy_difference': 2.3478346650882926, 'accuracy_ratio': 0.9583187390542907}, + 'pile_Enron_Emails': {'unpruned_accuracy': 56.039564392047154, 'pruned_accuracy': 51.50364671795384, 'accuracy_difference': 4.535917674093312, 'accuracy_ratio': 0.9190586557318595}, + 'pile_EuroParl': {'unpruned_accuracy': 63.62630268375911, 'pruned_accuracy': 57.98210827261828, 'accuracy_difference': 5.64419441114083, 'accuracy_ratio': 0.9112914915205101}, + 'pile_FreeLaw': {'unpruned_accuracy': 67.75816249050874, 'pruned_accuracy': 60.73462414578588, 'accuracy_difference': 7.0235383447228585, 'accuracy_ratio': 0.8963440257739179}, + 'pile_Github': {'unpruned_accuracy': 78.56037856037857, 'pruned_accuracy': 75.65747565747566, 'accuracy_difference': 2.902902902902909, 'accuracy_ratio': 0.9630487663616355}, + 'pile_Gutenberg': {'unpruned_accuracy': 60.45136045136045, 'pruned_accuracy': 55.45545545545546, 'accuracy_difference': 4.9959049959049935, 'accuracy_ratio': 0.917356615986753}, + 'pile_HackerNews': {'unpruned_accuracy': 53.580690205465785, 'pruned_accuracy': 51.44623977658089, 'accuracy_difference': 2.1344504288848967, 'accuracy_ratio': 0.9601638123603873}, + 'pile_NIH_ExPorter': {'unpruned_accuracy': 58.9636803874092, 'pruned_accuracy': 57.67554479418886, 'accuracy_difference': 1.288135593220339, 'accuracy_ratio': 0.9781537450722734}, + 'pile_PhilPapers': {'unpruned_accuracy': 56.71125671125671, 'pruned_accuracy': 52.215852215852216, 'accuracy_difference': 4.495404495404493, 'accuracy_ratio': 0.9207317073170732}, + 'pile_PubMed_Abstracts': 
def save_data_dict(model_size: str, data: object, name: str) -> str:
    """Serialize ``data`` with ``torch.save`` under ``saved_tensors/<model_size>/``.

    Two copies are written on every call:

    * ``<name>-<model_size>-recent.pt`` -- fixed name, overwritten each time.
    * ``<name>-<model_size>-<timestamp>.pt`` -- a snapshot named after the
      current local time.

    Args:
        model_size: Label used both as the sub-directory name and as part of
            the file names.
        data: Any object that ``torch.save`` can serialize.
        name: Prefix for the file names.

    Returns:
        The path of the timestamped snapshot (not the ``-recent`` copy).
    """
    # Use "-" between the time fields: ":" is not allowed in Windows file
    # names, so a "%H:%M:%S" stamp makes the snapshot unwritable there.
    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    directory = f'saved_tensors/{model_size}'
    os.makedirs(directory, exist_ok=True)

    # Rolling copy: always reachable under the same name.
    filename = f'{directory}/{name}-{model_size}-recent.pt'
    torch.save(data, filename)
    print(f'Saved {filename} to {model_size}')

    # Timestamped snapshot: this is the path handed back to the caller.
    filename = f'{directory}/{name}-{model_size}-{now}.pt'
    torch.save(data, filename)
    print(f'Saved {filename} to {model_size}')
    return filename
started at: 2024-03-15 01:50:12.005664 +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +trying to find correct ff_frac for chemistry with target accuracy -0.8 +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +time to get data for dataset: chemistry is 9:35:53.623159 results: {'unpruned_accuracy': -1, 'target_accuracy': -0.8, 'ff_frac': 0.064, 'chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'civil': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'code': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'emotion': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'math': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 
'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_ArXiv': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Enron_Emails': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_EuroParl': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_FreeLaw': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Github': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Gutenberg': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_HackerNews': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_NIH_ExPorter': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PhilPapers': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PubMed_Abstracts': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PubMed_Central': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_StackExchange': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Ubuntu_IRC': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_USPTO_Backgrounds': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Wikipedia': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'poems': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 
'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:abstract_algebra': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:anatomy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:astronomy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:business_ethics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:clinical_knowledge': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_computer_science': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_medicine': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:computer_security': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:conceptual_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:econometrics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:electrical_engineering': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 
'accuracy_ratio': 1.0}, 'mmlu:elementary_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:formal_logic': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:global_facts': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_computer_science': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_european_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_geography': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_government_and_politics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_macroeconomics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_microeconomics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_psychology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_statistics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 
'mmlu:high_school_us_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_world_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:human_aging': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:human_sexuality': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:international_law': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:jurisprudence': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:logical_fallacies': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:machine_learning': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:management': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:marketing': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:medical_genetics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:miscellaneous': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:moral_disputes': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:moral_scenarios': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:nutrition': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:philosophy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:prehistory': 
{'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_accounting': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_law': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_medicine': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_psychology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:public_relations': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:security_studies': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:sociology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:us_foreign_policy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:virology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:world_religions': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}} +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +trying to find correct ff_frac for civil with target accuracy -0.8 +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded 
mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +time to get data for dataset: civil is 8:41:39.793750 results: {'unpruned_accuracy': -1, 'target_accuracy': -0.8, 'ff_frac': 0.064, 'chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'civil': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'code': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'emotion': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'math': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 
'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_ArXiv': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Enron_Emails': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_EuroParl': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_FreeLaw': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Github': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Gutenberg': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_HackerNews': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_NIH_ExPorter': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PhilPapers': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PubMed_Abstracts': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_PubMed_Central': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_StackExchange': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Ubuntu_IRC': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_USPTO_Backgrounds': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'pile_Wikipedia': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'poems': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 
'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:abstract_algebra': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:anatomy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:astronomy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:business_ethics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:clinical_knowledge': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_computer_science': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_medicine': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:college_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:computer_security': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:conceptual_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:econometrics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:electrical_engineering': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 
'accuracy_ratio': 1.0}, 'mmlu:elementary_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:formal_logic': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:global_facts': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_biology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_chemistry': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_computer_science': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_european_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_geography': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_government_and_politics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_macroeconomics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_mathematics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_microeconomics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_physics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_psychology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_statistics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 
'mmlu:high_school_us_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:high_school_world_history': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:human_aging': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:human_sexuality': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:international_law': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:jurisprudence': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:logical_fallacies': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:machine_learning': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:management': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:marketing': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:medical_genetics': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:miscellaneous': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:moral_disputes': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:moral_scenarios': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:nutrition': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:philosophy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:prehistory': 
{'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_accounting': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_law': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_medicine': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:professional_psychology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:public_relations': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:security_studies': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:sociology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:us_foreign_policy': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:virology': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}, 'mmlu:world_religions': {'unpruned_accuracy': -1, 'pruned_accuracy': -1, 'accuracy_difference': 0, 'accuracy_ratio': 1.0}} +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+trying to find correct ff_frac for code with target accuracy -0.8 +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. +- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +Warning: 'pile_deduped' has no 'test' split. Using 'train' split and skipping 100 texts instead. 
+- Loaded mistralai/Mistral-7b-instruct-v0.2 + - Registered 32 Attention Layers +W \ No newline at end of file diff --git a/examples/neuron-mapping/outputs.log b/examples/neuron-mapping/outputs.log new file mode 100644 index 0000000..e69de29 diff --git a/examples/neuron-mapping/plot.py b/examples/neuron-mapping/plot.py new file mode 100644 index 0000000..dbc7fde --- /dev/null +++ b/examples/neuron-mapping/plot.py @@ -0,0 +1,52 @@ +import torch +import numpy as np +import matplotlib.pyplot as plt + +def load_pt_file(directory: str, filename: str): + data = torch.load(directory+filename) + for key in data.keys(): + print(key) + return data + +cripple_repos = ["pile_FreeLaw", "biology", "chemistry", "pile_PubMed_Abstracts", "pile_PubMed_Central", "pile_NIH_ExPorter", "pile_Enron_Emails", "code", "pile_Github", "pile_StackExchange", "pile_Ubuntu_IRC", "pile_HackerNews", "poems", "civil", "emotion", "physics", "math", "pile_ArXiv", "pile_Wikipedia", "pile_USPTO_Backgrounds", "pile_PhilPapers", "pile_EuroParl", "pile_Gutenberg"] + +def plot_ratios(ratios, ff_frac): + datasets = cripple_repos + grid = [ + [ + ratios[ff_frac][dataset_a][dataset_b].item() if dataset_a != dataset_b else np.nan + for dataset_b in datasets + ] + for dataset_a in datasets + ] + grid = np.ma.masked_where(np.isnan(grid), grid) + + average = np.mean(grid) + + plt.imshow(grid) + for i in range(len(datasets)): + for j in range(len(datasets)): + plt.text( + j, + i, + f"{grid[i, j]:.2f}", + ha="center", + va="center", + color="black" if grid[i, j] > average else "white", + ) + plt.xticks( + range(len(datasets)), [dataset for dataset in datasets], rotation=90 + ) + plt.yticks(range(len(datasets)), [dataset for dataset in datasets]) + + plt.subplots_adjust(bottom=0.19, top=0.97) + + plt.title(f"FF Criteria Overlap for Prune Ratio {ff_frac}") + + plt.show() + + +ratios = load_pt_file("/home/rashid/ml/trajectories/rash92_fork/taker/examples/neuron-mapping/saved_tensors/hf/","pruning_ratios-hf-recent.pt") 
+ff_fracs = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5] +for ff_frac in ff_fracs: + plot_ratios(ratios, ff_frac) diff --git a/examples/neuron-mapping/plots/ff_frac_0.01.png b/examples/neuron-mapping/plots/ff_frac_0.01.png new file mode 100644 index 0000000..8bdb2b4 Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.01.png differ diff --git a/examples/neuron-mapping/plots/ff_frac_0.02.png b/examples/neuron-mapping/plots/ff_frac_0.02.png new file mode 100644 index 0000000..94ab349 Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.02.png differ diff --git a/examples/neuron-mapping/plots/ff_frac_0.05.png b/examples/neuron-mapping/plots/ff_frac_0.05.png new file mode 100644 index 0000000..697946d Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.05.png differ diff --git a/examples/neuron-mapping/plots/ff_frac_0.1.png b/examples/neuron-mapping/plots/ff_frac_0.1.png new file mode 100644 index 0000000..02109a7 Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.1.png differ diff --git a/examples/neuron-mapping/plots/ff_frac_0.2.png b/examples/neuron-mapping/plots/ff_frac_0.2.png new file mode 100644 index 0000000..832cea4 Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.2.png differ diff --git a/examples/neuron-mapping/plots/ff_frac_0.5.png b/examples/neuron-mapping/plots/ff_frac_0.5.png new file mode 100644 index 0000000..9035db4 Binary files /dev/null and b/examples/neuron-mapping/plots/ff_frac_0.5.png differ diff --git a/examples/neuron-mapping/prune_repos.py b/examples/neuron-mapping/prune_repos.py index 119f8b7..41f69e3 100644 --- a/examples/neuron-mapping/prune_repos.py +++ b/examples/neuron-mapping/prune_repos.py @@ -5,8 +5,6 @@ import torch def compare_pruned_ff_criteria(cripple_repos: list[str], model_size: str): - # cripple_repos = ["physics", "bio", "code"] - print("model_size: ",model_size) directory = 
"/home/ubuntu/taker-rashid/examples/neuron-mapping/saved_tensors/"+model_size+"/" focus_repo = "pile" suffix = "-"+model_size+"-recent.pt" @@ -29,65 +27,39 @@ def compare_pruned_ff_criteria(cripple_repos: list[str], model_size: str): ratio = torch.sum(matches)/torch.sum(repo1_ff_criteria) ratios[repo1][repo2] = ratio - return ratios + return ratios -def get_shared_pruning_data( - model_repo: str = "nickypro/tinyllama-15M", - cripple_repos: list[str] = ["physics", "biology","chemistry", "math", "code"], - focus_repo: str = "pile" - ): - - # Configure initial model and tests - c = PruningConfig( - wandb_project = "testing", # repo to push results to - model_repo = model_repo, - # "metallama/llama-2-7b" - token_limit = 1000, # trim the input to this max length - run_pre_test = True, # evaluate the unpruned model - eval_sample_size = 1e3, - collection_sample_size = 1e3, - # Removals parameters - ff_frac = 0.2, # % of feed forward neurons to prune - attn_frac = 0.00, # % of attention neurons to prune - focus = focus_repo, # the “reference” dataset - cripple = "physics", # the “unlearned” dataset - additional_datasets=tuple(), # any extra datasets to evaluate on - recalculate_activations = False, # iterative vs non-iterative - n_steps = 1, - save=True, - save_subdirectory = "/home/ubuntu/tetra/taker/examples/neuron-mapping" - ) - - # Parse CLI for arguments - # c, args = cli_parser(c) - #list of repos to cripple - ff_frac_to_prune = [0.01,0.02,0.05,0.1,0.2] - model_size = c.model_repo.split('-')[-1] - - # Run the iterated pruning for each cripple repo, for a range of ff_frac pruned - shared_pruning_data = {} - for ff_frac in ff_frac_to_prune: - c.ff_frac = ff_frac - for repo in cripple_repos: - c.cripple = repo - print("running iteration for ", c.cripple, " vs ", c.focus, "with ff_frac: ", ff_frac) - with torch.no_grad(): - model, history = run_pruning(c) - # ratios = compare_pruned_ff_criteria(cripple_repos, model_size) - # shared_pruning_data[ff_frac] = ratios - - 
return {} +# Configure initial model and tests +c = PruningConfig( + wandb_project = "testing", # repo to push results to + # model_repo = "nickypro/tinyllama-15M", + # model_repo = "facebook/opt-1.3b", + # model_repo = "nickypro/llama-7b-hf-rand", + model_repo = "nickypro/mistral-7b-rand", + token_limit = 1000, # trim the input to this max length + run_pre_test = False, # evaluate the unpruned model + eval_sample_size = 1e5, + collection_sample_size = 1e5, + # Removals parameters + ff_frac = 0.01, # % of feed forward neurons to prune + attn_frac = 0.00, # % of attention neurons to prune + focus = "pile", # the “reference” dataset + cripple = "physics", # the “unlearned” dataset + additional_datasets=tuple(), # any extra datasets to evaluate on + recalculate_activations = False, # iterative vs non-iterative + dtype = "int4", + n_steps = 1, +) -cifar20_datasets = ["aquatic_mammals", "fish", "flowers", "food_containers", "fruit_and_vegetables", "household_electrical_devices", - "household_furniture", "insects", "large_carnivores", "large_outdoor", "large_omnivores_and_herbivores", "medium_mammals", - "non_insect_invertebrates", "people", "reptiles", "small_mammals"] +#list of repos to cripple +cripple_repos = ["emotion", "pile_FreeLaw", "pile_PubMed_Abstracts", "pile_PubMed_Central", "pile_NIH_ExPorter", "pile_Enron_Emails", "pile_Github", "pile_StackExchange", "pile_HackerNews", "pile_ArXiv", "pile_Wikipedia", "pile_Ubuntu_IRC", "pile_USPTO_Backgrounds", "pile_PhilPapers", "pile_EuroParl", "pile_Gutenberg", "pile_PhilPapers", "pile_EuroParl", "pile_Gutenberg", 'code', 'poems', 'civil', 'chemistry'] +#cripple_repos = ['physics', 'biology', 'math'] -shared_pruning_data = get_shared_pruning_data( - model_repo="Ahmed9275/Vit-Cifar100", - cripple_repos=cifar20_datasets, - focus_repo="cifar20-split") -print(shared_pruning_data) +#prune each repo and save tensors, doing some extra computation but only really need ff_scores for each repo, will do actual pruning for 
different values of ff_frac in compare.py +for repo in cripple_repos: + c.cripple = repo + print("running iteration for ", c.cripple, " vs ", c.focus, "with ff_frac: ", c.ff_frac) + with torch.no_grad(): + model, history = run_pruning(c) -# shared_pruning_data = get_shared_pruning_data("nickypro/tinyllama-15M") -# print(shared_pruning_data) \ No newline at end of file diff --git a/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-2024-02-14_02:06:23.pt b/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-2024-02-14_02:06:23.pt new file mode 100644 index 0000000..95e7127 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-2024-02-14_02:06:23.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-recent.pt new file mode 100644 index 0000000..2bb9968 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/biology-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-2024-02-14_03:32:00.pt b/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-2024-02-14_03:32:00.pt new file mode 100644 index 0000000..cc5fe1e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-2024-02-14_03:32:00.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-recent.pt new file mode 100644 index 0000000..85c0b0e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/chemistry-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-2024-02-14_09:01:02.pt b/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-2024-02-14_09:01:02.pt new file mode 100644 index 0000000..3048d46 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-2024-02-14_09:01:02.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-recent.pt new file mode 100644 index 0000000..67a56f6 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/civil-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-2024-02-14_07:35:18.pt b/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-2024-02-14_07:35:18.pt new file mode 100644 index 0000000..9bddbb2 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-2024-02-14_07:35:18.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-recent.pt new file mode 100644 index 0000000..cd262ab Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/code-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-12_23:26:50.pt b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-12_23:26:50.pt new file mode 100644 index 0000000..614dc5d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-12_23:26:50.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-13_21:59:58.pt b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-13_21:59:58.pt new file mode 100644 index 0000000..24a2ebe Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-2024-03-13_21:59:58.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-recent.pt new file mode 100644 index 0000000..5921c9d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/crossevaluation_results-hf-recent.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-2024-02-14_23:17:50.pt b/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-2024-02-14_23:17:50.pt new file mode 100644 index 0000000..5369148 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-2024-02-14_23:17:50.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-recent.pt new file mode 100644 index 0000000..f304977 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/emotion-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-2024-02-14_06:22:18.pt b/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-2024-02-14_06:22:18.pt new file mode 100644 index 0000000..6ccbed9 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-2024-02-14_06:22:18.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-recent.pt new file mode 100644 index 0000000..922c126 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/math-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-13_23:39:59.pt b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-13_23:39:59.pt new file mode 100644 index 0000000..1fb3d54 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-13_23:39:59.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-14_00:42:09.pt b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-14_00:42:09.pt new file mode 100644 index 0000000..d0884c3 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-2024-02-14_00:42:09.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-recent.pt 
b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-recent.pt new file mode 100644 index 0000000..4bcfc7d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/physics-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-2024-02-15_13:13:07.pt b/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-2024-02-15_13:13:07.pt new file mode 100644 index 0000000..88bcbbb Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-2024-02-15_13:13:07.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-recent.pt new file mode 100644 index 0000000..a95e7f1 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_ArXiv-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-2024-02-15_03:19:39.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-2024-02-15_03:19:39.pt new file mode 100644 index 0000000..fdf7b38 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-2024-02-15_03:19:39.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-recent.pt new file mode 100644 index 0000000..9d78d1d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Enron_Emails-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-2024-02-15_16:25:16.pt b/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-2024-02-15_16:25:16.pt new file mode 100644 index 0000000..0f5f204 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-2024-02-15_16:25:16.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-recent.pt 
b/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-recent.pt new file mode 100644 index 0000000..f90bf73 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_EuroParl-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-2024-02-14_23:38:23.pt b/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-2024-02-14_23:38:23.pt new file mode 100644 index 0000000..e9b4450 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-2024-02-14_23:38:23.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-recent.pt new file mode 100644 index 0000000..4437eac Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_FreeLaw-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-2024-02-15_03:55:26.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-2024-02-15_03:55:26.pt new file mode 100644 index 0000000..6ee6019 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-2024-02-15_03:55:26.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-recent.pt new file mode 100644 index 0000000..6de0dc7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Github-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-2024-02-15_17:27:57.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-2024-02-15_17:27:57.pt new file mode 100644 index 0000000..faea425 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-2024-02-15_17:27:57.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-recent.pt 
b/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-recent.pt new file mode 100644 index 0000000..964240f Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Gutenberg-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-2024-02-15_04:19:17.pt b/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-2024-02-15_04:19:17.pt new file mode 100644 index 0000000..5f1617c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-2024-02-15_04:19:17.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-recent.pt new file mode 100644 index 0000000..54cb129 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_HackerNews-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-2024-02-15_03:06:03.pt b/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-2024-02-15_03:06:03.pt new file mode 100644 index 0000000..9cfc205 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-2024-02-15_03:06:03.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-recent.pt new file mode 100644 index 0000000..2b1cedc Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_NIH_ExPorter-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-2024-02-15_16:08:25.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-2024-02-15_16:08:25.pt new file mode 100644 index 0000000..dd0c5f7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-2024-02-15_16:08:25.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-recent.pt new file mode 100644 index 0000000..1e948de Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PhilPapers-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-2024-02-14_23:51:09.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-2024-02-14_23:51:09.pt new file mode 100644 index 0000000..0fa9ff1 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-2024-02-14_23:51:09.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-recent.pt new file mode 100644 index 0000000..5b2d9d5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Abstracts-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-2024-02-15_02:54:03.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-2024-02-15_02:54:03.pt new file mode 100644 index 0000000..e81c927 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-2024-02-15_02:54:03.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-recent.pt new file mode 100644 index 0000000..02f1aa7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_PubMed_Central-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-2024-02-15_04:07:07.pt b/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-2024-02-15_04:07:07.pt new file mode 100644 index 0000000..74ddc85 Binary files /dev/null and 
b/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-2024-02-15_04:07:07.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-recent.pt new file mode 100644 index 0000000..49787ce Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_StackExchange-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-2024-02-15_15:54:03.pt b/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-2024-02-15_15:54:03.pt new file mode 100644 index 0000000..edc04a5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-2024-02-15_15:54:03.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-recent.pt new file mode 100644 index 0000000..49798df Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_USPTO_Backgrounds-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-2024-02-15_15:30:30.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-2024-02-15_15:30:30.pt new file mode 100644 index 0000000..f998b43 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-2024-02-15_15:30:30.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-recent.pt new file mode 100644 index 0000000..591d46d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Ubuntu_IRC-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-2024-02-15_13:24:48.pt 
b/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-2024-02-15_13:24:48.pt new file mode 100644 index 0000000..ba3ec2e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-2024-02-15_13:24:48.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-recent.pt new file mode 100644 index 0000000..70e9068 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pile_Wikipedia-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-2024-02-14_08:16:35.pt b/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-2024-02-14_08:16:35.pt new file mode 100644 index 0000000..2e080ba Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-2024-02-14_08:16:35.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-recent.pt new file mode 100644 index 0000000..f494f45 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/poems-pile-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-2024-02-16_01:22:37.pt b/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-2024-02-16_01:22:37.pt new file mode 100644 index 0000000..95159f1 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-2024-02-16_01:22:37.pt differ diff --git a/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-recent.pt b/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-recent.pt new file mode 100644 index 0000000..ff9c816 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/hf/pruning_ratios-hf-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-2024-03-16_03:05:57.pt 
b/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-2024-03-16_03:05:57.pt new file mode 100644 index 0000000..a6a6224 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-2024-03-16_03:05:57.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-recent.pt new file mode 100644 index 0000000..d07fcef Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/chemistry-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-2024-03-16_02:42:26.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-2024-03-16_02:42:26.pt new file mode 100644 index 0000000..450ddfe Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-2024-03-16_02:42:26.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-recent.pt new file mode 100644 index 0000000..ff8140c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/civil-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-2024-03-16_02:05:56.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-2024-03-16_02:05:56.pt new file mode 100644 index 0000000..86fa76b Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-2024-03-16_02:05:56.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-recent.pt new file mode 100644 index 0000000..7432dca Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/code-pile-rand-recent.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-2024-03-15_21:45:16.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-2024-03-15_21:45:16.pt new file mode 100644 index 0000000..c27c5ab Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-2024-03-15_21:45:16.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-recent.pt new file mode 100644 index 0000000..556470b Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/emotion-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-2024-03-15_23:46:18.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-2024-03-15_23:46:18.pt new file mode 100644 index 0000000..892e698 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-2024-03-15_23:46:18.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-recent.pt new file mode 100644 index 0000000..686ffbc Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_ArXiv-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-2024-03-15_22:54:17.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-2024-03-15_22:54:17.pt new file mode 100644 index 0000000..8c50a18 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-2024-03-15_22:54:17.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-recent.pt 
b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-recent.pt new file mode 100644 index 0000000..7678ac0 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Enron_Emails-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_00:54:31.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_00:54:31.pt new file mode 100644 index 0000000..6f01d47 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_00:54:31.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_01:37:28.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_01:37:28.pt new file mode 100644 index 0000000..7161257 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-2024-03-16_01:37:28.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-recent.pt new file mode 100644 index 0000000..50341c1 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_EuroParl-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-2024-03-15_22:01:38.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-2024-03-15_22:01:38.pt new file mode 100644 index 0000000..f2768e1 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-2024-03-15_22:01:38.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-recent.pt new file mode 100644 index 
0000000..c073ecb Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_FreeLaw-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-2024-03-15_23:07:34.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-2024-03-15_23:07:34.pt new file mode 100644 index 0000000..63370ef Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-2024-03-15_23:07:34.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-recent.pt new file mode 100644 index 0000000..319970a Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Github-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:09:58.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:09:58.pt new file mode 100644 index 0000000..197cdba Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:09:58.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:52:17.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:52:17.pt new file mode 100644 index 0000000..7055563 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-2024-03-16_01:52:17.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-recent.pt new file mode 100644 index 0000000..996702e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Gutenberg-pile-rand-recent.pt 
differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-2024-03-15_23:33:26.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-2024-03-15_23:33:26.pt new file mode 100644 index 0000000..4e572ae Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-2024-03-15_23:33:26.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-recent.pt new file mode 100644 index 0000000..f174fff Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_HackerNews-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-2024-03-15_22:40:48.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-2024-03-15_22:40:48.pt new file mode 100644 index 0000000..25d7c6d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-2024-03-15_22:40:48.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-recent.pt new file mode 100644 index 0000000..ec0bd18 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_NIH_ExPorter-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_00:41:19.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_00:41:19.pt new file mode 100644 index 0000000..dfe1971 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_00:41:19.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_01:24:16.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_01:24:16.pt new file mode 100644 index 0000000..def2c9e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-2024-03-16_01:24:16.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-recent.pt new file mode 100644 index 0000000..d562a19 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PhilPapers-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-2024-03-15_22:14:46.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-2024-03-15_22:14:46.pt new file mode 100644 index 0000000..06e1332 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-2024-03-15_22:14:46.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-recent.pt new file mode 100644 index 0000000..6874d62 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Abstracts-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-2024-03-15_22:27:52.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-2024-03-15_22:27:52.pt new file mode 100644 index 0000000..edff5a6 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-2024-03-15_22:27:52.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-recent.pt new file mode 100644 index 0000000..c11d60a Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_PubMed_Central-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-2024-03-15_23:20:25.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-2024-03-15_23:20:25.pt new file mode 100644 index 0000000..0fbcc08 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-2024-03-15_23:20:25.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-recent.pt new file mode 100644 index 0000000..f644574 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_StackExchange-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-2024-03-16_00:27:46.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-2024-03-16_00:27:46.pt new file mode 100644 index 0000000..6e992a2 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-2024-03-16_00:27:46.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-recent.pt new file mode 100644 index 0000000..a383815 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_USPTO_Backgrounds-pile-rand-recent.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-2024-03-16_00:16:10.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-2024-03-16_00:16:10.pt new file mode 100644 index 0000000..3f65a1e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-2024-03-16_00:16:10.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-recent.pt new file mode 100644 index 0000000..87ab949 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Ubuntu_IRC-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-2024-03-15_23:59:08.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-2024-03-15_23:59:08.pt new file mode 100644 index 0000000..1b234fe Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-2024-03-15_23:59:08.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-recent.pt new file mode 100644 index 0000000..cefd69d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/pile_Wikipedia-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-2024-03-16_02:23:23.pt b/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-2024-03-16_02:23:23.pt new file mode 100644 index 0000000..bf61522 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-2024-03-16_02:23:23.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-recent.pt 
b/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-recent.pt new file mode 100644 index 0000000..ee2065a Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-mistral/poems-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-2024-02-19_01:06:36.pt b/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-2024-02-19_01:06:36.pt new file mode 100644 index 0000000..1f7156b Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-2024-02-19_01:06:36.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-recent.pt new file mode 100644 index 0000000..16381f7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/chemistry-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-2024-02-19_00:42:48.pt b/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-2024-02-19_00:42:48.pt new file mode 100644 index 0000000..e94203a Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-2024-02-19_00:42:48.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-recent.pt new file mode 100644 index 0000000..b5f271b Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/civil-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-2024-02-19_00:06:10.pt b/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-2024-02-19_00:06:10.pt new file mode 100644 index 0000000..a9f8a6d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-2024-02-19_00:06:10.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-recent.pt new file mode 100644 index 0000000..42d553c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/code-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-2024-02-18_14:15:25.pt b/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-2024-02-18_14:15:25.pt new file mode 100644 index 0000000..5c6940d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-2024-02-18_14:15:25.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-recent.pt new file mode 100644 index 0000000..55282a5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/emotion-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-2024-02-18_16:19:21.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-2024-02-18_16:19:21.pt new file mode 100644 index 0000000..4399fd8 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-2024-02-18_16:19:21.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-recent.pt new file mode 100644 index 0000000..9c64ded Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_ArXiv-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-2024-02-18_15:25:57.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-2024-02-18_15:25:57.pt new file mode 100644 index 0000000..6c8ccdd Binary files /dev/null and 
b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-2024-02-18_15:25:57.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-recent.pt new file mode 100644 index 0000000..5e430cf Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Enron_Emails-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-2024-02-18_18:05:35.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-2024-02-18_18:05:35.pt new file mode 100644 index 0000000..06726c9 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-2024-02-18_18:05:35.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-recent.pt new file mode 100644 index 0000000..7f3b72c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_EuroParl-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-2024-02-18_14:31:01.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-2024-02-18_14:31:01.pt new file mode 100644 index 0000000..b93cc66 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-2024-02-18_14:31:01.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-recent.pt new file mode 100644 index 0000000..ecf1700 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_FreeLaw-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-2024-02-18_15:39:37.pt 
b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-2024-02-18_15:39:37.pt new file mode 100644 index 0000000..c247154 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-2024-02-18_15:39:37.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-recent.pt new file mode 100644 index 0000000..05a324f Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Github-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-2024-02-18_20:06:36.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-2024-02-18_20:06:36.pt new file mode 100644 index 0000000..8e6f53c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-2024-02-18_20:06:36.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-recent.pt new file mode 100644 index 0000000..8dd981c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Gutenberg-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-2024-02-18_16:06:02.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-2024-02-18_16:06:02.pt new file mode 100644 index 0000000..f7d100e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-2024-02-18_16:06:02.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-recent.pt new file mode 100644 index 0000000..fbc21dc Binary files /dev/null and 
b/examples/neuron-mapping/saved_tensors/rand-vit/pile_HackerNews-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-2024-02-18_15:11:50.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-2024-02-18_15:11:50.pt new file mode 100644 index 0000000..8811eb7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-2024-02-18_15:11:50.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-recent.pt new file mode 100644 index 0000000..753dce5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_NIH_ExPorter-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-2024-02-18_17:46:56.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-2024-02-18_17:46:56.pt new file mode 100644 index 0000000..7181556 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-2024-02-18_17:46:56.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-recent.pt new file mode 100644 index 0000000..426cb0d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PhilPapers-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-2024-02-18_14:44:33.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-2024-02-18_14:44:33.pt new file mode 100644 index 0000000..cfab1e9 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-2024-02-18_14:44:33.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-recent.pt new file mode 100644 index 0000000..e4a32c6 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Abstracts-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-2024-02-18_14:58:27.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-2024-02-18_14:58:27.pt new file mode 100644 index 0000000..1ea8ac0 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-2024-02-18_14:58:27.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-recent.pt new file mode 100644 index 0000000..460cd18 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_PubMed_Central-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-2024-02-18_15:52:45.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-2024-02-18_15:52:45.pt new file mode 100644 index 0000000..45bde81 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-2024-02-18_15:52:45.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-recent.pt new file mode 100644 index 0000000..065fccb Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_StackExchange-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-2024-02-18_17:31:56.pt 
b/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-2024-02-18_17:31:56.pt new file mode 100644 index 0000000..71378e3 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-2024-02-18_17:31:56.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-recent.pt new file mode 100644 index 0000000..179241c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_USPTO_Backgrounds-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-2024-02-18_16:32:41.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-2024-02-18_16:32:41.pt new file mode 100644 index 0000000..c749211 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-2024-02-18_16:32:41.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-recent.pt new file mode 100644 index 0000000..67ada7e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/pile_Wikipedia-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-2024-02-19_00:23:44.pt b/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-2024-02-19_00:23:44.pt new file mode 100644 index 0000000..a48fc65 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-2024-02-19_00:23:44.pt differ diff --git a/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-recent.pt b/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-recent.pt new file mode 100644 index 0000000..1907ac4 Binary files /dev/null and 
b/examples/neuron-mapping/saved_tensors/rand-vit/poems-pile-rand-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/biology-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/biology-pile-v0.2-recent.pt new file mode 100644 index 0000000..8df7a24 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/biology-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/chemistry-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/chemistry-pile-v0.2-recent.pt new file mode 100644 index 0000000..9475cf7 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/chemistry-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/civil-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/civil-pile-v0.2-recent.pt new file mode 100644 index 0000000..69ef81d Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/civil-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/code-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/code-pile-v0.2-recent.pt new file mode 100644 index 0000000..66d3b8a Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/code-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/emotion-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/emotion-pile-v0.2-recent.pt new file mode 100644 index 0000000..6b27e3e Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/emotion-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/math-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/math-pile-v0.2-recent.pt new file mode 100644 index 0000000..6bc2ae5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/math-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/physics-pile-v0.2-recent.pt 
b/examples/neuron-mapping/saved_tensors/v0.2/physics-pile-v0.2-recent.pt new file mode 100644 index 0000000..8d8da8b Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/physics-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_ArXiv-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_ArXiv-pile-v0.2-recent.pt new file mode 100644 index 0000000..8f0a738 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_ArXiv-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_Enron_Emails-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_Enron_Emails-pile-v0.2-recent.pt new file mode 100644 index 0000000..8ae52a2 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_Enron_Emails-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_EuroParl-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_EuroParl-pile-v0.2-recent.pt new file mode 100644 index 0000000..dd459b5 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_EuroParl-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_FreeLaw-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_FreeLaw-pile-v0.2-recent.pt new file mode 100644 index 0000000..657fc43 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_FreeLaw-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_Github-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_Github-pile-v0.2-recent.pt new file mode 100644 index 0000000..824f657 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_Github-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_Gutenberg-pile-v0.2-recent.pt 
b/examples/neuron-mapping/saved_tensors/v0.2/pile_Gutenberg-pile-v0.2-recent.pt new file mode 100644 index 0000000..9f6dc06 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_Gutenberg-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_HackerNews-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_HackerNews-pile-v0.2-recent.pt new file mode 100644 index 0000000..ba54a01 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_HackerNews-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_NIH_ExPorter-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_NIH_ExPorter-pile-v0.2-recent.pt new file mode 100644 index 0000000..7f5809c Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_NIH_ExPorter-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_PhilPapers-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_PhilPapers-pile-v0.2-recent.pt new file mode 100644 index 0000000..8d159b3 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_PhilPapers-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Abstracts-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Abstracts-pile-v0.2-recent.pt new file mode 100644 index 0000000..d8d24bb Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Abstracts-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Central-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Central-pile-v0.2-recent.pt new file mode 100644 index 0000000..75c21d0 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_PubMed_Central-pile-v0.2-recent.pt differ diff --git 
a/examples/neuron-mapping/saved_tensors/v0.2/pile_StackExchange-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_StackExchange-pile-v0.2-recent.pt new file mode 100644 index 0000000..81c96d6 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_StackExchange-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_USPTO_Backgrounds-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_USPTO_Backgrounds-pile-v0.2-recent.pt new file mode 100644 index 0000000..c3afd18 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_USPTO_Backgrounds-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_Ubuntu_IRC-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_Ubuntu_IRC-pile-v0.2-recent.pt new file mode 100644 index 0000000..bb52da4 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_Ubuntu_IRC-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/pile_Wikipedia-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/pile_Wikipedia-pile-v0.2-recent.pt new file mode 100644 index 0000000..7642547 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/pile_Wikipedia-pile-v0.2-recent.pt differ diff --git a/examples/neuron-mapping/saved_tensors/v0.2/poems-pile-v0.2-recent.pt b/examples/neuron-mapping/saved_tensors/v0.2/poems-pile-v0.2-recent.pt new file mode 100644 index 0000000..28c1f01 Binary files /dev/null and b/examples/neuron-mapping/saved_tensors/v0.2/poems-pile-v0.2-recent.pt differ diff --git a/src/taker/prune.py b/src/taker/prune.py index c9e6a5a..e7a0bbc 100644 --- a/src/taker/prune.py +++ b/src/taker/prune.py @@ -11,7 +11,7 @@ from .eval import evaluate_all from .scoring import score_indices_by, score_indices from .activations import get_midlayer_activations, get_top_frac, \ - choose_attn_heads_by, 
save_timestamped_tensor_dict + choose_attn_heads_by, save_timestamped_tensor_dict, save_tensor_dict from .texts import prepare def prune_and_evaluate( @@ -72,7 +72,7 @@ def score_and_prune( opt: Model, focus_activations_data: ActivationOverview, cripple_activations_data: ActivationOverview, pruning_config: PruningConfig, - save=False, + save=True, ): # Get the top fraction FF activations and prune ff_frac, ff_eps = pruning_config.ff_frac, pruning_config.ff_eps @@ -146,6 +146,8 @@ def score_and_prune( opt: Model, "attn_threshold": attn_threshold if do_attn else 0, "ff_del": float( torch.sum(ff_criteria) ) if do_ff else 0, "attn_del": float( torch.sum(attn_criteria) ) if do_attn else 0, + "ff_scores": ff_scores.cpu().numpy(), + "ff_criteria": ff_criteria.cpu().numpy(), }}) data.update({'deletions_per_layer': { @@ -319,7 +321,7 @@ def run_pruning(c: PruningConfig): print(history.history[-1]) print(history.df.T) print(history.df.T.to_csv()) - + # print("masks: ", opt.masks["mlp_pre_out"]) return opt, history ###################################################################################### diff --git a/src/taker/texts.py b/src/taker/texts.py index d689922..7c885ef 100644 --- a/src/taker/texts.py +++ b/src/taker/texts.py @@ -124,6 +124,111 @@ def filter_dataset(_dataset): return _dataset.filter(filter_example) return filter_dataset + @staticmethod + def filter_pile_FreeLaw(_dataset): + def filter_pile_FreeLaw_example(example): + return example["meta"]["pile_set_name"] == "FreeLaw" + pile_FreeLaw_dataset = _dataset.filter(filter_pile_FreeLaw_example) + return pile_FreeLaw_dataset + + @staticmethod + def filter_pile_PubMed_Abstracts(_dataset): + def filter_pile_PubMed_Abstracts_example(example): + return example["meta"]["pile_set_name"] == "PubMed Abstracts" + pile_PubMed_Abstracts_dataset = _dataset.filter(filter_pile_PubMed_Abstracts_example) + return pile_PubMed_Abstracts_dataset + + @staticmethod + def filter_pile_PubMed_Central(_dataset): + def 
filter_pile_PubMed_Central_example(example): + return example["meta"]["pile_set_name"] == "PubMed Central" + pile_PubMed_Central_dataset = _dataset.filter(filter_pile_PubMed_Central_example) + return pile_PubMed_Central_dataset + + @staticmethod + def filter_pile_NIH_ExPorter(_dataset): + def filter_pile_NIH_ExPorter_example(example): + return example["meta"]["pile_set_name"] == "NIH ExPorter" + pile_NIH_ExPorter_dataset = _dataset.filter(filter_pile_NIH_ExPorter_example) + return pile_NIH_ExPorter_dataset + + @staticmethod + def filter_pile_Enron_Emails(_dataset): + def filter_pile_Enron_Emails_example(example): + return example["meta"]["pile_set_name"] == "Enron Emails" + pile_Enron_Emails_dataset = _dataset.filter(filter_pile_Enron_Emails_example) + return pile_Enron_Emails_dataset + + @staticmethod + def filter_pile_Github(_dataset): + def filter_pile_Github_example(example): + return example["meta"]["pile_set_name"] == "Github" + pile_Github_dataset = _dataset.filter(filter_pile_Github_example) + return pile_Github_dataset + + @staticmethod + def filter_pile_StackExchange(_dataset): + def filter_pile_StackExchange_example(example): + return example["meta"]["pile_set_name"] == "StackExchange" + pile_StackExchange_dataset = _dataset.filter(filter_pile_StackExchange_example) + return pile_StackExchange_dataset + + @staticmethod + def filter_pile_HackerNews(_dataset): + def filter_pile_HackerNews_example(example): + return example["meta"]["pile_set_name"] == "HackerNews" + pile_HackerNews_dataset = _dataset.filter(filter_pile_HackerNews_example) + return pile_HackerNews_dataset + + @staticmethod + def filter_pile_ArXiv(_dataset): + def filter_pile_ArXiv_example(example): + return example["meta"]["pile_set_name"] == "ArXiv" + pile_ArXiv_dataset = _dataset.filter(filter_pile_ArXiv_example) + return pile_ArXiv_dataset + + @staticmethod + def filter_pile_Wikipedia(_dataset): + def filter_pile_Wikipedia_example(example): + return example["meta"]["pile_set_name"] == 
"Wikipedia (en)" + pile_Wikipedia_dataset = _dataset.filter(filter_pile_Wikipedia_example) + return pile_Wikipedia_dataset + + @staticmethod + def filter_pile_Ubuntu_IRC(_dataset): + def filter_pile_Ubuntu_IRC_example(example): + return example["meta"]["pile_set_name"] == "Ubuntu IRC" + pile_Ubuntu_IRC_dataset = _dataset.filter(filter_pile_Ubuntu_IRC_example) + return pile_Ubuntu_IRC_dataset + + @staticmethod + def filter_pile_USPTO_Backgrounds(_dataset): + def filter_pile_USPTO_Backgrounds_example(example): + return example["meta"]["pile_set_name"] == "USPTO Backgrounds" + pile_USPTO_Backgrounds_dataset = _dataset.filter(filter_pile_USPTO_Backgrounds_example) + return pile_USPTO_Backgrounds_dataset + + @staticmethod + def filter_pile_PhilPapers(_dataset): + def filter_pile_PhilPapers_example(example): + return example["meta"]["pile_set_name"] == "PhilPapers" + pile_PhilPapers_dataset = _dataset.filter(filter_pile_PhilPapers_example) + return pile_PhilPapers_dataset + + @staticmethod + def filter_pile_EuroParl(_dataset): + def filter_pile_EuroParl_example(example): + return example["meta"]["pile_set_name"] == "EuroParl" + pile_EuroParl_dataset = _dataset.filter(filter_pile_EuroParl_example) + return pile_EuroParl_dataset + + @staticmethod + def filter_pile_Gutenberg(_dataset): + def filter_pile_Gutenberg_example(example): + return example["meta"]["pile_set_name"] == "Gutenberg (PG-19)" + pile_Gutenberg_dataset = _dataset.filter(filter_pile_Gutenberg_example) + return pile_Gutenberg_dataset + def get_cifar_dataset_configs(): cifar20_datasets = ["aquatic_mammals", "fish", "flowers", "food_containers", "fruit_and_vegetables", "household_electrical_devices", "household_furniture", "insects", "large_carnivores", "large_outdoor", "large_omnivores_and_herbivores", "medium_mammals", "non_insect_invertebrates", "people", "reptiles", "small_mammals", "trees", "veh1", "veh2"] return [EvalConfig(f"cifar20-{dataset}", @@ -136,6 +241,8 @@ def get_cifar_dataset_configs(): 
dataset_image_label_key = "coarse_label", dataset_filter=DatasetFilters.filter_cifar(count), ) for count, dataset in enumerate(cifar20_datasets)] + + def infer_dataset_config(dataset_name:str, dataset_subset:str=None): eval_configs = [ @@ -307,26 +414,113 @@ def infer_dataset_config(dataset_name:str, dataset_subset:str=None): dataset_image_key = "img", dataset_image_label_key = "coarse_label", ), - EvalConfig("cifar20-split", - dataset_repo = "cifar100", - dataset_type = "image-classification", - dataset_split = ["train", "test"], - is_train_mode = True, - dataset_image_key = "img", - dataset_image_label_key = "coarse_label", - ), - EvalConfig("bio", - dataset_repo = "camel-ai/biology", - dataset_text_key = "message_2", - dataset_has_test_split = False, - ), EvalConfig("emotion", dataset_repo = "dair-ai/emotion", dataset_type = "text-classification", dataset_text_key = "text", dataset_text_label_key = "label", dataset_has_test_split = True, - ) + ), + EvalConfig("biology", + dataset_repo = "camel-ai/biology", + dataset_text_key = "message_2", + dataset_has_test_split = False, + ), + EvalConfig("physics", + dataset_repo = "camel-ai/physics", + dataset_text_key = "message_2", + dataset_has_test_split = False, + ), + EvalConfig("chemistry", + dataset_repo = "camel-ai/chemistry", + dataset_text_key = "message_2", + dataset_has_test_split = False, + ), + EvalConfig("math", + dataset_repo = "camel-ai/math", + dataset_text_key = "message_2", + dataset_has_test_split = False, + ), + EvalConfig("poems", + dataset_repo = "sadFaceEmoji/english-poems", + dataset_text_key = "poem", + dataset_has_test_split = False, + ), + EvalConfig("pile_FreeLaw", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_FreeLaw, + ), + EvalConfig("pile_PubMed_Abstracts", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_PubMed_Abstracts, + ), + 
EvalConfig("pile_PubMed_Central", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_PubMed_Central, + ), + EvalConfig("pile_NIH_ExPorter", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_NIH_ExPorter, + ), + EvalConfig("pile_Enron_Emails", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_Enron_Emails, + ), + EvalConfig("pile_Github", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_Github, + ), + EvalConfig("pile_StackExchange", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_StackExchange, + ), + EvalConfig("pile_HackerNews", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_HackerNews, + ), + EvalConfig("pile_ArXiv", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_ArXiv, + ), + EvalConfig("pile_Wikipedia", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_Wikipedia, + ), + EvalConfig("pile_Ubuntu_IRC", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_Ubuntu_IRC, + ), + EvalConfig("pile_USPTO_Backgrounds", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_USPTO_Backgrounds, + ), + EvalConfig("pile_PhilPapers", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_PhilPapers, + ), + EvalConfig("pile_EuroParl", + dataset_repo = "monology/pile-uncopyrighted", + 
dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_EuroParl, + ), + EvalConfig("pile_Gutenberg", + dataset_repo = "monology/pile-uncopyrighted", + dataset_text_key = "text", + dataset_filter = DatasetFilters.filter_pile_Gutenberg, + ), ] + get_cifar_dataset_configs() # Convert into searchable dict