From 3d287071dda2d0cdca243a9b686b578c989aae41 Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Tue, 30 Sep 2025 10:21:06 +0200 Subject: [PATCH 1/6] add ngn optimizer --- stepback/optim/main.py | 7 +++ stepback/optim/ngn.py | 97 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 stepback/optim/ngn.py diff --git a/stepback/optim/main.py b/stepback/optim/main.py index 4d8defe..a2727e7 100644 --- a/stepback/optim/main.py +++ b/stepback/optim/main.py @@ -9,6 +9,7 @@ from .adabound import AdaBoundW from .adabelief import AdaBelief from .lion import Lion +from .ngn import NGN # only applicable to linear regression from .spp import SPP @@ -152,6 +153,12 @@ def get_optimizer(opt_config: dict) -> Tuple[torch.optim.Optimizer, dict]: hyperp = {'lr': opt_config.get('lr', 1e-3), 'weight_decay': opt_config.get('weight_decay', 0) } + + elif name == 'ngn': + opt_obj = NGN + hyperp = {'lr': opt_config.get('lr', 1e-3), + } + else: raise KeyError(f"Unknown optimizer name {name}.") diff --git a/stepback/optim/ngn.py b/stepback/optim/ngn.py new file mode 100644 index 0000000..5c026ab --- /dev/null +++ b/stepback/optim/ngn.py @@ -0,0 +1,97 @@ +""" +Implements the NGN algorithm by Orvieto and Xiao. + +Reference: https://arxiv.org/pdf/2407.04358 +""" +import torch +import warnings +from math import sqrt + +from ..types import Params, LossClosure, OptFloat + +class NGN(torch.optim.Optimizer): + def __init__(self, + params: Params, + lr: float=1e-1, + ) -> None: + """ + NGN optimizer + + Parameters + ---------- + params : Params + Model parameters. + lr : float, optional + Learning rate, by default 1e-1. + """ + + if lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + + defaults = dict(lr=lr) + + super(NGN, self).__init__(params, defaults) + + # Initialization + self._number_steps = 0 + self.state['step_size_list'] = list() # for storing the adaptive step size term + + return + + def step(self, closure: LossClosure=None, loss: torch.Tensor=None) -> OptFloat: + """ + Performs a single optimization step. + + Parameters + ---------- + closure : LossClosure, optional + A callable that evaluates the model (possibly with backprop) and returns the loss, by default None. + + loss : torch.tensor, optional + The loss tensor. Use this when the backward step has already been performed. By default None. + + + Returns + ------- + (Stochastic) Loss function value. + """ + assert (closure is not None) or (loss is not None), "Either loss tensor or closure must be passed." + assert (closure is None) or (loss is None), "Pass either the loss tensor or the closure, not both." + + if closure is not None: + with torch.enable_grad(): + loss = closure() + + if len(self.param_groups) > 1: + warnings.warn("More than one param group. step_size_list contains adaptive term of last group.") + warnings.warn("More than one param group. This might cause issues for the step method.") + + self._number_steps += 1 + + # Update + grad_norm = self.compute_grad_norm() + for group in self.param_groups: + lr = group['lr'] + denom = 1 + lr / (2*loss) * (grad_norm**2) + gamma = (lr / denom).item() + + ### Update params + for p in group['params']: + p.data.add_(other=p.grad.data, alpha=-gamma) + + self.state['step_size_list'].append(gamma) + + return loss + + @torch.no_grad() + def compute_grad_norm(self): + grad_norm = 0. + for group in self.param_groups: + for p in group['params']: + assert p.grad is not None + + g = p.grad.data + grad_norm += torch.sum(torch.mul(g, g)) + + grad_norm = torch.sqrt(grad_norm) + return grad_norm From c9a1b25723ec4892604633be855c866cbe4531d7 Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Mon, 1 Dec 2025 17:43:52 +0100 Subject: [PATCH 2/6] add wsd schedule --- configs/test_shakespeare.json | 4 +- output/test_shakespeare.json | 164 +++++++++++++++++----------------- run.py | 63 +++++++------ stepback/base.py | 116 ++++++++++++++---------- stepback/models/llama.py | 2 +- stepback/optim/main.py | 23 ++++- 6 files changed, 211 insertions(+), 161 deletions(-) diff --git a/configs/test_shakespeare.json b/configs/test_shakespeare.json index 67f20d3..71bd6fd 100644 --- a/configs/test_shakespeare.json +++ b/configs/test_shakespeare.json @@ -1,10 +1,10 @@ { - "batch_size": 16, + "batch_size": 64, "dataset": "shakespeare", "dataset_kwargs": {}, "max_epoch": 10, "model": "llama", - "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 3, "n_heads": 2, "mlp": "mlp", "seq_len": 512}, + "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, "opt": [{"name": "adam", "lr": [1e-3], "lr_schedule": "constant", "warmup_steps": 100, "stepwise_schedule": true}], "loss_func": "sequence_cross_entropy", "score_func": "sequence_cross_entropy_accuracy", diff --git a/output/test_shakespeare.json b/output/test_shakespeare.json index f4d629b..dbe0c35 100644 --- a/output/test_shakespeare.json +++ b/output/test_shakespeare.json @@ -1,7 +1,7 @@ [ { "config": { - "batch_size": 16, + "batch_size": 64, "dataset": "shakespeare", "dataset_kwargs": {}, "loss_func": "sequence_cross_entropy", @@ -11,9 +11,9 @@ "dim": 384, "expand": 4, "mlp": "mlp", - "n_heads": 2, - "n_layers": 3, - "seq_len": 512, + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, "vocab_size": 92 }, "opt": { @@ -29,129 +29,129 @@ "history": [ { "epoch": 0, - "grad_norm": 1.992753267288208, + "grad_norm": 1.121427059173584, "learning_rate": 1e-13, - "model_norm": 64.88945770263672, - "train_epoch_time": 20.36537790298462, - "train_loss": 2.165691924560494, - "train_score": 0.36806988216568765, - "val_loss": 2.218102411840154, - "val_score": 0.3520025144363272 + "model_norm": 87.65153503417969, + "train_epoch_time": 36.848896980285645, + "train_loss": 2.461630094821005, + "train_score": 0.29407953720865837, + "val_loss": 2.4997918203421765, + "val_score": 0.28291834143218164 }, { "epoch": 1, - "grad_norm": 1.334716558456421, - "learning_rate": 0.001, - "model_norm": 66.11741638183594, - "train_epoch_time": 20.214939832687378, - "train_loss": 1.8825722357983157, - "train_score": 0.4417044885807278, - "val_loss": 2.0125111694993643, - "val_score": 0.4103133981255279 + "grad_norm": 1.3179094791412354, + "learning_rate": 0.000540000000046, + "model_norm": 88.65203094482422, + "train_epoch_time": 36.27439522743225, + "train_loss": 2.0504614697296275, + "train_score": 0.40136858857687097, + "val_loss": 2.11395750226438, + "val_score": 0.3813056111609484 }, { "epoch": 2, - "grad_norm": 1.0500348806381226, + "grad_norm": 1.0813289880752563, "learning_rate": 0.001, - "model_norm": 67.5, - "train_epoch_time": 19.981853008270264, - "train_loss": 1.7142505491094666, - "train_score": 0.48880484908921845, - "val_loss": 1.9234138680600572, - "val_score": 0.4437095905172414 + "model_norm": 89.66089630126953, + "train_epoch_time": 36.44406986236572, + "train_loss": 1.7927071080488317, + "train_score": 0.46938217360969936, + "val_loss": 1.929213602014579, + "val_score": 0.43094324097293935 }, { "epoch": 3, - "grad_norm": 1.0010490417480469, + "grad_norm": 1.2653794288635254, "learning_rate": 0.001, - "model_norm": 68.94657897949219, - "train_epoch_time": 19.904484033584595, - "train_loss": 1.6100540534773868, - "train_score": 0.5147796176741782, - "val_loss": 1.8390625857758796, - "val_score": 0.4670977011494253 + "model_norm": 90.63284301757812, + "train_epoch_time": 36.52592372894287, + "train_loss": 1.6544971603232101, + "train_score": 0.5043736549497848, + "val_loss": 1.8547480991869378, + "val_score": 0.4552821832806317 }, { "epoch": 4, - "grad_norm": 0.867782711982727, + "grad_norm": 0.9828493595123291, "learning_rate": 0.001, - "model_norm": 70.42530822753906, - "train_epoch_time": 19.90021586418152, - "train_loss": 1.5380675991148134, - "train_score": 0.5328275243806236, - "val_loss": 1.792324341576675, - "val_score": 0.4788793104818498 + "model_norm": 91.56520080566406, + "train_epoch_time": 36.71347689628601, + "train_loss": 1.5387006788034863, + "train_score": 0.5344926917056956, + "val_loss": 1.7642201408316156, + "val_score": 0.4774101251644327 }, { "epoch": 5, - "grad_norm": 0.7821542620658875, + "grad_norm": 1.0504655838012695, "learning_rate": 0.001, - "model_norm": 71.892822265625, - "train_epoch_time": 19.903041124343872, - "train_loss": 1.4823639410645215, - "train_score": 0.54531541538567, - "val_loss": 1.7534021857141078, - "val_score": 0.49129849144782145 + "model_norm": 92.474609375, + "train_epoch_time": 36.35963201522827, + "train_loss": 1.4742244492986452, + "train_score": 0.5499394725350772, + "val_loss": 1.7320461796017108, + "val_score": 0.49073442892009983 }, { "epoch": 6, - "grad_norm": 0.888006865978241, + "grad_norm": 0.8896018862724304, "learning_rate": 0.001, - "model_norm": 73.36498260498047, - "train_epoch_time": 19.90959620475769, - "train_loss": 1.4459222527787552, - "train_score": 0.5567617144705097, - "val_loss": 1.7188306109658602, - "val_score": 0.5006106322524191 + "model_norm": 93.3919906616211, + "train_epoch_time": 36.567251205444336, + "train_loss": 1.4220848283945573, + "train_score": 0.5644335094791232, + "val_loss": 1.6994645117892189, + "val_score": 0.5016414314405789 }, { "epoch": 7, - "grad_norm": 0.7781890034675598, + "grad_norm": 0.7851909399032593, "learning_rate": 0.001, - "model_norm": 74.81156921386719, - "train_epoch_time": 20.277348041534424, - "train_loss": 1.4094120469350628, - "train_score": 0.564420610495194, - "val_loss": 1.7047440726181557, - "val_score": 0.5073141165163325 + "model_norm": 94.29660034179688, + "train_epoch_time": 36.6595242023468, + "train_loss": 1.3814418076784745, + "train_score": 0.5747007264639418, + "val_loss": 1.6752228601381234, + "val_score": 0.5100907716904387 }, { "epoch": 8, - "grad_norm": 0.7673189043998718, + "grad_norm": 0.7439262866973877, "learning_rate": 0.001, - "model_norm": 76.27680969238281, - "train_epoch_time": 20.30754780769348, - "train_loss": 1.3734883747473103, - "train_score": 0.5729114344828986, - "val_loss": 1.693546010159898, - "val_score": 0.5119118175287356 + "model_norm": 95.21795654296875, + "train_epoch_time": 36.596500873565674, + "train_loss": 1.359556995163347, + "train_score": 0.5797659610573155, + "val_loss": 1.6677938104360166, + "val_score": 0.511516934831709 }, { "epoch": 9, - "grad_norm": 0.6948201060295105, + "grad_norm": 0.7252312898635864, "learning_rate": 0.001, - "model_norm": 77.71929931640625, - "train_epoch_time": 20.247668027877808, - "train_loss": 1.3485437734255699, - "train_score": 0.5800568226694924, - "val_loss": 1.672089501358997, - "val_score": 0.5159931754243785 + "model_norm": 96.16193389892578, + "train_epoch_time": 36.32176995277405, + "train_loss": 1.3168328549290662, + "train_score": 0.5910767124578292, + "val_loss": 1.6540300926850118, + "val_score": 0.5194550074474409 } ], "summary": { "data_parallel": "false", - "end_time": "2025-07-23 14:30:37.932485", - "final_model_norm": 77.71929931640625, - "init_model_norm": 64.04080963134766, + "end_time": "2025-12-01 15:12:04.720226", + "final_model_norm": 96.16193389892578, + "init_model_norm": 87.41546630859375, "input_dim": [ - 512 + 256 ], - "num_batches_per_epoch": 108, + "num_batches_per_epoch": 54, "num_workers": 0, "output_dim": [ - 512 + 256 ], - "start_time": "2025-07-23 14:25:44.459311", + "start_time": "2025-12-01 15:03:08.470563", "step_scheduler_on_epoch": false } } diff --git a/run.py b/run.py index 2424dba..8ecc6fb 100644 --- a/run.py +++ b/run.py @@ -25,17 +25,18 @@ parser.add_argument('--verbose', action="store_true", help="Verbose mode.") parser.add_argument('--force-deterministic', action="store_true", help="Use deterministic mode in Pytorch. Might require setting environment variables.") -def run_one(exp_id: str, - config_dir: str=DEFAULTS.config_dir, - output_dir: str=DEFAULTS.output_dir, - data_dir: str=DEFAULTS.data_dir, - device: str=DEFAULTS.device, - num_workers: int=DEFAULTS.num_workers, - data_parallel: Union[list, None]=DEFAULTS.data_parallel, - log_every_k_steps: Union[int, None]=DEFAULTS.log_every_k_steps, - verbose: bool=DEFAULTS.verbose, - force_deterministic: bool=DEFAULTS.force_deterministic - ): +def run_one( + exp_id: str, + config_dir: str=DEFAULTS.config_dir, + output_dir: str=DEFAULTS.output_dir, + data_dir: str=DEFAULTS.data_dir, + device: str=DEFAULTS.device, + num_workers: int=DEFAULTS.num_workers, + data_parallel: Union[list, None]=DEFAULTS.data_parallel, + log_every_k_steps: Union[int, None]=DEFAULTS.log_every_k_steps, + verbose: bool=DEFAULTS.verbose, + force_deterministic: bool=DEFAULTS.force_deterministic + ): """Function for running all runs from one config file. Default values for all arguments can be found in ``stepback/defaults.py``. @@ -83,14 +84,16 @@ def run_one(exp_id: str, for j, config in enumerate(exp_list): # each run gets id, by position in the list - B = Base(name=exp_id + f'_{j}', - config=config, - device=device, - data_dir=data_dir, - num_workers=num_workers, - data_parallel=data_parallel, - log_every_k_steps=log_every_k_steps, - verbose=verbose) + B = Base( + name=exp_id + f'_{j}', + config=config, + device=device, + data_dir=data_dir, + num_workers=num_workers, + data_parallel=data_parallel, + log_every_k_steps=log_every_k_steps, + verbose=verbose + ) B.setup() B.run() # train and validate @@ -106,15 +109,17 @@ def run_one(exp_id: str, print(args) - run_one(args.id, - config_dir=args.config_dir, - output_dir=args.output_dir, - data_dir=args.data_dir, - device=args.device, - num_workers=args.num_workers, - data_parallel=args.data_parallel, - log_every_k_steps=args.log_every_k_steps, - verbose=args.verbose, - force_deterministic=args.force_deterministic) + run_one( + args.id, + config_dir=args.config_dir, + output_dir=args.output_dir, + data_dir=args.data_dir, + device=args.device, + num_workers=args.num_workers, + data_parallel=args.data_parallel, + log_every_k_steps=args.log_every_k_steps, + verbose=args.verbose, + force_deterministic=args.force_deterministic + ) diff --git a/stepback/base.py b/stepback/base.py index 81b1a0a..9b92ebd 100644 --- a/stepback/base.py +++ b/stepback/base.py @@ -18,14 +18,17 @@ from .defaults import DEFAULTS class Base: - def __init__(self, name: str, - config: dict, - device: str=DEFAULTS.device, - data_dir: str=DEFAULTS.data_dir, - num_workers: int=DEFAULTS.num_workers, - data_parallel: Union[list, None]=DEFAULTS.data_parallel, - log_every_k_steps: Union[int, None]=DEFAULTS.log_every_k_steps, - verbose: bool=DEFAULTS.verbose): + def __init__( + self, + name: str, + config: dict, + device: str=DEFAULTS.device, + data_dir: str=DEFAULTS.data_dir, + num_workers: int=DEFAULTS.num_workers, + data_parallel: Union[list, None]=DEFAULTS.data_parallel, + log_every_k_steps: Union[int, None]=DEFAULTS.log_every_k_steps, + verbose: bool=DEFAULTS.verbose + ): """The main class. Performs one single training run plus evaluation. Parameters @@ -87,9 +90,11 @@ def __init__(self, name: str, self.check_config() # Create ditionary for results - self.results = {'config': self.config, - 'history': {}, - 'summary': {}} + self.results = { + 'config': self.config, + 'history': {}, + 'summary': {} + } self.results['summary']['num_workers'] = self.num_workers self.results['summary']['data_parallel'] = 'true' if self.data_parallel else 'false' @@ -122,11 +127,12 @@ def _setup_data(self): self.results['summary']['input_dim'], self.results['summary']['output_dim'] = infer_shapes(self.train_set) # construct train loader - self.train_loader = get_loader(ds=self.train_set, - seed=self.run_seed, - batch_size=self.config['batch_size'], - num_workers=self.num_workers, - drop_last=True + self.train_loader = get_loader( + ds=self.train_set, + seed=self.run_seed, + batch_size=self.config['batch_size'], + num_workers=self.num_workers, + drop_last=True ) return @@ -136,9 +142,10 @@ def _setup_model(self): torch.manual_seed(self.seed) # Reseed to have same initialization torch.cuda.manual_seed_all(self.seed) - self.model = get_model(config=self.config, - input_dim=self.results['summary'].get('input_dim',[]), - output_dim=self.results['summary'].get('output_dim',[]) + self.model = get_model( + config=self.config, + input_dim=self.results['summary'].get('input_dim',[]), + output_dim=self.results['summary'].get('output_dim',[]) ) self.model.to(self.device) @@ -168,8 +175,13 @@ def setup(self): opt_obj, hyperp = get_optimizer(self.config['opt']) self._init_opt(opt_obj, hyperp) - - self.sched, self._step_scheduler_on_epoch = get_scheduler(self.config['opt'], self.opt) + + # total number of iters (either in steps or epochs) for LR schedule + if self.config['opt'].get('stepwise_schedule', False): + num_iter = self.config['max_epoch'] * len(self.train_loader) + else: + num_iter = self.config['max_epoch'] + self.sched, self._step_scheduler_on_epoch = get_scheduler(self.config['opt'], num_iter, self.opt) #============ Results ============== opt_val = self._compute_opt_value() @@ -224,15 +236,19 @@ def run(self): # Validation with torch.no_grad(): - metric_dict = {'loss': Loss(self.config['loss_func'], backwards=False), - 'score': Loss(self.config['score_func'], backwards=False)} + metric_dict = { + 'loss': Loss(self.config['loss_func'], backwards=False), + 'score': Loss(self.config['score_func'], backwards=False) + } - train_dict = self.evaluate(self.train_set, - metric_dict = metric_dict, + train_dict = self.evaluate( + self.train_set, + metric_dict = metric_dict, ) - val_dict = self.evaluate(self.val_set, - metric_dict = metric_dict, + val_dict = self.evaluate( + self.val_set, + metric_dict = metric_dict, ) # Record metrics @@ -241,8 +257,11 @@ def run(self): # Record metrics specific to MoMo methods if self.opt.state.get('step_size_list'): - score_dict['step_size_list'] = [float(np.format_float_scientific(t,5)) for t in self.opt.state['step_size_list']] + score_dict['step_size_list'] = [ + float(np.format_float_scientific(t,5)) for t in self.opt.state['step_size_list'] + ] self.opt.state['step_size_list'] = list() + print(score_dict['step_size_list']) # fstar estimator (could be zero) if self.opt.state.get('fstar', None) is not None: score_dict['fstar'] = self.opt.state['fstar'] @@ -314,15 +333,14 @@ def train_epoch(self): pbar.set_description(f'Training - loss={loss_val:.3f} - time data: last={timings_dataloader[-1]:.3f},(mean={np.mean(timings_dataloader):.3f}) - time model+step: last={timings_model[-1]:.3f}(mean={np.mean(timings_model):.3f})') # Log loss_val and grad_norm every k steps + total_step_counter = len(self.train_loader) * self._epochs_trained + step_counter if self.log_every_k_steps is not None: - total_step_counter = len(self.train_loader) * self._epochs_trained + step_counter if step_counter % self.log_every_k_steps == 0: self._log_stepwise["loss"][total_step_counter] = loss_val.item() self._log_stepwise["grad_norm"][total_step_counter] = grad_norm(self.model) if not self._step_scheduler_on_epoch: self._log_stepwise["lr"][total_step_counter] = self.sched.get_last_lr()[0] - if not self._step_scheduler_on_epoch: self.sched.step() @@ -374,7 +392,9 @@ def evaluate(self, dataset, metric_dict): timings_model.append(t0-t1) pbar.set_description(f'Validating {dataset.split}') - pbar.set_description(f'Validating {dataset.split} - time data: last={timings_dataloader[-1]:.3f}(mean={np.mean(timings_dataloader):.3f}) - time model: last={timings_model[-1]:.3f}(mean={np.mean(timings_model):.3f})') + pbar.set_description( + f'Validating {dataset.split} - time data: last={timings_dataloader[-1]:.3f}(mean={np.mean(timings_dataloader):.3f}) - time model: last={timings_model[-1]:.3f}(mean={np.mean(timings_model):.3f})' + ) for _met in metric_dict.keys(): @@ -388,11 +408,13 @@ def evaluate(self, dataset, metric_dict): def save_checkpoint(self, path): """See https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html""" - torch.save({'epoch': self._epochs_trained, - 'model_state_dict': self.model.state_dict(), - 'opt_state_dict': self.opt.state_dict(), - }, - path + self.name + '.mt') + torch.save({ + 'epoch': self._epochs_trained, + 'model_state_dict': self.model.state_dict(), + 'opt_state_dict': self.opt.state_dict(), + }, + path + self.name + '.mt' + ) return @@ -415,17 +437,19 @@ def _compute_opt_value(self): warnings.warn("Using bias and weight decay. Note that the implementation her will also penalize the bias.") if self.config['loss_func'] == 'squared': - opt_val = ridge_opt_value(X=self.train_set.dataset.tensors[0].detach().numpy(), - y=self.train_set.dataset.tensors[1].detach().numpy(), - lmbda = self.config['opt'].get('weight_decay', 0), - fit_intercept = fit_intercept - ) + opt_val = ridge_opt_value( + X=self.train_set.dataset.tensors[0].detach().numpy(), + y=self.train_set.dataset.tensors[1].detach().numpy(), + lmbda = self.config['opt'].get('weight_decay', 0), + fit_intercept = fit_intercept + ) elif self.config['loss_func'] == 'logistic': - opt_val = logreg_opt_value(X=self.train_set.dataset.tensors[0].detach().numpy(), - y=self.train_set.dataset.tensors[1].detach().numpy().astype(int).reshape(-1), - lmbda = self.config['opt'].get('weight_decay', 0), - fit_intercept = fit_intercept - ) + opt_val = logreg_opt_value( + X=self.train_set.dataset.tensors[0].detach().numpy(), + y=self.train_set.dataset.tensors[1].detach().numpy().astype(int).reshape(-1), + lmbda = self.config['opt'].get('weight_decay', 0), + fit_intercept = fit_intercept + ) else: opt_val = None else: diff --git a/stepback/models/llama.py b/stepback/models/llama.py index 27fbff6..ada5d39 100644 --- a/stepback/models/llama.py +++ b/stepback/models/llama.py @@ -1,4 +1,4 @@ -""" Adapted from Niccolo Ajroldi: /github.com/Niccolo-Ajroldi/plainLM +""" Adapted from Niccolo Ajroldi: github.com/Niccolo-Ajroldi/plainLM Changes: diff --git a/stepback/optim/main.py b/stepback/optim/main.py index a2727e7..e039642 100644 --- a/stepback/optim/main.py +++ b/stepback/optim/main.py @@ -164,9 +164,11 @@ def get_optimizer(opt_config: dict) -> Tuple[torch.optim.Optimizer, dict]: return opt_obj, hyperp -def get_scheduler(config: dict, opt: torch.optim.Optimizer) -> torch.optim.lr_scheduler._LRScheduler: +def get_scheduler(config: dict, num_iter: int, opt: torch.optim.Optimizer) -> torch.optim.lr_scheduler._LRScheduler: """ Main function mapping to a learning rate scheduler. + + num_iter is either number of epochs or steps. """ # if not specified, use constant step sizes name = config.get('lr_schedule', 'constant') @@ -187,7 +189,26 @@ def get_scheduler(config: dict, opt: torch.optim.Optimizer) -> torch.optim.lr_sc #lr_fun = lambda t: warmup_lr + (1-warmup_lr)*t/warmup_steps if t < warmup_steps else (t-warmup_steps+1)**(-1/2) lr_fun = lambda t: (t+1)**(-1/2) scheduler = LambdaLR(opt, lr_lambda=lr_fun) + + elif name[:3] == 'wsd': + # default cooldown is 20%, otherwise specify e.g wsd_0.1 for 10% + if name == 'wsd': + cd = 0.2 + else: + cd = float(name.split('_')[1]) + cd_start = int((1 - cd) * num_iter) + + # this map is called with t = iter - warmup_steps + # but we want to fix the cooldown start independent of warmup + # so it reads a bit hacky + lr_fun = lambda t: ( + 1 - (t+warmup_steps-cd_start) / (num_iter-cd_start) + if t + warmup_steps >= cd_start + else 1.0 + ) + scheduler = LambdaLR(opt, lr_lambda=lr_fun) + elif 'exponential' in name: # use sth like 'exponential_60_0.5': decay by factor 0.5 every 60 epochs/steps step_size = int(name.split('_')[1]) From 91b9ad71591ef5ec35306a8b950078fc277dc36a Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Mon, 1 Dec 2025 18:14:04 +0100 Subject: [PATCH 3/6] configs for stability tests --- configs/lr-stability/cifar10_resnet20-2.json | 12 ++++++++++++ configs/lr-stability/cifar10_resnet20-3.json | 12 ++++++++++++ configs/lr-stability/cifar10_resnet20-4.json | 13 +++++++++++++ configs/lr-stability/cifar10_resnet20.json | 12 ++++++++++++ configs/lr-stability/dna_logreg.json | 14 ++++++++++++++ configs/lr-stability/linreg_v1.json | 15 +++++++++++++++ configs/lr-stability/linreg_v2.json | 15 +++++++++++++++ configs/lr-stability/linreg_v3.json | 15 +++++++++++++++ configs/lr-stability/linreg_v4.json | 15 +++++++++++++++ configs/lr-stability/shakespeare-2.json | 12 ++++++++++++ configs/lr-stability/shakespeare-3.json | 12 ++++++++++++ configs/lr-stability/shakespeare.json | 12 ++++++++++++ 12 files changed, 159 insertions(+) create mode 100644 configs/lr-stability/cifar10_resnet20-2.json create mode 100644 configs/lr-stability/cifar10_resnet20-3.json create mode 100644 configs/lr-stability/cifar10_resnet20-4.json create mode 100644 configs/lr-stability/cifar10_resnet20.json create mode 100644 configs/lr-stability/dna_logreg.json create mode 100644 configs/lr-stability/linreg_v1.json create mode 100644 configs/lr-stability/linreg_v2.json create mode 100644 configs/lr-stability/linreg_v3.json create mode 100644 configs/lr-stability/linreg_v4.json create mode 100644 configs/lr-stability/shakespeare-2.json create mode 100644 configs/lr-stability/shakespeare-3.json create mode 100644 configs/lr-stability/shakespeare.json diff --git a/configs/lr-stability/cifar10_resnet20-2.json b/configs/lr-stability/cifar10_resnet20-2.json new file mode 100644 index 0000000..4e45796 --- /dev/null +++ b/configs/lr-stability/cifar10_resnet20-2.json @@ -0,0 +1,12 @@ +{ + "dataset": "cifar10", + "model": "resnet20", + "model_kwargs": {"batch_norm": true}, + "loss_func": "cross_entropy", + "score_func": "cross_entropy_accuracy", + "opt": [{"name": "prox-sps", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant"} + ], + "batch_size": 128, + "max_epoch": 20, + "n_runs": 3 + } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20-3.json b/configs/lr-stability/cifar10_resnet20-3.json new file mode 100644 index 0000000..d285025 --- /dev/null +++ b/configs/lr-stability/cifar10_resnet20-3.json @@ -0,0 +1,12 @@ +{ + "dataset": "cifar10", + "model": "resnet20", + "model_kwargs": {"batch_norm": true}, + "loss_func": "cross_entropy", + "score_func": "cross_entropy_accuracy", + "opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant", "warmup_steps": 100, "stepwise_schedule": true} + ], + "batch_size": 128, + "max_epoch": 20, + "n_runs": 3 + } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20-4.json b/configs/lr-stability/cifar10_resnet20-4.json new file mode 100644 index 0000000..d119b3d --- /dev/null +++ b/configs/lr-stability/cifar10_resnet20-4.json @@ -0,0 +1,13 @@ +{ + "dataset": "cifar10", + "model": "resnet20", + "model_kwargs": {"batch_norm": true}, + "loss_func": "cross_entropy", + "score_func": "cross_entropy_accuracy", + "opt": [{"name": "nle", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, + {"name": "ngn", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"} + ], + "batch_size": 128, + "max_epoch": 20, + "n_runs": 3 + } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20.json b/configs/lr-stability/cifar10_resnet20.json new file mode 100644 index 0000000..669f19f --- /dev/null +++ b/configs/lr-stability/cifar10_resnet20.json @@ -0,0 +1,12 @@ +{ + "dataset": "cifar10", + "model": "resnet20", + "model_kwargs": {"batch_norm": true}, + "loss_func": "cross_entropy", + "score_func": "cross_entropy_accuracy", + "opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant"} + ], + "batch_size": 128, + "max_epoch": 20, + "n_runs": 3 + } \ No newline at end of file diff --git a/configs/lr-stability/dna_logreg.json b/configs/lr-stability/dna_logreg.json new file mode 100644 index 0000000..71e8b53 --- /dev/null +++ b/configs/lr-stability/dna_logreg.json @@ -0,0 +1,14 @@ +{ +"dataset": "dna", +"model": "linear", +"model_kwargs": {"output_size": 3}, +"loss_func": "cross_entropy", +"score_func": "cross_entropy_accuracy", +"opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, + {"name": "prox-sps", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, + {"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant", "warmup_steps": 100, "stepwise_schedule": true} + ], +"batch_size": 16, +"max_epoch": 10, +"n_runs": 3 +} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v1.json b/configs/lr-stability/linreg_v1.json new file mode 100644 index 0000000..b0da316 --- /dev/null +++ b/configs/lr-stability/linreg_v1.json @@ -0,0 +1,15 @@ +{ +"dataset": "synthetic_linear", +"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 0}, +"model": "linear", +"model_kwargs": {"bias": false}, +"loss_func": "squared", +"score_func": "squared", +"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} + ], +"batch_size": 5, +"max_epoch": 10, +"n_runs": 5 +} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v2.json b/configs/lr-stability/linreg_v2.json new file mode 100644 index 0000000..1ae5cee --- /dev/null +++ b/configs/lr-stability/linreg_v2.json @@ -0,0 +1,15 @@ +{ +"dataset": "synthetic_linear", +"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 1.0}, +"model": "linear", +"model_kwargs": {"bias": false}, +"loss_func": "squared", +"score_func": "squared", +"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} + ], +"batch_size": 5, +"max_epoch": 10, +"n_runs": 5 +} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v3.json b/configs/lr-stability/linreg_v3.json new file mode 100644 index 0000000..40b1708 --- /dev/null +++ b/configs/lr-stability/linreg_v3.json @@ -0,0 +1,15 @@ +{ +"dataset": "synthetic_linear", +"dataset_kwargs": {"p": 10, "n_samples": 50}, +"model": "linear", +"model_kwargs": {"bias": false}, +"loss_func": "squared", +"score_func": "squared", +"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant", "lb": -2}, + {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} + ], +"batch_size": 5, +"max_epoch": 10, +"n_runs": 5 +} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v4.json b/configs/lr-stability/linreg_v4.json new file mode 100644 index 0000000..821bae6 --- /dev/null +++ b/configs/lr-stability/linreg_v4.json @@ -0,0 +1,15 @@ +{ +"dataset": "synthetic_linear", +"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 1.0}, +"model": "linear", +"model_kwargs": {"bias": false}, +"loss_func": "squared", +"score_func": "squared", +"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, + {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} + ], +"batch_size": 25, +"max_epoch": 50, +"n_runs": 5 +} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare-2.json b/configs/lr-stability/shakespeare-2.json new file mode 100644 index 0000000..b73037e --- /dev/null +++ b/configs/lr-stability/shakespeare-2.json @@ -0,0 +1,12 @@ +{ + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "max_epoch": 15, + "model": "llama", + "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, + "opt": [{"name": "prox-sps", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], + "loss_func": "sequence_cross_entropy", + "score_func": "sequence_cross_entropy_accuracy", + "n_runs": 3 +} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare-3.json b/configs/lr-stability/shakespeare-3.json new file mode 100644 index 0000000..ab85f9e --- /dev/null +++ b/configs/lr-stability/shakespeare-3.json @@ -0,0 +1,12 @@ +{ + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "max_epoch": 15, + "model": "llama", + "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, + "opt": [{"name": "ngn", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], + "loss_func": "sequence_cross_entropy", + "score_func": "sequence_cross_entropy_accuracy", + "n_runs": 3 +} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare.json b/configs/lr-stability/shakespeare.json new file mode 100644 index 0000000..6677b80 --- /dev/null +++ b/configs/lr-stability/shakespeare.json @@ -0,0 +1,12 @@ +{ + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "max_epoch": 15, + "model": "llama", + "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, + "opt": [{"name": "sgd", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], + "loss_func": "sequence_cross_entropy", + "score_func": "sequence_cross_entropy_accuracy", + "n_runs": 3 +} \ No newline at end of file From 68aa1d743fc467286e35a5bc8d09348a97eb71b7 Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Mon, 1 Dec 2025 19:42:46 +0100 Subject: [PATCH 4/6] shakespeare sgd --- output/lr-stability/shakespeare.json | 33134 +++++++++++++++++++++++++ 1 file changed, 33134 insertions(+) create mode 100644 output/lr-stability/shakespeare.json diff --git a/output/lr-stability/shakespeare.json b/output/lr-stability/shakespeare.json new file mode 100644 index 0000000..b1ff3de --- /dev/null +++ b/output/lr-stability/shakespeare.json @@ -0,0 +1,33134 @@ +[ + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 11.078807830810547, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.58893585205078, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 8.304182052612305, + "3": 5.066382884979248, + "4": 3.9865400791168213, + "5": 7.368899822235107, + "6": 22.684249877929688, + "7": 7.364748954772949, + "8": 5.8788042068481445, + "9": 4.168630599975586, + "10": 3.386579751968384, + "11": 4.124453067779541, + "12": 5.471625328063965, + "13": 7.303220748901367, + "14": 3.478468894958496, + "15": 4.954684257507324, + "16": 5.368632793426514, + "17": 3.2023041248321533, + "18": 8.319535255432129, + "19": 4.851791858673096, + "20": 24.409008026123047, + "21": 3.8841021060943604, + "22": 127.42274475097656, + "23": 5.798412799835205, + "24": 5.420220375061035, + "25": 3.785569429397583, + "26": 1.419108510017395, + "27": 1.837812066078186, + "28": 2.7772908210754395, + "29": 6.889194011688232, + "30": 7.025652885437012, + "31": 4.444455623626709, + "32": 4.854276180267334, + "33": 17.949159622192383, + "34": 4.730770587921143, + "35": 4.025436878204346, + "36": 1.6051708459854126, + "37": 1.831591248512268, + "38": 9.636731147766113, + "39": 4.211915016174316, + "40": 3.8471062183380127, + "41": 3.4780006408691406, + "42": 8.998422622680664, + "43": 3.369115114212036, + "44": 3.2289352416992188, + "45": 2.7891392707824707, + "46": 10.820326805114746, + "47": 4.309532642364502, + "48": 3.608126401901245, + "49": 3.1533291339874268, + "50": 1.862067699432373, + "51": 1.9424623250961304, + "52": 2.750100612640381, + "53": 11.078807830810547 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.94942569732666, + "3": 3.73760724067688, + "4": 3.612809658050537, + "5": 3.569070816040039, + "6": 4.066908836364746, + "7": 4.256112098693848, + "8": 3.772096633911133, + "9": 3.5143580436706543, + "10": 3.385322093963623, + "11": 3.4045357704162598, + "12": 3.4153342247009277, + "13": 3.389209032058716, + "14": 3.3212578296661377, + "15": 3.2782492637634277, + "16": 3.1905996799468994, + "17": 3.1310646533966064, + "18": 3.072298765182495, + "19": 3.246102809906006, + "20": 3.6585073471069336, + "21": 3.3017466068267822, + "22": 3.986306667327881, + "23": 3.561216354370117, + "24": 3.6109938621520996, + "25": 3.822118043899536, + "26": 3.377912759780884, + "27": 3.400693655014038, + "28": 3.3791050910949707, + "29": 3.5108470916748047, + "30": 6.646190166473389, + "31": 4.068477630615234, + "32": 3.6853525638580322, + "33": 5.691895008087158, + "34": 5.398983478546143, + "35": 4.3408613204956055, + "36": 3.483992099761963, + "37": 3.3839926719665527, + "38": 3.7317845821380615, + "39": 5.436063289642334, + "40": 4.654531955718994, + "41": 3.7016143798828125, + "42": 3.865525245666504, + "43": 5.218574523925781, + "44": 4.379077911376953, + "45": 3.5449743270874023, + "46": 3.9674806594848633, + "47": 5.936733722686768, + "48": 5.191201686859131, + "49": 4.071769714355469, + "50": 3.3786449432373047, + "51": 3.4644248485565186, + "52": 3.453007698059082, + "53": 4.158570289611816 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "train_epoch_time": 6.811787366867065, + "train_loss": 6.15240808511567, + "train_score": 0.059190055584719396, + "val_loss": 6.150430158139917, + "val_score": 0.05866550658449924 + }, + { + "epoch": 1, + "grad_norm": 2.8834519386291504, + "learning_rate": 0.1, + "model_norm": 87.52149200439453, + "step_logs": { + "grad_norm": { + "54": 3.6732442378997803, + "55": 4.203396797180176, + "56": 3.681767463684082, + "57": 2.510910749435425, + "58": 9.891633033752441, + "59": 3.7573633193969727, + "60": 3.672461986541748, + "61": 4.566214561462402, + "62": 2.6282222270965576, + "63": 3.428539514541626, + "64": 9.9284029006958, + "65": 3.2549185752868652, + "66": 3.269218683242798, + "67": 3.8156352043151855, + "68": 2.1252076625823975, + "69": 2.5927603244781494, + "70": 10.775301933288574, + "71": 3.968351125717163, + "72": 3.8314497470855713, + "73": 3.5274105072021484, + "74": 2.681727647781372, + "75": 9.447980880737305, + "76": 3.578968048095703, + "77": 3.49782133102417, + "78": 4.2003173828125, + "79": 3.821840763092041, + "80": 3.405994415283203, + "81": 2.4955978393554688, + "82": 1.8375983238220215, + "83": 3.0771450996398926, + "84": 3.340847969055176, + "85": 2.8377625942230225, + "86": 3.0561769008636475, + "87": 6.293949127197266, + "88": 3.9713973999023438, + "89": 3.349625825881958, + "90": 2.6180405616760254, + "91": 3.398827314376831, + "92": 5.225561618804932, + "93": 2.9980392456054688, + "94": 2.9232120513916016, + "95": 8.645142555236816, + "96": 3.205531358718872, + "97": 3.0876965522766113, + "98": 3.653186559677124, + "99": 7.007787227630615, + "100": 3.295431137084961, + "101": 4.338217735290527, + "102": 2.1290626525878906, + "103": 6.241903781890869, + "104": 3.0309388637542725, + "105": 3.022103786468506, + "106": 2.0792396068573, + "107": 2.8834519386291504 + }, + "loss": { + "54": 6.156980991363525, + "55": 5.252943515777588, + "56": 4.546120643615723, + "57": 3.4713292121887207, + "58": 3.952418327331543, + "59": 5.8539018630981445, + "60": 4.961085319519043, + "61": 4.065688610076904, + "62": 3.739987850189209, + "63": 3.7214434146881104, + "64": 4.012625694274902, + "65": 5.713657855987549, + "66": 4.8445234298706055, + "67": 4.021078109741211, + "68": 3.5700507164001465, + "69": 3.5213842391967773, + "70": 4.034528732299805, + "71": 5.999028205871582, + "72": 5.269189357757568, + "73": 4.237903594970703, + "74": 3.5325655937194824, + "75": 3.8926713466644287, + "76": 5.630416393280029, + "77": 4.770911693572998, + "78": 3.93550443649292, + "79": 3.725010871887207, + "80": 4.039775848388672, + "81": 3.4270312786102295, + "82": 3.5064220428466797, + "83": 3.4148120880126953, + "84": 3.8978304862976074, + "85": 3.4866814613342285, + "86": 3.732253074645996, + "87": 3.6477553844451904, + "88": 4.735589981079102, + "89": 4.110518455505371, + "90": 3.38773250579834, + "91": 3.782430648803711, + "92": 3.6394848823547363, + "93": 4.382370471954346, + "94": 3.5844132900238037, + "95": 3.831367015838623, + "96": 5.310178756713867, + "97": 4.451910972595215, + "98": 3.6832613945007324, + "99": 3.8495001792907715, + "100": 4.868527412414551, + "101": 4.114696502685547, + "102": 3.6535263061523438, + "103": 3.659620761871338, + "104": 4.640170097351074, + "105": 3.862802028656006, + "106": 3.389212131500244, + "107": 3.5657927989959717 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "train_epoch_time": 4.7939066886901855, + "train_loss": 3.8870661060300415, + "train_score": 0.15261163918068968, + "val_loss": 3.90794214589177, + "val_score": 0.15114182700861745 + }, + { + "epoch": 2, + "grad_norm": 0.6136992573738098, + "learning_rate": 0.1, + "model_norm": 87.49285888671875, + "step_logs": { + "grad_norm": { + "108": 9.166020393371582, + "109": 3.260446548461914, + "110": 3.1746766567230225, + "111": 3.308523654937744, + "112": 3.1027259826660156, + "113": 2.790318250656128, + "114": 2.773282527923584, + "115": 2.8242406845092773, + "116": 3.1578242778778076, + "117": 2.969750165939331, + "118": 1.5987517833709717, + "119": 2.622586488723755, + "120": 8.518936157226562, + "121": 3.055206537246704, + "122": 2.9690043926239014, + "123": 2.8096797466278076, + "124": 4.662749767303467, + "125": 2.8870620727539062, + "126": 2.519000768661499, + "127": 6.841214656829834, + "128": 2.865640163421631, + "129": 2.7572903633117676, + "130": 1.421618103981018, + "131": 4.630087375640869, + "132": 2.8943216800689697, + "133": 2.507316827774048, + "134": 5.654372215270996, + "135": 2.8063504695892334, + "136": 2.557739496231079, + "137": 2.2054760456085205, + "138": 2.807223320007324, + "139": 4.430174350738525, + "140": 2.672210693359375, + "141": 1.9283416271209717, + "142": 5.2248077392578125, + "143": 2.7290027141571045, + "144": 2.4538376331329346, + "145": 2.210589647293091, + "146": 2.3795573711395264, + "147": 3.1825039386749268, + "148": 2.4803688526153564, + "149": 0.5940036177635193, + "150": 0.5978416204452515, + "151": 0.9853432178497314, + "152": 2.614866018295288, + "153": 2.426819324493408, + "154": 1.579184651374817, + "155": 2.228175640106201, + "156": 4.564561367034912, + "157": 2.549464702606201, + "158": 1.9981169700622559, + "159": 3.2127575874328613, + "160": 2.4377102851867676, + "161": 0.6136992573738098 + }, + "loss": { + "108": 3.8695123195648193, + "109": 5.381319046020508, + "110": 4.597160339355469, + "111": 3.8058512210845947, + "112": 3.524437427520752, + "113": 3.7831192016601562, + "114": 3.4119577407836914, + "115": 3.7660770416259766, + "116": 3.387238025665283, + "117": 3.847681999206543, + "118": 3.3913140296936035, + "119": 3.441558361053467, + "120": 3.854508399963379, + "121": 5.141708850860596, + "122": 4.35847282409668, + "123": 3.669011354446411, + "124": 3.5422751903533936, + "125": 4.187644958496094, + "126": 3.495161533355713, + "127": 3.7085232734680176, + "128": 4.668492317199707, + "129": 3.9175596237182617, + "130": 3.3215131759643555, + "131": 3.5024757385253906, + "132": 4.1439528465271, + "133": 3.501526355743408, + "134": 3.558802604675293, + "135": 4.358184814453125, + "136": 3.6727261543273926, + "137": 3.349576234817505, + "138": 3.5831480026245117, + "139": 3.542271137237549, + "140": 4.042727470397949, + "141": 3.3847625255584717, + "142": 3.5358009338378906, + "143": 4.247830867767334, + "144": 3.6025948524475098, + "145": 3.360447406768799, + "146": 3.5581021308898926, + "147": 3.4227206707000732, + "148": 3.762047290802002, + "149": 3.307605266571045, + "150": 3.3204827308654785, + "151": 3.310724973678589, + "152": 3.3572402000427246, + "153": 3.640658140182495, + "154": 3.307203769683838, + "155": 3.4553513526916504, + "156": 3.5002264976501465, + "157": 3.9907894134521484, + "158": 3.4358487129211426, + "159": 3.39845871925354, + "160": 3.6986827850341797, + "161": 3.2815957069396973 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "train_epoch_time": 4.793991565704346, + "train_loss": 3.2852243656749534, + "train_score": 0.152724847531045, + "val_loss": 3.301639177495385, + "val_score": 0.1513795209909553 + }, + { + "epoch": 3, + "grad_norm": 1.58250892162323, + "learning_rate": 0.1, + "model_norm": 87.50431060791016, + "step_logs": { + "grad_norm": { + "162": 0.6990936398506165, + "163": 1.0855343341827393, + "164": 2.573493003845215, + "165": 2.3338770866394043, + "166": 1.2506153583526611, + "167": 1.7552529573440552, + "168": 3.7846457958221436, + "169": 2.4146616458892822, + "170": 1.3735642433166504, + "171": 2.3647353649139404, + "172": 2.1765995025634766, + "173": 1.4735000133514404, + "174": 1.7969722747802734, + "175": 2.8110909461975098, + "176": 2.3343889713287354, + "177": 0.9906945824623108, + "178": 1.060250163078308, + "179": 1.9494225978851318, + "180": 2.1394035816192627, + "181": 2.3163135051727295, + "182": 2.225015640258789, + "183": 2.0056939125061035, + "184": 2.1559886932373047, + "185": 2.453526496887207, + "186": 2.3028724193573, + "187": 1.7598356008529663, + "188": 1.9500482082366943, + "189": 2.6437535285949707, + "190": 2.325402021408081, + "191": 1.5147470235824585, + "192": 1.7627238035202026, + "193": 2.553441286087036, + "194": 2.381896495819092, + "195": 1.8004099130630493, + "196": 1.9215713739395142, + "197": 2.468478202819824, + "198": 2.2915127277374268, + "199": 1.7835581302642822, + "200": 1.7955331802368164, + "201": 2.077427864074707, + "202": 1.991347074508667, + "203": 1.7464768886566162, + "204": 1.7679321765899658, + "205": 1.8141162395477295, + "206": 1.7467771768569946, + "207": 1.6279795169830322, + "208": 1.6814764738082886, + "209": 1.8262842893600464, + "210": 1.8182737827301025, + "211": 1.76496422290802, + "212": 1.671907901763916, + "213": 1.5209786891937256, + "214": 1.5442737340927124, + "215": 1.58250892162323 + }, + "loss": { + "162": 3.2714474201202393, + "163": 3.3101048469543457, + "164": 3.3309435844421387, + "165": 3.574993133544922, + "166": 3.299924373626709, + "167": 3.3417928218841553, + "168": 3.441725730895996, + "169": 3.7685446739196777, + "170": 3.287553310394287, + "171": 3.321277618408203, + "172": 3.4721219539642334, + "173": 3.268068790435791, + "174": 3.3477368354797363, + "175": 3.3122897148132324, + "176": 3.5108697414398193, + "177": 3.21201753616333, + "178": 3.1934924125671387, + "179": 3.2229983806610107, + "180": 3.3485851287841797, + "181": 3.2375502586364746, + "182": 3.3506741523742676, + "183": 3.2008423805236816, + "184": 3.295527219772339, + "185": 3.2338011264801025, + "186": 3.3315224647521973, + "187": 3.176913022994995, + "188": 3.219912528991699, + "189": 3.215770721435547, + "190": 3.3704915046691895, + "191": 3.1469473838806152, + "192": 3.148859977722168, + "193": 3.1830079555511475, + "194": 3.315352201461792, + "195": 3.1420464515686035, + "196": 3.1758809089660645, + "197": 3.172865390777588, + "198": 3.253307819366455, + "199": 3.1109161376953125, + "200": 3.1349234580993652, + "201": 3.1295700073242188, + "202": 3.1752634048461914, + "203": 3.071497678756714, + "204": 3.1231439113616943, + "205": 3.070530414581299, + "206": 3.122929573059082, + "207": 3.043402671813965, + "208": 3.0755069255828857, + "209": 3.0376853942871094, + "210": 3.0979976654052734, + "211": 3.0375564098358154, + "212": 3.0894670486450195, + "213": 3.0360488891601562, + "214": 3.046515464782715, + "215": 3.0090155601501465 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "train_epoch_time": 4.793714284896851, + "train_loss": 3.051448314330157, + "train_score": 0.14354487985928396, + "val_loss": 3.0669197023393915, + "val_score": 0.14355805129599764 + }, + { + "epoch": 4, + "grad_norm": 1.7089091539382935, + "learning_rate": 0.1, + "model_norm": 87.51215362548828, + "step_logs": { + "grad_norm": { + "216": 1.597985863685608, + "217": 1.6878122091293335, + "218": 1.784617304801941, + "219": 1.7893139123916626, + "220": 1.831480622291565, + "221": 1.7717779874801636, + "222": 1.7698428630828857, + "223": 1.6941642761230469, + "224": 1.7306171655654907, + "225": 1.7256495952606201, + "226": 1.6578220129013062, + "227": 1.5820120573043823, + "228": 1.5035868883132935, + "229": 1.544783592224121, + "230": 1.699487566947937, + "231": 1.987848162651062, + "232": 1.772462010383606, + "233": 1.2227364778518677, + "234": 1.355668306350708, + "235": 1.5648186206817627, + "236": 1.8153234720230103, + "237": 1.884724736213684, + "238": 2.012338161468506, + "239": 1.8148020505905151, + "240": 1.4784231185913086, + "241": 1.3997631072998047, + "242": 1.4841691255569458, + "243": 1.6125293970108032, + "244": 1.6721493005752563, + "245": 1.786097526550293, + "246": 1.8887684345245361, + "247": 2.3786678314208984, + "248": 2.8075203895568848, + "249": 2.617809534072876, + "250": 2.1931307315826416, + "251": 1.7103222608566284, + "252": 1.528630256652832, + "253": 1.602358102798462, + "254": 1.8009368181228638, + "255": 2.232680082321167, + "256": 2.715921640396118, + "257": 1.8635884523391724, + "258": 1.7650214433670044, + "259": 1.5951802730560303, + "260": 1.4896814823150635, + "261": 1.5536892414093018, + "262": 1.6634559631347656, + "263": 1.9263232946395874, + "264": 2.0404441356658936, + "265": 1.8849246501922607, + "266": 1.6770967245101929, + "267": 1.484693169593811, + "268": 1.6399348974227905, + "269": 1.7089091539382935 + }, + "loss": { + "216": 3.068169116973877, + "217": 3.007662296295166, + "218": 3.077162742614746, + "219": 3.0051798820495605, + "220": 3.0847859382629395, + "221": 3.008432626724243, + "222": 3.0402050018310547, + "223": 3.016720771789551, + "224": 3.015456438064575, + "225": 3.0199806690216064, + "226": 3.031007766723633, + "227": 2.981315851211548, + "228": 3.021916389465332, + "229": 2.9599485397338867, + "230": 2.9997448921203613, + "231": 3.0120396614074707, + "232": 3.0592916011810303, + "233": 2.941696882247925, + "234": 2.9985203742980957, + "235": 2.9608969688415527, + "236": 2.986017942428589, + "237": 3.015174388885498, + "238": 3.020690679550171, + "239": 3.0142064094543457, + "240": 2.9589004516601562, + "241": 2.9445266723632812, + "242": 2.9277162551879883, + "243": 2.9550492763519287, + "244": 2.9244189262390137, + "245": 2.9472007751464844, + "246": 2.9702582359313965, + "247": 2.9962825775146484, + "248": 3.0938549041748047, + "249": 3.0575406551361084, + "250": 3.0636565685272217, + "251": 2.952287435531616, + "252": 2.9423060417175293, + "253": 2.9139065742492676, + "254": 2.9626102447509766, + "255": 2.958373546600342, + "256": 3.063141345977783, + "257": 3.007920265197754, + "258": 2.9447689056396484, + "259": 2.9120852947235107, + "260": 2.9216179847717285, + "261": 2.8991172313690186, + "262": 2.937103748321533, + "263": 2.910618543624878, + "264": 2.9704627990722656, + "265": 2.921873092651367, + "266": 2.9462780952453613, + "267": 2.8780341148376465, + "268": 2.889033317565918, + "269": 2.9055511951446533 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "train_epoch_time": 4.7957987785339355, + "train_loss": 2.913550460560934, + "train_score": 0.1759549856827283, + "val_loss": 2.9285061988983854, + "val_score": 0.17097804289050272 + }, + { + "epoch": 5, + "grad_norm": 1.910400152206421, + "learning_rate": 0.1, + "model_norm": 87.51984405517578, + "step_logs": { + "grad_norm": { + "270": 1.7309120893478394, + "271": 1.6838411092758179, + "272": 1.6812191009521484, + "273": 1.6011210680007935, + "274": 1.5274264812469482, + "275": 1.3145862817764282, + "276": 1.1602916717529297, + "277": 1.2141036987304688, + "278": 1.3638465404510498, + "279": 1.85237717628479, + "280": 1.7475277185440063, + "281": 1.7263325452804565, + "282": 2.2008743286132812, + "283": 2.158879041671753, + "284": 2.393897294998169, + "285": 1.9338291883468628, + "286": 1.4306427240371704, + "287": 1.8433418273925781, + "288": 1.6604335308074951, + "289": 1.3293074369430542, + "290": 1.6525572538375854, + "291": 2.1497445106506348, + "292": 2.0472335815429688, + "293": 1.7122303247451782, + "294": 1.858788251876831, + "295": 1.9067161083221436, + "296": 1.5989854335784912, + "297": 1.2284775972366333, + "298": 1.1961337327957153, + "299": 1.3169851303100586, + "300": 1.5709247589111328, + "301": 1.648550033569336, + "302": 1.4092910289764404, + "303": 1.1947563886642456, + "304": 1.3627091646194458, + "305": 1.3384456634521484, + "306": 1.2349458932876587, + "307": 1.1548428535461426, + "308": 1.1814287900924683, + "309": 1.3464531898498535, + "310": 1.5717324018478394, + "311": 2.253826141357422, + "312": 1.9190090894699097, + "313": 1.294960856437683, + "314": 1.46340012550354, + "315": 1.587857723236084, + "316": 1.7106577157974243, + "317": 1.8518284559249878, + "318": 1.3669970035552979, + "319": 1.2048537731170654, + "320": 1.3535815477371216, + "321": 1.7471632957458496, + "322": 1.7827051877975464, + "323": 1.910400152206421 + }, + "loss": { + "270": 2.9040279388427734, + "271": 2.9078006744384766, + "272": 2.921225070953369, + "273": 2.892879009246826, + "274": 2.8824362754821777, + "275": 2.8701939582824707, + "276": 2.864061117172241, + "277": 2.851010322570801, + "278": 2.872096061706543, + "279": 2.883958578109741, + "280": 2.942429780960083, + "281": 2.8529250621795654, + "282": 2.9261763095855713, + "283": 2.9349865913391113, + "284": 2.9358103275299072, + "285": 2.922347068786621, + "286": 2.873699188232422, + "287": 2.8408632278442383, + "288": 2.9175288677215576, + "289": 2.838775157928467, + "290": 2.855052947998047, + "291": 2.8938469886779785, + "292": 2.918454885482788, + "293": 2.8680355548858643, + "294": 2.851624011993408, + "295": 2.8883683681488037, + "296": 2.869182586669922, + "297": 2.8103950023651123, + "298": 2.8099215030670166, + "299": 2.79030704498291, + "300": 2.807589530944824, + "301": 2.8348944187164307, + "302": 2.8415722846984863, + "303": 2.7835493087768555, + "304": 2.7984981536865234, + "305": 2.794250249862671, + "306": 2.770634651184082, + "307": 2.7848353385925293, + "308": 2.7740893363952637, + "309": 2.775357961654663, + "310": 2.7942543029785156, + "311": 2.8301584720611572, + "312": 2.8827664852142334, + "313": 2.7745378017425537, + "314": 2.7593209743499756, + "315": 2.7866246700286865, + "316": 2.810638904571533, + "317": 2.8005151748657227, + "318": 2.798887252807617, + "319": 2.7478482723236084, + "320": 2.7415900230407715, + "321": 2.7598347663879395, + "322": 2.8220653533935547, + "323": 2.775735378265381 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "train_epoch_time": 4.794295072555542, + "train_loss": 2.8404472835436785, + "train_score": 0.2019850699383354, + "val_loss": 2.860597257909764, + "val_score": 0.19601661170459642 + }, + { + "epoch": 6, + "grad_norm": 1.8443446159362793, + "learning_rate": 0.1, + "model_norm": 87.52779388427734, + "step_logs": { + "grad_norm": { + "324": 2.155632972717285, + "325": 1.84988272190094, + "326": 1.6228352785110474, + "327": 1.4795079231262207, + "328": 1.364604115486145, + "329": 1.2524449825286865, + "330": 1.2218588590621948, + "331": 1.4108054637908936, + "332": 1.694453477859497, + "333": 1.5836282968521118, + "334": 1.5810085535049438, + "335": 1.9620267152786255, + "336": 1.7498195171356201, + "337": 1.4597772359848022, + "338": 1.6286495923995972, + "339": 1.6808140277862549, + "340": 1.5486146211624146, + "341": 1.660322666168213, + "342": 1.8241106271743774, + "343": 1.892255425453186, + "344": 1.6763458251953125, + "345": 1.6753884553909302, + "346": 1.7542225122451782, + "347": 1.6533803939819336, + "348": 1.511404275894165, + "349": 1.8656450510025024, + "350": 1.738365650177002, + "351": 1.6207376718521118, + "352": 1.671261191368103, + "353": 1.4202420711517334, + "354": 1.1749540567398071, + "355": 1.0920641422271729, + "356": 1.1776328086853027, + "357": 1.4487411975860596, + "358": 1.8593851327896118, + "359": 1.898881435394287, + "360": 1.814507246017456, + "361": 1.7532634735107422, + "362": 1.700311303138733, + "363": 1.670277714729309, + "364": 1.50487220287323, + "365": 1.3220475912094116, + "366": 1.311021327972412, + "367": 1.4480234384536743, + "368": 1.4873378276824951, + "369": 1.552438497543335, + "370": 1.5964924097061157, + "371": 1.6949729919433594, + "372": 1.8176980018615723, + "373": 1.950077772140503, + "374": 1.7779804468154907, + "375": 1.8435618877410889, + "376": 1.9465885162353516, + "377": 1.8443446159362793 + }, + "loss": { + "324": 2.820026397705078, + "325": 2.8304035663604736, + "326": 2.7701854705810547, + "327": 2.762284517288208, + "328": 2.744509696960449, + "329": 2.715801477432251, + "330": 2.7025036811828613, + "331": 2.7244620323181152, + "332": 2.7508177757263184, + "333": 2.7496819496154785, + "334": 2.728255033493042, + "335": 2.773054599761963, + "336": 2.787616729736328, + "337": 2.731870651245117, + "338": 2.7328028678894043, + "339": 2.7629458904266357, + "340": 2.7285022735595703, + "341": 2.7550153732299805, + "342": 2.762906074523926, + "343": 2.7425501346588135, + "344": 2.72770619392395, + "345": 2.7335047721862793, + "346": 2.738142967224121, + "347": 2.7749195098876953, + "348": 2.7302937507629395, + "349": 2.7354214191436768, + "350": 2.7608859539031982, + "351": 2.732132911682129, + "352": 2.7298476696014404, + "353": 2.714466094970703, + "354": 2.7010788917541504, + "355": 2.6623849868774414, + "356": 2.6772267818450928, + "357": 2.6973962783813477, + "358": 2.7395243644714355, + "359": 2.735440731048584, + "360": 2.692413806915283, + "361": 2.7381410598754883, + "362": 2.720083475112915, + "363": 2.7207460403442383, + "364": 2.6731836795806885, + "365": 2.693645477294922, + "366": 2.6823296546936035, + "367": 2.685734272003174, + "368": 2.684813976287842, + "369": 2.687150478363037, + "370": 2.677173614501953, + "371": 2.708266258239746, + "372": 2.701272964477539, + "373": 2.727740526199341, + "374": 2.717501163482666, + "375": 2.7007410526275635, + "376": 2.7074296474456787, + "377": 2.6967391967773438 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "train_epoch_time": 4.794239282608032, + "train_loss": 2.6888308897251036, + "train_score": 0.2316725699212322, + "val_loss": 2.707535773002458, + "val_score": 0.2303790546928016 + }, + { + "epoch": 7, + "grad_norm": 1.704913854598999, + "learning_rate": 0.1, + "model_norm": 87.53600311279297, + "step_logs": { + "grad_norm": { + "378": 2.0708065032958984, + "379": 1.681443214416504, + "380": 1.2056361436843872, + "381": 1.2746915817260742, + "382": 1.4549461603164673, + "383": 1.5162930488586426, + "384": 1.4708962440490723, + "385": 1.5431746244430542, + "386": 1.6435669660568237, + "387": 1.8571664094924927, + "388": 1.9098087549209595, + "389": 1.9669134616851807, + "390": 2.3033864498138428, + "391": 1.8136849403381348, + "392": 1.8377368450164795, + "393": 1.8172292709350586, + "394": 1.4593688249588013, + "395": 1.4622390270233154, + "396": 1.877022385597229, + "397": 2.0090367794036865, + "398": 1.8020083904266357, + "399": 2.0596206188201904, + "400": 1.696608066558838, + "401": 2.0540056228637695, + "402": 2.267760753631592, + "403": 2.2145755290985107, + "404": 1.3494840860366821, + "405": 1.5002892017364502, + "406": 1.7640374898910522, + "407": 1.5376811027526855, + "408": 1.225986361503601, + "409": 1.2189589738845825, + "410": 1.6300349235534668, + "411": 1.8727000951766968, + "412": 1.7715438604354858, + "413": 1.6847164630889893, + "414": 1.5470216274261475, + "415": 1.5455007553100586, + "416": 1.519396424293518, + "417": 1.603033185005188, + "418": 1.5024815797805786, + "419": 1.5774893760681152, + "420": 1.7047498226165771, + "421": 1.8606033325195312, + "422": 1.6975452899932861, + "423": 1.8237303495407104, + "424": 1.813138484954834, + "425": 1.9298568964004517, + "426": 1.4758800268173218, + "427": 1.3395049571990967, + "428": 1.4335174560546875, + "429": 1.5994044542312622, + "430": 1.578407883644104, + "431": 1.704913854598999 + }, + "loss": { + "378": 2.6904144287109375, + "379": 2.7103376388549805, + "380": 2.6507139205932617, + "381": 2.6244349479675293, + "382": 2.6512017250061035, + "383": 2.646717071533203, + "384": 2.6393022537231445, + "385": 2.63724422454834, + "386": 2.6495134830474854, + "387": 2.653243064880371, + "388": 2.6915817260742188, + "389": 2.6652557849884033, + "390": 2.6909615993499756, + "391": 2.6910130977630615, + "392": 2.64697265625, + "393": 2.6619622707366943, + "394": 2.6343002319335938, + "395": 2.6012232303619385, + "396": 2.634345531463623, + "397": 2.6683480739593506, + "398": 2.6859707832336426, + "399": 2.6322669982910156, + "400": 2.671630859375, + "401": 2.652303695678711, + "402": 2.7039732933044434, + "403": 2.674090623855591, + "404": 2.6350889205932617, + "405": 2.6096720695495605, + "406": 2.6331543922424316, + "407": 2.6284260749816895, + "408": 2.581529378890991, + "409": 2.6036624908447266, + "410": 2.621208667755127, + "411": 2.6714320182800293, + "412": 2.6453893184661865, + "413": 2.6183385848999023, + "414": 2.630486011505127, + "415": 2.602463960647583, + "416": 2.6082732677459717, + "417": 2.59086275100708, + "418": 2.609283924102783, + "419": 2.603783130645752, + "420": 2.6101644039154053, + "421": 2.6071391105651855, + "422": 2.616755962371826, + "423": 2.637195110321045, + "424": 2.629767656326294, + "425": 2.612427234649658, + "426": 2.6312780380249023, + "427": 2.574591636657715, + "428": 2.5845155715942383, + "429": 2.5824062824249268, + "430": 2.600257158279419, + "431": 2.5817208290100098 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "train_epoch_time": 4.7935755252838135, + "train_loss": 2.5908328111065684, + "train_score": 0.2439125268667979, + "val_loss": 2.610408023085304, + "val_score": 0.24100800070710626 + }, + { + "epoch": 8, + "grad_norm": 1.5788434743881226, + "learning_rate": 0.1, + "model_norm": 87.54491424560547, + "step_logs": { + "grad_norm": { + "432": 1.286270260810852, + "433": 1.4693156480789185, + "434": 1.6054952144622803, + "435": 1.5613069534301758, + "436": 1.4511334896087646, + "437": 1.5603748559951782, + "438": 1.5913562774658203, + "439": 1.7547959089279175, + "440": 1.653072714805603, + "441": 1.8857250213623047, + "442": 1.610456109046936, + "443": 1.8123440742492676, + "444": 1.4749577045440674, + "445": 2.1184768676757812, + "446": 1.6481423377990723, + "447": 2.067317485809326, + "448": 1.6184128522872925, + "449": 1.7942891120910645, + "450": 1.800119400024414, + "451": 1.5355799198150635, + "452": 1.4278818368911743, + "453": 1.3814152479171753, + "454": 1.2996751070022583, + "455": 1.3250993490219116, + "456": 1.3313499689102173, + "457": 1.2745602130889893, + "458": 1.3530325889587402, + "459": 1.4841971397399902, + "460": 1.6527849435806274, + "461": 1.762048602104187, + "462": 1.673614501953125, + "463": 1.4465807676315308, + "464": 1.2824547290802002, + "465": 1.2305985689163208, + "466": 1.2866114377975464, + "467": 1.3537986278533936, + "468": 1.3462580442428589, + "469": 1.305461049079895, + "470": 1.3501737117767334, + "471": 1.392318606376648, + "472": 1.3961211442947388, + "473": 1.39976966381073, + "474": 1.3901209831237793, + "475": 1.3982222080230713, + "476": 1.358236312866211, + "477": 1.50264310836792, + "478": 1.3991254568099976, + "479": 1.5541889667510986, + "480": 1.3254960775375366, + "481": 1.3470790386199951, + "482": 1.3513410091400146, + "483": 1.4285060167312622, + "484": 1.4070141315460205, + "485": 1.5788434743881226 + }, + "loss": { + "432": 2.588563919067383, + "433": 2.5665931701660156, + "434": 2.5865190029144287, + "435": 2.58758544921875, + "436": 2.589094638824463, + "437": 2.5738320350646973, + "438": 2.597832202911377, + "439": 2.586780071258545, + "440": 2.6245334148406982, + "441": 2.597990036010742, + "442": 2.598391532897949, + "443": 2.5842695236206055, + "444": 2.5860185623168945, + "445": 2.6066675186157227, + "446": 2.603336811065674, + "447": 2.579054594039917, + "448": 2.6035208702087402, + "449": 2.580047607421875, + "450": 2.617368698120117, + "451": 2.579369306564331, + "452": 2.5580849647521973, + "453": 2.560403347015381, + "454": 2.5808045864105225, + "455": 2.5537352561950684, + "456": 2.5496912002563477, + "457": 2.5557453632354736, + "458": 2.5602951049804688, + "459": 2.5526907444000244, + "460": 2.579969644546509, + "461": 2.5848283767700195, + "462": 2.5882349014282227, + "463": 2.5545496940612793, + "464": 2.5511064529418945, + "465": 2.561150550842285, + "466": 2.5469818115234375, + "467": 2.5514984130859375, + "468": 2.543325424194336, + "469": 2.5386500358581543, + "470": 2.543893575668335, + "471": 2.5512020587921143, + "472": 2.565943479537964, + "473": 2.545866012573242, + "474": 2.5484142303466797, + "475": 2.531568765640259, + "476": 2.5378899574279785, + "477": 2.5438013076782227, + "478": 2.553943634033203, + "479": 2.5409069061279297, + "480": 2.5679845809936523, + "481": 2.5286805629730225, + "482": 2.5337605476379395, + "483": 2.5237157344818115, + "484": 2.557138681411743, + "485": 2.563009262084961 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "train_epoch_time": 4.793356895446777, + "train_loss": 2.5702027982413513, + "train_score": 0.23034321196708651, + "val_loss": 2.5981002122197987, + "val_score": 0.2235397532599391 + }, + { + "epoch": 9, + "grad_norm": 1.1482586860656738, + "learning_rate": 0.1, + "model_norm": 87.55480194091797, + "step_logs": { + "grad_norm": { + "486": 1.5784811973571777, + "487": 1.3914556503295898, + "488": 1.3596466779708862, + "489": 1.3279658555984497, + "490": 1.2607295513153076, + "491": 1.1456457376480103, + "492": 1.0837055444717407, + "493": 1.0943005084991455, + "494": 1.0616426467895508, + "495": 1.071111798286438, + "496": 1.0862345695495605, + "497": 1.1060478687286377, + "498": 1.136170506477356, + "499": 1.235898494720459, + "500": 1.3385292291641235, + "501": 1.370406985282898, + "502": 1.2026985883712769, + "503": 1.4100940227508545, + "504": 1.644429087638855, + "505": 1.8106659650802612, + "506": 1.954629898071289, + "507": 1.80476975440979, + "508": 1.8550328016281128, + "509": 1.503313422203064, + "510": 1.3251838684082031, + "511": 1.3144601583480835, + "512": 1.3451131582260132, + "513": 1.3598833084106445, + "514": 1.3750338554382324, + "515": 1.336960792541504, + "516": 1.285061240196228, + "517": 1.275316596031189, + "518": 1.331743597984314, + "519": 1.3414835929870605, + "520": 1.3239773511886597, + "521": 1.3470772504806519, + "522": 1.3875837326049805, + "523": 1.4202936887741089, + "524": 1.3600045442581177, + "525": 1.332250714302063, + "526": 1.303304672241211, + "527": 1.289294719696045, + "528": 1.2900714874267578, + "529": 1.385382890701294, + "530": 1.633182406425476, + "531": 1.551501750946045, + "532": 1.4395169019699097, + "533": 1.3466784954071045, + "534": 1.3048006296157837, + "535": 1.3786487579345703, + "536": 1.4639127254486084, + "537": 1.4889410734176636, + "538": 1.3108350038528442, + "539": 1.1482586860656738 + }, + "loss": { + "486": 2.5756337642669678, + "487": 2.5349271297454834, + "488": 2.5285801887512207, + "489": 2.5507967472076416, + "490": 2.53084135055542, + "491": 2.534364700317383, + "492": 2.507439613342285, + "493": 2.525484800338745, + "494": 2.5282270908355713, + "495": 2.505523920059204, + "496": 2.5126185417175293, + "497": 2.5094051361083984, + "498": 2.5260982513427734, + "499": 2.498687267303467, + "500": 2.521278142929077, + "501": 2.53318190574646, + "502": 2.531080961227417, + "503": 2.534235715866089, + "504": 2.557523250579834, + "505": 2.541710376739502, + "506": 2.5630218982696533, + "507": 2.580857753753662, + "508": 2.5380778312683105, + "509": 2.5711212158203125, + "510": 2.538898468017578, + "511": 2.522522449493408, + "512": 2.5212037563323975, + "513": 2.537827968597412, + "514": 2.550327777862549, + "515": 2.5227861404418945, + "516": 2.509505033493042, + "517": 2.510568618774414, + "518": 2.512913227081299, + "519": 2.523221969604492, + "520": 2.510162830352783, + "521": 2.5164146423339844, + "522": 2.515011787414551, + "523": 2.5461232662200928, + "524": 2.51908016204834, + "525": 2.530780792236328, + "526": 2.5205748081207275, + "527": 2.5035438537597656, + "528": 2.5022497177124023, + "529": 2.4940085411071777, + "530": 2.54360294342041, + "531": 2.527162790298462, + "532": 2.520843267440796, + "533": 2.517914056777954, + "534": 2.4902191162109375, + "535": 2.494802713394165, + "536": 2.4951748847961426, + "537": 2.495988607406616, + "538": 2.5046000480651855, + "539": 2.4796130657196045 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "train_epoch_time": 4.793646574020386, + "train_loss": 2.48418271640794, + "train_score": 0.2766129393916219, + "val_loss": 2.517991020539872, + "val_score": 0.2703340268778336 + }, + { + "epoch": 10, + "grad_norm": 1.3202892541885376, + "learning_rate": 0.1, + "model_norm": 87.56735229492188, + "step_logs": { + "grad_norm": { + "540": 1.2227391004562378, + "541": 1.264283537864685, + "542": 1.351145625114441, + "543": 1.382710337638855, + "544": 1.3440003395080566, + "545": 1.3707528114318848, + "546": 1.436211347579956, + "547": 1.3877992630004883, + "548": 1.5721369981765747, + "549": 1.412706732749939, + "550": 1.5401239395141602, + "551": 1.5462980270385742, + "552": 1.4806427955627441, + "553": 1.3181809186935425, + "554": 1.2439841032028198, + "555": 1.269310474395752, + "556": 1.4574469327926636, + "557": 1.5510181188583374, + "558": 1.5461934804916382, + "559": 1.3501863479614258, + "560": 1.6920835971832275, + "561": 1.3779664039611816, + "562": 1.7835991382598877, + "563": 1.4689912796020508, + "564": 2.218740463256836, + "565": 1.8177419900894165, + "566": 3.5291826725006104, + "567": 2.6178267002105713, + "568": 3.536343812942505, + "569": 2.8160178661346436, + "570": 2.678770065307617, + "571": 1.8330696821212769, + "572": 1.226369857788086, + "573": 1.170863151550293, + "574": 1.2805521488189697, + "575": 1.3313273191452026, + "576": 1.5048911571502686, + "577": 1.5850555896759033, + "578": 1.5417934656143188, + "579": 1.4226044416427612, + "580": 1.2192103862762451, + "581": 1.1476165056228638, + "582": 1.0969921350479126, + "583": 1.1125699281692505, + "584": 1.1975789070129395, + "585": 1.2218101024627686, + "586": 1.15450119972229, + "587": 1.0483189821243286, + "588": 1.0810859203338623, + "589": 1.2447563409805298, + "590": 1.3714244365692139, + "591": 1.4388325214385986, + "592": 1.3979953527450562, + "593": 1.3202892541885376 + }, + "loss": { + "540": 2.4943041801452637, + "541": 2.48636794090271, + "542": 2.5075252056121826, + "543": 2.5327506065368652, + "544": 2.476402521133423, + "545": 2.491098403930664, + "546": 2.4865918159484863, + "547": 2.508561372756958, + "548": 2.497014284133911, + "549": 2.4996299743652344, + "550": 2.500722885131836, + "551": 2.5067903995513916, + "552": 2.4970755577087402, + "553": 2.481100559234619, + "554": 2.4737167358398438, + "555": 2.49249267578125, + "556": 2.4807984828948975, + "557": 2.4960312843322754, + "558": 2.4705493450164795, + "559": 2.496767520904541, + "560": 2.4527297019958496, + "561": 2.5173892974853516, + "562": 2.495147705078125, + "563": 2.5012295246124268, + "564": 2.519364833831787, + "565": 2.506685733795166, + "566": 2.566556930541992, + "567": 2.5751850605010986, + "568": 2.692901611328125, + "569": 2.7047135829925537, + "570": 2.615813732147217, + "571": 2.5705904960632324, + "572": 2.496788263320923, + "573": 2.4712114334106445, + "574": 2.45900821685791, + "575": 2.45928955078125, + "576": 2.468764305114746, + "577": 2.4921658039093018, + "578": 2.4838056564331055, + "579": 2.4985921382904053, + "580": 2.4581851959228516, + "581": 2.4484689235687256, + "582": 2.452770709991455, + "583": 2.430497169494629, + "584": 2.4521737098693848, + "585": 2.437474250793457, + "586": 2.4307210445404053, + "587": 2.425107955932617, + "588": 2.42672061920166, + "589": 2.416019916534424, + "590": 2.4553542137145996, + "591": 2.4518163204193115, + "592": 2.4379661083221436, + "593": 2.434680461883545 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "train_epoch_time": 4.79363226890564, + "train_loss": 2.4340638096397544, + "train_score": 0.2910083392600247, + "val_loss": 2.470148722969432, + "val_score": 0.28113788010469126 + }, + { + "epoch": 11, + "grad_norm": 1.2460404634475708, + "learning_rate": 0.1, + "model_norm": 87.57955932617188, + "step_logs": { + "grad_norm": { + "594": 1.190887451171875, + "595": 1.1612094640731812, + "596": 1.2796485424041748, + "597": 1.2939462661743164, + "598": 1.352797508239746, + "599": 1.4436309337615967, + "600": 1.4838576316833496, + "601": 1.5455995798110962, + "602": 1.4773309230804443, + "603": 1.3189791440963745, + "604": 1.2051525115966797, + "605": 1.1795378923416138, + "606": 1.2116870880126953, + "607": 1.3571536540985107, + "608": 1.322058081626892, + "609": 1.2179083824157715, + "610": 1.2482426166534424, + "611": 1.2081761360168457, + "612": 1.213883876800537, + "613": 1.1586085557937622, + "614": 1.2170231342315674, + "615": 1.2826834917068481, + "616": 1.1640968322753906, + "617": 1.155636191368103, + "618": 1.2509428262710571, + "619": 1.3532533645629883, + "620": 1.4139662981033325, + "621": 1.4417206048965454, + "622": 1.4218956232070923, + "623": 1.428883671760559, + "624": 1.3710283041000366, + "625": 1.2938613891601562, + "626": 1.247063398361206, + "627": 1.147965908050537, + "628": 1.270517110824585, + "629": 1.5310883522033691, + "630": 1.7752671241760254, + "631": 1.6297425031661987, + "632": 1.3927663564682007, + "633": 1.4218151569366455, + "634": 1.4464086294174194, + "635": 1.5107020139694214, + "636": 1.4589729309082031, + "637": 1.3154953718185425, + "638": 1.2757622003555298, + "639": 1.2317240238189697, + "640": 1.1954832077026367, + "641": 1.295788049697876, + "642": 1.3872429132461548, + "643": 1.448379635810852, + "644": 1.6278018951416016, + "645": 1.6655741930007935, + "646": 1.441634178161621, + "647": 1.2460404634475708 + }, + "loss": { + "594": 2.431124210357666, + "595": 2.4516854286193848, + "596": 2.437966823577881, + "597": 2.416505813598633, + "598": 2.426060676574707, + "599": 2.4314777851104736, + "600": 2.4269022941589355, + "601": 2.455850124359131, + "602": 2.4542617797851562, + "603": 2.4437007904052734, + "604": 2.407693386077881, + "605": 2.4272193908691406, + "606": 2.4195547103881836, + "607": 2.4127767086029053, + "608": 2.4124574661254883, + "609": 2.4287705421447754, + "610": 2.426764726638794, + "611": 2.397266387939453, + "612": 2.4216463565826416, + "613": 2.396771192550659, + "614": 2.4175186157226562, + "615": 2.4226953983306885, + "616": 2.4081461429595947, + "617": 2.422806739807129, + "618": 2.4365670680999756, + "619": 2.426455020904541, + "620": 2.420490264892578, + "621": 2.4348621368408203, + "622": 2.4125428199768066, + "623": 2.4133524894714355, + "624": 2.4224588871002197, + "625": 2.3952183723449707, + "626": 2.3972959518432617, + "627": 2.417902946472168, + "628": 2.3964099884033203, + "629": 2.4017157554626465, + "630": 2.4101107120513916, + "631": 2.4423046112060547, + "632": 2.3953487873077393, + "633": 2.4087698459625244, + "634": 2.3892393112182617, + "635": 2.3853797912597656, + "636": 2.418905735015869, + "637": 2.4223732948303223, + "638": 2.403280019760132, + "639": 2.3609299659729004, + "640": 2.365952968597412, + "641": 2.396477460861206, + "642": 2.3863439559936523, + "643": 2.4122841358184814, + "644": 2.4080750942230225, + "645": 2.4373939037323, + "646": 2.4049220085144043, + "647": 2.3869943618774414 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "train_epoch_time": 4.793908596038818, + "train_loss": 2.37648544325206, + "train_score": 0.30083729369910583, + "val_loss": 2.414509936397303, + "val_score": 0.2929777196778496 + }, + { + "epoch": 12, + "grad_norm": 0.874882161617279, + "learning_rate": 0.1, + "model_norm": 87.59091186523438, + "step_logs": { + "grad_norm": { + "648": 1.2153477668762207, + "649": 1.1675283908843994, + "650": 1.049970030784607, + "651": 1.0972448587417603, + "652": 1.0681356191635132, + "653": 0.9281523823738098, + "654": 0.9574427008628845, + "655": 1.0146082639694214, + "656": 1.003524899482727, + "657": 0.9913014769554138, + "658": 0.9196563363075256, + "659": 0.9900217652320862, + "660": 1.0558892488479614, + "661": 1.1580747365951538, + "662": 1.4004088640213013, + "663": 1.4685490131378174, + "664": 1.2901968955993652, + "665": 1.193713665008545, + "666": 1.1283073425292969, + "667": 0.9713377952575684, + "668": 0.9379022121429443, + "669": 0.9284307360649109, + "670": 0.934114396572113, + "671": 0.8895021080970764, + "672": 0.8586216568946838, + "673": 0.8883331418037415, + "674": 0.8018227815628052, + "675": 0.8049211502075195, + "676": 0.8063657879829407, + "677": 0.7673876285552979, + "678": 0.8404936790466309, + "679": 0.8184280395507812, + "680": 0.7469046115875244, + "681": 0.7634561657905579, + "682": 0.8151198625564575, + "683": 0.8497288227081299, + "684": 0.8237655162811279, + "685": 0.8332400918006897, + "686": 0.7675581574440002, + "687": 0.7410281896591187, + "688": 0.7786405086517334, + "689": 0.7372876405715942, + "690": 0.6957602500915527, + "691": 0.6824859380722046, + "692": 0.6738407015800476, + "693": 0.6567220687866211, + "694": 0.6754260659217834, + "695": 0.7066612839698792, + "696": 0.7177454233169556, + "697": 0.6948050856590271, + "698": 0.8061345219612122, + "699": 0.9835711121559143, + "700": 0.930682897567749, + "701": 0.874882161617279 + }, + "loss": { + "648": 2.376716136932373, + "649": 2.3776378631591797, + "650": 2.343829870223999, + "651": 2.3607192039489746, + "652": 2.381183385848999, + "653": 2.360600233078003, + "654": 2.3540377616882324, + "655": 2.369310140609741, + "656": 2.362597703933716, + "657": 2.355721950531006, + "658": 2.3551576137542725, + "659": 2.363320827484131, + "660": 2.359642505645752, + "661": 2.342151165008545, + "662": 2.3799564838409424, + "663": 2.406167984008789, + "664": 2.3755650520324707, + "665": 2.3758602142333984, + "666": 2.3462915420532227, + "667": 2.3359925746917725, + "668": 2.346792221069336, + "669": 2.3401670455932617, + "670": 2.3435280323028564, + "671": 2.3481383323669434, + "672": 2.3416247367858887, + "673": 2.3271641731262207, + "674": 2.3429627418518066, + "675": 2.3368120193481445, + "676": 2.3182358741760254, + "677": 2.3329615592956543, + "678": 2.3190741539001465, + "679": 2.333871364593506, + "680": 2.3324553966522217, + "681": 2.3340964317321777, + "682": 2.3210675716400146, + "683": 2.3353805541992188, + "684": 2.3347253799438477, + "685": 2.3267807960510254, + "686": 2.341536045074463, + "687": 2.351691961288452, + "688": 2.3285975456237793, + "689": 2.310883045196533, + "690": 2.325793504714966, + "691": 2.3111751079559326, + "692": 2.2995662689208984, + "693": 2.2912259101867676, + "694": 2.313803195953369, + "695": 2.333242893218994, + "696": 2.3076462745666504, + "697": 2.3276991844177246, + "698": 2.299652576446533, + "699": 2.330610752105713, + "700": 2.3376986980438232, + "701": 2.31112003326416 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "train_epoch_time": 4.79313063621521, + "train_loss": 2.315652990546425, + "train_score": 0.3212697273090888, + "val_loss": 2.3595519397069884, + "val_score": 0.3076609143159693 + }, + { + "epoch": 13, + "grad_norm": 0.5055509209632874, + "learning_rate": 0.06666666666666668, + "model_norm": 87.5983657836914, + "step_logs": { + "grad_norm": { + "702": 0.873421847820282, + "703": 0.8787020444869995, + "704": 0.8275917172431946, + "705": 0.6600570678710938, + "706": 0.6477454900741577, + "707": 0.7042960524559021, + "708": 0.7077363133430481, + "709": 0.6466545462608337, + "710": 0.5727100372314453, + "711": 0.5508549213409424, + "712": 0.5574941039085388, + "713": 0.612697958946228, + "714": 0.6018780469894409, + "715": 0.6349993944168091, + "716": 0.627842366695404, + "717": 0.5532848238945007, + "718": 0.5407602190971375, + "719": 0.5293595194816589, + "720": 0.5181177258491516, + "721": 0.49682825803756714, + "722": 0.5690038204193115, + "723": 0.56830894947052, + "724": 0.6396275162696838, + "725": 0.6821790933609009, + "726": 0.6260387301445007, + "727": 0.5721016526222229, + "728": 0.5962054133415222, + "729": 0.6370568871498108, + "730": 0.5463374853134155, + "731": 0.5727306008338928, + "732": 0.5895899534225464, + "733": 0.6114787459373474, + "734": 0.5390244126319885, + "735": 0.47865039110183716, + "736": 0.5278720259666443, + "737": 0.49008116126060486, + "738": 0.48022207617759705, + "739": 0.5447253584861755, + "740": 0.4932536780834198, + "741": 0.4671356678009033, + "742": 0.41701987385749817, + "743": 0.4587652385234833, + "744": 0.47394877672195435, + "745": 0.42169538140296936, + "746": 0.4012998342514038, + "747": 0.4062337577342987, + "748": 0.4478924870491028, + "749": 0.5040670037269592, + "750": 0.4971032440662384, + "751": 0.4145593047142029, + "752": 0.46045157313346863, + "753": 0.4367998242378235, + "754": 0.4862283170223236, + "755": 0.5055509209632874 + }, + "loss": { + "702": 2.3045897483825684, + "703": 2.3064589500427246, + "704": 2.3070566654205322, + "705": 2.2990026473999023, + "706": 2.318958282470703, + "707": 2.3057384490966797, + "708": 2.321763753890991, + "709": 2.3071179389953613, + "710": 2.312420606613159, + "711": 2.3025388717651367, + "712": 2.303982973098755, + "713": 2.3164925575256348, + "714": 2.3022115230560303, + "715": 2.316880941390991, + "716": 2.2946736812591553, + "717": 2.3245089054107666, + "718": 2.312926769256592, + "719": 2.2961387634277344, + "720": 2.2904229164123535, + "721": 2.298205852508545, + "722": 2.3078927993774414, + "723": 2.299222469329834, + "724": 2.288508415222168, + "725": 2.2892141342163086, + "726": 2.3007991313934326, + "727": 2.27486515045166, + "728": 2.288325786590576, + "729": 2.300913095474243, + "730": 2.3070125579833984, + "731": 2.286262035369873, + "732": 2.2943308353424072, + "733": 2.295842170715332, + "734": 2.2783195972442627, + "735": 2.2907257080078125, + "736": 2.297694683074951, + "737": 2.2753493785858154, + "738": 2.2839298248291016, + "739": 2.2937827110290527, + "740": 2.2947144508361816, + "741": 2.283738613128662, + "742": 2.3150370121002197, + "743": 2.2977237701416016, + "744": 2.2905197143554688, + "745": 2.299306631088257, + "746": 2.277128219604492, + "747": 2.2901813983917236, + "748": 2.2748513221740723, + "749": 2.255838632583618, + "750": 2.2761495113372803, + "751": 2.2779102325439453, + "752": 2.270442008972168, + "753": 2.2821240425109863, + "754": 2.2740554809570312, + "755": 2.277127981185913 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "train_epoch_time": 4.793538808822632, + "train_loss": 2.281302854491443, + "train_score": 0.32983545558353405, + "val_loss": 2.326994259787208, + "val_score": 0.31552274636368255 + }, + { + "epoch": 14, + "grad_norm": 0.4144708216190338, + "learning_rate": 0.03333333333333334, + "model_norm": 87.60089874267578, + "step_logs": { + "grad_norm": { + "756": 0.4931719899177551, + "757": 0.4075883626937866, + "758": 0.41429805755615234, + "759": 0.4326213002204895, + "760": 0.4707561731338501, + "761": 0.3977644145488739, + "762": 0.4253990948200226, + "763": 0.44635725021362305, + "764": 0.46539491415023804, + "765": 0.47820448875427246, + "766": 0.47383829951286316, + "767": 0.4402104318141937, + "768": 0.47765982151031494, + "769": 0.42489269375801086, + "770": 0.4826014041900635, + "771": 0.43354299664497375, + "772": 0.43740114569664, + "773": 0.4561915993690491, + "774": 0.4442673325538635, + "775": 0.4328565001487732, + "776": 0.43971607089042664, + "777": 0.42531150579452515, + "778": 0.4106575548648834, + "779": 0.39637649059295654, + "780": 0.40828216075897217, + "781": 0.4233377277851105, + "782": 0.4745389521121979, + "783": 0.5316278338432312, + "784": 0.41087356209754944, + "785": 0.4421166479587555, + "786": 0.4266990125179291, + "787": 0.43108993768692017, + "788": 0.45749399065971375, + "789": 0.4333341121673584, + "790": 0.4311707615852356, + "791": 0.3969404995441437, + "792": 0.4155759811401367, + "793": 0.3905121684074402, + "794": 0.4371906518936157, + "795": 0.42071861028671265, + "796": 0.44041207432746887, + "797": 0.39641088247299194, + "798": 0.46034425497055054, + "799": 0.4287641942501068, + "800": 0.36320623755455017, + "801": 0.41520726680755615, + "802": 0.44111791253089905, + "803": 0.4099193811416626, + "804": 0.4446238875389099, + "805": 0.4684411585330963, + "806": 0.3893916606903076, + "807": 0.4585827589035034, + "808": 0.4609387218952179, + "809": 0.4144708216190338 + }, + "loss": { + "756": 2.285633087158203, + "757": 2.2859766483306885, + "758": 2.2913501262664795, + "759": 2.2818572521209717, + "760": 2.2529425621032715, + "761": 2.285468578338623, + "762": 2.2915501594543457, + "763": 2.2674098014831543, + "764": 2.2713334560394287, + "765": 2.2697865962982178, + "766": 2.290040969848633, + "767": 2.2831172943115234, + "768": 2.294541597366333, + "769": 2.2672605514526367, + "770": 2.2870659828186035, + "771": 2.264914035797119, + "772": 2.2836861610412598, + "773": 2.283172130584717, + "774": 2.301900863647461, + "775": 2.2733378410339355, + "776": 2.2623291015625, + "777": 2.2673239707946777, + "778": 2.2862801551818848, + "779": 2.2801761627197266, + "780": 2.28262996673584, + "781": 2.272077798843384, + "782": 2.261648654937744, + "783": 2.2838706970214844, + "784": 2.2722668647766113, + "785": 2.277329921722412, + "786": 2.264673948287964, + "787": 2.2855968475341797, + "788": 2.2744250297546387, + "789": 2.290834665298462, + "790": 2.260718822479248, + "791": 2.2834455966949463, + "792": 2.266693115234375, + "793": 2.277648448944092, + "794": 2.2884092330932617, + "795": 2.26765513420105, + "796": 2.2677550315856934, + "797": 2.2779088020324707, + "798": 2.2651021480560303, + "799": 2.271761178970337, + "800": 2.257920265197754, + "801": 2.266298532485962, + "802": 2.263610601425171, + "803": 2.2619476318359375, + "804": 2.2639989852905273, + "805": 2.2840189933776855, + "806": 2.273719072341919, + "807": 2.254319667816162, + "808": 2.2900280952453613, + "809": 2.2776894569396973 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "train_epoch_time": 4.7934250831604, + "train_loss": 2.2720030760662455, + "train_score": 0.33221171087821894, + "val_loss": 2.318394170426063, + "val_score": 0.3180745909531546 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:23:07.624259", + "final_model_norm": 87.60089874267578, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:21:24.375213", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 10.934322357177734, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.43321990966797, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 8.455106735229492, + "3": 5.430062770843506, + "4": 5.646889686584473, + "5": 11.033295631408691, + "6": 19.40323257446289, + "7": 7.685751914978027, + "8": 4.723723888397217, + "9": 2.782180070877075, + "10": 2.4882020950317383, + "11": 3.0325138568878174, + "12": 4.416697978973389, + "13": 13.23110580444336, + "14": 3.7264244556427, + "15": 6.371815204620361, + "16": 7.2332377433776855, + "17": 5.820730686187744, + "18": 3.617907762527466, + "19": 7.497646808624268, + "20": 2.679824113845825, + "21": 3.7948453426361084, + "22": 22.81573486328125, + "23": 4.701910495758057, + "24": 17.263755798339844, + "25": 18.11946678161621, + "26": 14.757731437683105, + "27": 7.984584808349609, + "28": 4.494190692901611, + "29": 3.9699289798736572, + "30": 2.19315505027771, + "31": 2.6492249965667725, + "32": 5.787468433380127, + "33": 3.5560007095336914, + "34": 3.076932191848755, + "35": 5.001994609832764, + "36": 6.565011978149414, + "37": 3.4447667598724365, + "38": 7.241092681884766, + "39": 5.51541805267334, + "40": 6.2375102043151855, + "41": 16.69146728515625, + "42": 6.195662498474121, + "43": 6.505462169647217, + "44": 9.777633666992188, + "45": 5.433412075042725, + "46": 4.207071781158447, + "47": 8.236141204833984, + "48": 4.265207290649414, + "49": 3.7269554138183594, + "50": 1.688579797744751, + "51": 2.1194658279418945, + "52": 3.15309476852417, + "53": 10.934322357177734 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.944894552230835, + "3": 3.726104974746704, + "4": 3.616518020629883, + "5": 3.6710803508758545, + "6": 4.058044910430908, + "7": 4.218535423278809, + "8": 3.69265079498291, + "9": 3.4404852390289307, + "10": 3.4065136909484863, + "11": 3.3600831031799316, + "12": 3.3283584117889404, + "13": 3.448434352874756, + "14": 3.412850856781006, + "15": 3.4513070583343506, + "16": 3.5203781127929688, + "17": 3.7609071731567383, + "18": 3.273527145385742, + "19": 3.076810836791992, + "20": 3.258510112762451, + "21": 3.120025157928467, + "22": 3.4057347774505615, + "23": 3.2594664096832275, + "24": 6.348177909851074, + "25": 5.118218421936035, + "26": 4.498443603515625, + "27": 4.378347873687744, + "28": 4.566770553588867, + "29": 3.8768372535705566, + "30": 3.3816871643066406, + "31": 3.405897617340088, + "32": 3.4564743041992188, + "33": 3.9595227241516113, + "34": 3.3077290058135986, + "35": 3.292396068572998, + "36": 3.7437429428100586, + "37": 3.428488254547119, + "38": 3.564821481704712, + "39": 4.445432186126709, + "40": 3.9705464839935303, + "41": 5.185230731964111, + "42": 3.322171211242676, + "43": 4.240131855010986, + "44": 4.554407119750977, + "45": 5.6574811935424805, + "46": 4.055406093597412, + "47": 3.8916234970092773, + "48": 5.238371849060059, + "49": 4.262404918670654, + "50": 3.371361255645752, + "51": 3.319608211517334, + "52": 3.4928855895996094, + "53": 4.050593376159668 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "train_epoch_time": 4.795361518859863, + "train_loss": 5.793298119554561, + "train_score": 0.0842853300091697, + "val_loss": 5.795012561653566, + "val_score": 0.07970364541499678 + }, + { + "epoch": 1, + "grad_norm": 2.271104097366333, + "learning_rate": 0.1, + "model_norm": 87.44066619873047, + "step_logs": { + "grad_norm": { + "54": 7.6241044998168945, + "55": 18.086820602416992, + "56": 13.76170825958252, + "57": 12.195931434631348, + "58": 7.707003116607666, + "59": 7.350017547607422, + "60": 4.068539142608643, + "61": 2.0567800998687744, + "62": 1.7179681062698364, + "63": 1.5987063646316528, + "64": 1.7728155851364136, + "65": 1.6580462455749512, + "66": 2.2254655361175537, + "67": 2.6681747436523438, + "68": 4.53953742980957, + "69": 2.4346330165863037, + "70": 2.5816047191619873, + "71": 1.6540542840957642, + "72": 1.8485995531082153, + "73": 2.111536741256714, + "74": 2.41682767868042, + "75": 3.788759708404541, + "76": 2.339066743850708, + "77": 2.0891082286834717, + "78": 7.765440464019775, + "79": 2.2957189083099365, + "80": 2.1013708114624023, + "81": 1.9216939210891724, + "82": 1.3233511447906494, + "83": 1.3147692680358887, + "84": 1.985363483428955, + "85": 1.7441880702972412, + "86": 1.4367233514785767, + "87": 1.473137617111206, + "88": 1.9445949792861938, + "89": 3.4859619140625, + "90": 2.658531904220581, + "91": 1.740609049797058, + "92": 1.3440288305282593, + "93": 1.3734662532806396, + "94": 1.9076367616653442, + "95": 4.12608528137207, + "96": 3.1233537197113037, + "97": 2.544762372970581, + "98": 1.650414228439331, + "99": 1.5785913467407227, + "100": 2.0071909427642822, + "101": 2.1086478233337402, + "102": 2.614389419555664, + "103": 2.1235971450805664, + "104": 1.0054165124893188, + "105": 1.140705943107605, + "106": 1.9072152376174927, + "107": 2.271104097366333 + }, + "loss": { + "54": 5.75114631652832, + "55": 7.560795307159424, + "56": 5.97125244140625, + "57": 5.015491008758545, + "58": 4.765529155731201, + "59": 5.059174537658691, + "60": 4.556539535522461, + "61": 4.199892044067383, + "62": 3.8614931106567383, + "63": 3.665408134460449, + "64": 3.421212673187256, + "65": 3.278885841369629, + "66": 3.2003910541534424, + "67": 3.2887396812438965, + "68": 3.3668746948242188, + "69": 3.6330623626708984, + "70": 3.3452088832855225, + "71": 3.1023542881011963, + "72": 3.0678114891052246, + "73": 3.1246538162231445, + "74": 3.133157253265381, + "75": 3.2065348625183105, + "76": 3.39947772026062, + "77": 3.0320286750793457, + "78": 3.203878402709961, + "79": 3.2980666160583496, + "80": 3.0177414417266846, + "81": 3.018956184387207, + "82": 2.902022361755371, + "83": 2.8780031204223633, + "84": 2.890287399291992, + "85": 2.9209518432617188, + "86": 2.8200201988220215, + "87": 2.799708843231201, + "88": 2.807431936264038, + "89": 2.9304189682006836, + "90": 3.1970624923706055, + "91": 2.8154075145721436, + "92": 2.774245262145996, + "93": 2.7271995544433594, + "94": 2.782083034515381, + "95": 2.91900372505188, + "96": 3.2765560150146484, + "97": 2.9599997997283936, + "98": 2.764235496520996, + "99": 2.76535701751709, + "100": 2.7487878799438477, + "101": 2.838395357131958, + "102": 2.7675676345825195, + "103": 2.905409097671509, + "104": 2.6871585845947266, + "105": 2.6786069869995117, + "106": 2.6857080459594727, + "107": 2.813164234161377 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "train_epoch_time": 4.793367624282837, + "train_loss": 2.7579970372800675, + "train_score": 0.22975026898438825, + "val_loss": 2.770692006865544, + "val_score": 0.2255220296017475 + }, + { + "epoch": 2, + "grad_norm": 1.6037001609802246, + "learning_rate": 0.1, + "model_norm": 87.44985961914062, + "step_logs": { + "grad_norm": { + "108": 2.205274820327759, + "109": 1.9024920463562012, + "110": 1.8349051475524902, + "111": 1.9209145307540894, + "112": 1.98993980884552, + "113": 1.854478359222412, + "114": 1.439594030380249, + "115": 1.5765751600265503, + "116": 2.0506632328033447, + "117": 1.853440284729004, + "118": 1.4193261861801147, + "119": 1.6544969081878662, + "120": 2.240386962890625, + "121": 1.8756539821624756, + "122": 1.1770734786987305, + "123": 1.2625330686569214, + "124": 1.6702266931533813, + "125": 1.7356467247009277, + "126": 1.6991575956344604, + "127": 1.6498024463653564, + "128": 1.5905519723892212, + "129": 1.4519444704055786, + "130": 1.3913549184799194, + "131": 1.6237090826034546, + "132": 1.873119592666626, + "133": 1.8221279382705688, + "134": 1.51943039894104, + "135": 1.5637608766555786, + "136": 1.9931915998458862, + "137": 1.919577717781067, + "138": 1.651701807975769, + "139": 1.6597294807434082, + "140": 1.6811857223510742, + "141": 1.6458594799041748, + "142": 1.5167800188064575, + "143": 1.4958332777023315, + "144": 1.637245774269104, + "145": 1.579387903213501, + "146": 1.3156354427337646, + "147": 1.2954654693603516, + "148": 1.374786376953125, + "149": 1.4277857542037964, + "150": 1.3610658645629883, + "151": 1.325801968574524, + "152": 1.6275372505187988, + "153": 1.5998854637145996, + "154": 1.5871320962905884, + "155": 1.7424808740615845, + "156": 1.5622713565826416, + "157": 1.4811228513717651, + "158": 1.6517689228057861, + "159": 1.603561520576477, + "160": 1.6064814329147339, + "161": 1.6037001609802246 + }, + "loss": { + "108": 2.7527647018432617, + "109": 2.7791929244995117, + "110": 2.7051548957824707, + "111": 2.7681171894073486, + "112": 2.682340145111084, + "113": 2.7631428241729736, + "114": 2.6718995571136475, + "115": 2.694222927093506, + "116": 2.720673084259033, + "117": 2.756666660308838, + "118": 2.650331974029541, + "119": 2.6732842922210693, + "120": 2.6883387565612793, + "121": 2.768552780151367, + "122": 2.6281371116638184, + "123": 2.6367087364196777, + "124": 2.6483657360076904, + "125": 2.696963310241699, + "126": 2.6594419479370117, + "127": 2.6907296180725098, + "128": 2.646522045135498, + "129": 2.693516731262207, + "130": 2.613452434539795, + "131": 2.653848648071289, + "132": 2.6505908966064453, + "133": 2.6901187896728516, + "134": 2.6408727169036865, + "135": 2.607588291168213, + "136": 2.649486541748047, + "137": 2.699981689453125, + "138": 2.62245512008667, + "139": 2.6442720890045166, + "140": 2.6174240112304688, + "141": 2.6557726860046387, + "142": 2.617366313934326, + "143": 2.637457847595215, + "144": 2.5964584350585938, + "145": 2.6647820472717285, + "146": 2.6050243377685547, + "147": 2.6010234355926514, + "148": 2.5798959732055664, + "149": 2.6097328662872314, + "150": 2.5874581336975098, + "151": 2.595344305038452, + "152": 2.58966064453125, + "153": 2.6383798122406006, + "154": 2.598228931427002, + "155": 2.6442062854766846, + "156": 2.6013989448547363, + "157": 2.601226806640625, + "158": 2.5805084705352783, + "159": 2.6327877044677734, + "160": 2.5750956535339355, + "161": 2.6411633491516113 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "train_epoch_time": 4.793071031570435, + "train_loss": 2.581822080420627, + "train_score": 0.2562141319301241, + "val_loss": 2.606259554042882, + "val_score": 0.249811638647051 + }, + { + "epoch": 3, + "grad_norm": 1.315682291984558, + "learning_rate": 0.1, + "model_norm": 87.45896911621094, + "step_logs": { + "grad_norm": { + "162": 1.57014000415802, + "163": 1.5652203559875488, + "164": 1.3761399984359741, + "165": 1.378050446510315, + "166": 1.3502795696258545, + "167": 1.3488165140151978, + "168": 1.4270493984222412, + "169": 1.3370720148086548, + "170": 1.194875717163086, + "171": 1.1860826015472412, + "172": 1.2140578031539917, + "173": 1.1944050788879395, + "174": 1.2293858528137207, + "175": 1.3145146369934082, + "176": 1.4801857471466064, + "177": 1.4290152788162231, + "178": 1.4381383657455444, + "179": 1.5648773908615112, + "180": 1.621522068977356, + "181": 1.6166753768920898, + "182": 1.5579445362091064, + "183": 1.418610692024231, + "184": 1.2966928482055664, + "185": 1.329863429069519, + "186": 1.3860971927642822, + "187": 1.3883217573165894, + "188": 1.4934109449386597, + "189": 1.4227187633514404, + "190": 1.268064022064209, + "191": 1.3188798427581787, + "192": 1.4236650466918945, + "193": 1.3837710618972778, + "194": 1.2826850414276123, + "195": 1.3714286088943481, + "196": 1.4355472326278687, + "197": 1.3856053352355957, + "198": 1.4145541191101074, + "199": 1.3343926668167114, + "200": 1.127225637435913, + "201": 1.1470998525619507, + "202": 1.208306908607483, + "203": 1.22909414768219, + "204": 1.2772661447525024, + "205": 1.2968382835388184, + "206": 1.251835584640503, + "207": 1.3025931119918823, + "208": 1.4895981550216675, + "209": 1.4703450202941895, + "210": 1.4638627767562866, + "211": 1.378810167312622, + "212": 1.2432467937469482, + "213": 1.2543805837631226, + "214": 1.2786723375320435, + "215": 1.315682291984558 + }, + "loss": { + "162": 2.587583065032959, + "163": 2.6200835704803467, + "164": 2.585965156555176, + "165": 2.6061301231384277, + "166": 2.5670583248138428, + "167": 2.565309524536133, + "168": 2.5831704139709473, + "169": 2.610398292541504, + "170": 2.5702619552612305, + "171": 2.5738534927368164, + "172": 2.5414133071899414, + "173": 2.587667226791382, + "174": 2.555570363998413, + "175": 2.583132743835449, + "176": 2.550055503845215, + "177": 2.6150519847869873, + "178": 2.5643064975738525, + "179": 2.5992491245269775, + "180": 2.584167003631592, + "181": 2.623523235321045, + "182": 2.5763607025146484, + "183": 2.587963342666626, + "184": 2.56569766998291, + "185": 2.5684685707092285, + "186": 2.5555853843688965, + "187": 2.572157382965088, + "188": 2.5411853790283203, + "189": 2.591902256011963, + "190": 2.5444412231445312, + "191": 2.5447676181793213, + "192": 2.5466439723968506, + "193": 2.569901943206787, + "194": 2.543004035949707, + "195": 2.573594570159912, + "196": 2.5569801330566406, + "197": 2.5572328567504883, + "198": 2.5544891357421875, + "199": 2.570556163787842, + "200": 2.538686752319336, + "201": 2.5396692752838135, + "202": 2.537353754043579, + "203": 2.5404670238494873, + "204": 2.525038957595825, + "205": 2.542437791824341, + "206": 2.5132079124450684, + "207": 2.5575854778289795, + "208": 2.5400819778442383, + "209": 2.5585079193115234, + "210": 2.547358512878418, + "211": 2.5636322498321533, + "212": 2.5159521102905273, + "213": 2.527498722076416, + "214": 2.5264225006103516, + "215": 2.547900438308716 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "train_epoch_time": 4.793066024780273, + "train_loss": 2.5332912087269457, + "train_score": 0.25806693875977776, + "val_loss": 2.568290614917549, + "val_score": 0.252722265953722 + }, + { + "epoch": 4, + "grad_norm": 1.255943775177002, + "learning_rate": 0.1, + "model_norm": 87.46749114990234, + "step_logs": { + "grad_norm": { + "216": 1.3211983442306519, + "217": 1.3779386281967163, + "218": 1.6283546686172485, + "219": 1.542668342590332, + "220": 1.4222657680511475, + "221": 1.4642791748046875, + "222": 1.5152777433395386, + "223": 1.455426812171936, + "224": 1.4295985698699951, + "225": 1.4423514604568481, + "226": 1.4302033185958862, + "227": 1.4385539293289185, + "228": 1.4562808275222778, + "229": 1.4059540033340454, + "230": 1.3763949871063232, + "231": 1.45053231716156, + "232": 1.4418200254440308, + "233": 1.3518669605255127, + "234": 1.2135918140411377, + "235": 1.1925864219665527, + "236": 1.1781482696533203, + "237": 1.1683781147003174, + "238": 1.198647379875183, + "239": 1.187096118927002, + "240": 1.2433829307556152, + "241": 1.2245243787765503, + "242": 1.1704925298690796, + "243": 1.2215750217437744, + "244": 1.3533116579055786, + "245": 1.2959398031234741, + "246": 1.1733379364013672, + "247": 1.1857608556747437, + "248": 1.2436304092407227, + "249": 1.3097114562988281, + "250": 1.4403356313705444, + "251": 1.3560868501663208, + "252": 1.2285547256469727, + "253": 1.276785135269165, + "254": 1.3266159296035767, + "255": 1.2754143476486206, + "256": 1.1954617500305176, + "257": 1.2000350952148438, + "258": 1.1608084440231323, + "259": 1.143524408340454, + "260": 1.2010358572006226, + "261": 1.3104660511016846, + "262": 1.3789137601852417, + "263": 1.456289291381836, + "264": 1.4257675409317017, + "265": 1.3827656507492065, + "266": 1.2667171955108643, + "267": 1.2337268590927124, + "268": 1.2437505722045898, + "269": 1.255943775177002 + }, + "loss": { + "216": 2.540203094482422, + "217": 2.5396881103515625, + "218": 2.5548245906829834, + "219": 2.5851268768310547, + "220": 2.5351104736328125, + "221": 2.5615248680114746, + "222": 2.5332982540130615, + "223": 2.569162607192993, + "224": 2.549081563949585, + "225": 2.5589795112609863, + "226": 2.5483968257904053, + "227": 2.540041923522949, + "228": 2.532780885696411, + "229": 2.554763078689575, + "230": 2.535845994949341, + "231": 2.532985210418701, + "232": 2.542069435119629, + "233": 2.5587151050567627, + "234": 2.498936653137207, + "235": 2.529637575149536, + "236": 2.525259494781494, + "237": 2.52756667137146, + "238": 2.5121688842773438, + "239": 2.5410871505737305, + "240": 2.5145249366760254, + "241": 2.524502754211426, + "242": 2.5309348106384277, + "243": 2.532170295715332, + "244": 2.5251142978668213, + "245": 2.543562412261963, + "246": 2.498774766921997, + "247": 2.524109125137329, + "248": 2.500328302383423, + "249": 2.5229434967041016, + "250": 2.511627197265625, + "251": 2.5283007621765137, + "252": 2.50056791305542, + "253": 2.5200109481811523, + "254": 2.507739305496216, + "255": 2.538595199584961, + "256": 2.499328374862671, + "257": 2.5182833671569824, + "258": 2.50455379486084, + "259": 2.5087757110595703, + "260": 2.4964256286621094, + "261": 2.5303449630737305, + "262": 2.5143609046936035, + "263": 2.5192484855651855, + "264": 2.519637107849121, + "265": 2.5299739837646484, + "266": 2.5209293365478516, + "267": 2.497617483139038, + "268": 2.499988555908203, + "269": 2.515929937362671 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "train_epoch_time": 4.793076515197754, + "train_loss": 2.5068613774807242, + "train_score": 0.26193059542599845, + "val_loss": 2.5377160761030617, + "val_score": 0.25469557316514024 + }, + { + "epoch": 5, + "grad_norm": 1.0183618068695068, + "learning_rate": 0.1, + "model_norm": 87.47647857666016, + "step_logs": { + "grad_norm": { + "270": 1.2888706922531128, + "271": 1.2905149459838867, + "272": 1.2208613157272339, + "273": 1.2000612020492554, + "274": 1.1423448324203491, + "275": 1.0955835580825806, + "276": 1.1377270221710205, + "277": 1.2265115976333618, + "278": 1.2998406887054443, + "279": 1.2332520484924316, + "280": 1.1454983949661255, + "281": 1.228863000869751, + "282": 1.4385524988174438, + "283": 1.4464181661605835, + "284": 1.1787866353988647, + "285": 1.0656853914260864, + "286": 1.1027541160583496, + "287": 1.1337908506393433, + "288": 1.2419637441635132, + "289": 1.2795246839523315, + "290": 1.3119382858276367, + "291": 1.3148417472839355, + "292": 1.2502707242965698, + "293": 1.2403854131698608, + "294": 1.2405940294265747, + "295": 1.2264312505722046, + "296": 1.2762929201126099, + "297": 1.2564655542373657, + "298": 1.2502623796463013, + "299": 1.182131052017212, + "300": 1.1872307062149048, + "301": 1.1584244966506958, + "302": 1.1509478092193604, + "303": 1.1777397394180298, + "304": 1.2179162502288818, + "305": 1.2852668762207031, + "306": 1.4277710914611816, + "307": 1.4504923820495605, + "308": 1.411252737045288, + "309": 1.5104649066925049, + "310": 1.4985687732696533, + "311": 1.4138259887695312, + "312": 1.1221050024032593, + "313": 1.0279879570007324, + "314": 1.0947867631912231, + "315": 1.0967639684677124, + "316": 1.1049673557281494, + "317": 1.1171448230743408, + "318": 1.1055734157562256, + "319": 1.1026877164840698, + "320": 1.1551287174224854, + "321": 1.1338034868240356, + "322": 1.0455329418182373, + "323": 1.0183618068695068 + }, + "loss": { + "270": 2.4969587326049805, + "271": 2.52333927154541, + "272": 2.5029678344726562, + "273": 2.524637222290039, + "274": 2.50390362739563, + "275": 2.500753164291382, + "276": 2.4904160499572754, + "277": 2.5181005001068115, + "278": 2.4987549781799316, + "279": 2.5213732719421387, + "280": 2.492311954498291, + "281": 2.514549732208252, + "282": 2.5207369327545166, + "283": 2.5474534034729004, + "284": 2.4867730140686035, + "285": 2.4976253509521484, + "286": 2.485607147216797, + "287": 2.4887099266052246, + "288": 2.4849839210510254, + "289": 2.5085396766662598, + "290": 2.5221471786499023, + "291": 2.508707046508789, + "292": 2.4703402519226074, + "293": 2.491426944732666, + "294": 2.488715171813965, + "295": 2.5134172439575195, + "296": 2.5023412704467773, + "297": 2.501113176345825, + "298": 2.5273776054382324, + "299": 2.4985909461975098, + "300": 2.4877607822418213, + "301": 2.499028444290161, + "302": 2.478795051574707, + "303": 2.516446113586426, + "304": 2.4692130088806152, + "305": 2.5044076442718506, + "306": 2.4741435050964355, + "307": 2.5357775688171387, + "308": 2.5134716033935547, + "309": 2.4990007877349854, + "310": 2.5130791664123535, + "311": 2.5232057571411133, + "312": 2.489683151245117, + "313": 2.4821839332580566, + "314": 2.4768636226654053, + "315": 2.492902994155884, + "316": 2.4939475059509277, + "317": 2.501979351043701, + "318": 2.4935226440429688, + "319": 2.491177797317505, + "320": 2.4911839962005615, + "321": 2.4907495975494385, + "322": 2.4883058071136475, + "323": 2.4766368865966797 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "train_epoch_time": 4.792854309082031, + "train_loss": 2.47357450776668, + "train_score": 0.2708919923907544, + "val_loss": 2.522531079643766, + "val_score": 0.2605213112658666 + }, + { + "epoch": 6, + "grad_norm": 1.3232122659683228, + "learning_rate": 0.1, + "model_norm": 87.48561096191406, + "step_logs": { + "grad_norm": { + "324": 1.0678391456604004, + "325": 1.0858285427093506, + "326": 1.1414928436279297, + "327": 1.0667126178741455, + "328": 0.9203104972839355, + "329": 0.9948205947875977, + "330": 1.0342060327529907, + "331": 1.042789340019226, + "332": 1.193058967590332, + "333": 1.26388680934906, + "334": 1.3626110553741455, + "335": 1.4746543169021606, + "336": 1.3356393575668335, + "337": 1.1898353099822998, + "338": 1.0583469867706299, + "339": 0.9968602657318115, + "340": 1.0603820085525513, + "341": 1.1961801052093506, + "342": 1.2735388278961182, + "343": 1.2184572219848633, + "344": 1.113504409790039, + "345": 1.0792896747589111, + "346": 1.1413064002990723, + "347": 1.1297205686569214, + "348": 1.1726166009902954, + "349": 1.2580716609954834, + "350": 1.2107887268066406, + "351": 1.2144241333007812, + "352": 1.2301344871520996, + "353": 1.2117292881011963, + "354": 1.2339168787002563, + "355": 1.1750441789627075, + "356": 1.1156620979309082, + "357": 1.152209758758545, + "358": 1.1627806425094604, + "359": 1.2509145736694336, + "360": 1.2125744819641113, + "361": 1.1409679651260376, + "362": 1.047756314277649, + "363": 1.0098140239715576, + "364": 0.9484673142433167, + "365": 0.9900673031806946, + "366": 1.0080896615982056, + "367": 1.075703740119934, + "368": 1.2004772424697876, + "369": 1.35337233543396, + "370": 1.3664360046386719, + "371": 1.485229730606079, + "372": 1.5003336668014526, + "373": 1.4629193544387817, + "374": 1.4541159868240356, + "375": 1.5671086311340332, + "376": 1.491943597793579, + "377": 1.3232122659683228 + }, + "loss": { + "324": 2.473811626434326, + "325": 2.481210708618164, + "326": 2.483485221862793, + "327": 2.490971565246582, + "328": 2.4594228267669678, + "329": 2.471400022506714, + "330": 2.4843342304229736, + "331": 2.4998817443847656, + "332": 2.4674744606018066, + "333": 2.4877407550811768, + "334": 2.4874391555786133, + "335": 2.4866015911102295, + "336": 2.4862873554229736, + "337": 2.4790971279144287, + "338": 2.4758708477020264, + "339": 2.4860565662384033, + "340": 2.471980571746826, + "341": 2.491982936859131, + "342": 2.4847707748413086, + "343": 2.4865832328796387, + "344": 2.4886763095855713, + "345": 2.4785633087158203, + "346": 2.4921278953552246, + "347": 2.475213050842285, + "348": 2.4717774391174316, + "349": 2.4874706268310547, + "350": 2.473141670227051, + "351": 2.4926629066467285, + "352": 2.4693734645843506, + "353": 2.470925807952881, + "354": 2.4947831630706787, + "355": 2.4795145988464355, + "356": 2.454616069793701, + "357": 2.4772300720214844, + "358": 2.4731054306030273, + "359": 2.4879140853881836, + "360": 2.471583843231201, + "361": 2.473353624343872, + "362": 2.462623119354248, + "363": 2.4717414379119873, + "364": 2.4528353214263916, + "365": 2.4707860946655273, + "366": 2.463679552078247, + "367": 2.465583086013794, + "368": 2.468745708465576, + "369": 2.4872140884399414, + "370": 2.486074924468994, + "371": 2.482853412628174, + "372": 2.5016226768493652, + "373": 2.473024368286133, + "374": 2.478849411010742, + "375": 2.512293815612793, + "376": 2.5045816898345947, + "377": 2.4941835403442383 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "train_epoch_time": 4.792715072631836, + "train_loss": 2.4831072682116604, + "train_score": 0.26030868902439025, + "val_loss": 2.5313797309122186, + "val_score": 0.25334116700185566 + }, + { + "epoch": 7, + "grad_norm": 1.5443918704986572, + "learning_rate": 0.1, + "model_norm": 87.49939727783203, + "step_logs": { + "grad_norm": { + "378": 1.3162884712219238, + "379": 1.2067433595657349, + "380": 1.1712816953659058, + "381": 1.1231498718261719, + "382": 1.2187566757202148, + "383": 1.237554669380188, + "384": 1.1868064403533936, + "385": 1.070710301399231, + "386": 1.0229023694992065, + "387": 1.0611571073532104, + "388": 1.0525788068771362, + "389": 1.016467571258545, + "390": 1.0857120752334595, + "391": 1.1862807273864746, + "392": 1.128239393234253, + "393": 1.1056164503097534, + "394": 1.1890345811843872, + "395": 1.272507905960083, + "396": 1.2444751262664795, + "397": 1.2104840278625488, + "398": 1.2719988822937012, + "399": 1.2732030153274536, + "400": 1.1537237167358398, + "401": 1.0243120193481445, + "402": 0.9922335147857666, + "403": 0.9944742321968079, + "404": 1.075513243675232, + "405": 1.200055480003357, + "406": 1.2577424049377441, + "407": 1.3575090169906616, + "408": 1.3999048471450806, + "409": 1.4631588459014893, + "410": 1.5729089975357056, + "411": 1.538995385169983, + "412": 1.3967065811157227, + "413": 1.1819331645965576, + "414": 1.031980276107788, + "415": 0.9106211066246033, + "416": 0.9348329901695251, + "417": 1.0886101722717285, + "418": 1.254741907119751, + "419": 1.6187708377838135, + "420": 1.7849868535995483, + "421": 1.2913455963134766, + "422": 0.9638165235519409, + "423": 0.939207911491394, + "424": 1.0187772512435913, + "425": 1.1491801738739014, + "426": 1.2341713905334473, + "427": 1.4123573303222656, + "428": 1.4255586862564087, + "429": 1.4977840185165405, + "430": 1.5621732473373413, + "431": 1.5443918704986572 + }, + "loss": { + "378": 2.4901351928710938, + "379": 2.4646198749542236, + "380": 2.4778575897216797, + "381": 2.469677209854126, + "382": 2.4916670322418213, + "383": 2.461810827255249, + "384": 2.4723567962646484, + "385": 2.45829176902771, + "386": 2.457439422607422, + "387": 2.4681992530822754, + "388": 2.4569380283355713, + "389": 2.460292339324951, + "390": 2.4383909702301025, + "391": 2.464303493499756, + "392": 2.470714569091797, + "393": 2.450083017349243, + "394": 2.4778363704681396, + "395": 2.454130172729492, + "396": 2.479257106781006, + "397": 2.446122646331787, + "398": 2.4874773025512695, + "399": 2.4751386642456055, + "400": 2.4647936820983887, + "401": 2.4522576332092285, + "402": 2.442202091217041, + "403": 2.4207894802093506, + "404": 2.443239688873291, + "405": 2.4430959224700928, + "406": 2.4443068504333496, + "407": 2.44044828414917, + "408": 2.4579362869262695, + "409": 2.4598844051361084, + "410": 2.487255334854126, + "411": 2.4754133224487305, + "412": 2.474102735519409, + "413": 2.442674398422241, + "414": 2.44766902923584, + "415": 2.423190116882324, + "416": 2.4155349731445312, + "417": 2.4214839935302734, + "418": 2.422520875930786, + "419": 2.441464424133301, + "420": 2.4788379669189453, + "421": 2.4609122276306152, + "422": 2.4330925941467285, + "423": 2.4144163131713867, + "424": 2.4115402698516846, + "425": 2.41140079498291, + "426": 2.431873321533203, + "427": 2.4388933181762695, + "428": 2.433537483215332, + "429": 2.4347527027130127, + "430": 2.4340856075286865, + "431": 2.4404280185699463 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "train_epoch_time": 4.792433261871338, + "train_loss": 2.441129124831606, + "train_score": 0.266009460092446, + "val_loss": 2.488670668289938, + "val_score": 0.25693796614418074 + }, + { + "epoch": 8, + "grad_norm": 1.1139146089553833, + "learning_rate": 0.1, + "model_norm": 87.51409149169922, + "step_logs": { + "grad_norm": { + "432": 1.4680956602096558, + "433": 1.3616770505905151, + "434": 1.3802167177200317, + "435": 1.3216320276260376, + "436": 1.3114744424819946, + "437": 1.3092608451843262, + "438": 1.2603095769882202, + "439": 1.228329062461853, + "440": 1.1659294366836548, + "441": 1.0559760332107544, + "442": 1.1393606662750244, + "443": 1.2201030254364014, + "444": 1.3578343391418457, + "445": 1.3618934154510498, + "446": 1.3702973127365112, + "447": 1.4715440273284912, + "448": 1.8345134258270264, + "449": 1.824266791343689, + "450": 1.6442898511886597, + "451": 1.4994401931762695, + "452": 1.2396150827407837, + "453": 1.2815325260162354, + "454": 1.4068149328231812, + "455": 1.4612693786621094, + "456": 1.4176119565963745, + "457": 1.2853500843048096, + "458": 1.1139631271362305, + "459": 1.110557198524475, + "460": 1.296318531036377, + "461": 1.2464932203292847, + "462": 1.0801687240600586, + "463": 1.0897548198699951, + "464": 1.2457433938980103, + "465": 1.220635175704956, + "466": 1.0722063779830933, + "467": 1.0892789363861084, + "468": 1.1401158571243286, + "469": 1.1175123453140259, + "470": 1.1574429273605347, + "471": 1.2341843843460083, + "472": 1.1629436016082764, + "473": 1.1734954118728638, + "474": 1.2790582180023193, + "475": 1.3268572092056274, + "476": 1.3662433624267578, + "477": 1.3509211540222168, + "478": 1.3142013549804688, + "479": 1.226273536682129, + "480": 1.1497875452041626, + "481": 1.2131487131118774, + "482": 1.2446680068969727, + "483": 1.229183316230774, + "484": 1.1781516075134277, + "485": 1.1139146089553833 + }, + "loss": { + "432": 2.4437899589538574, + "433": 2.424687385559082, + "434": 2.4247090816497803, + "435": 2.410892963409424, + "436": 2.421461820602417, + "437": 2.403177261352539, + "438": 2.393446445465088, + "439": 2.3813343048095703, + "440": 2.402475595474243, + "441": 2.38427734375, + "442": 2.3911595344543457, + "443": 2.381458282470703, + "444": 2.41695237159729, + "445": 2.382270097732544, + "446": 2.384190559387207, + "447": 2.401710033416748, + "448": 2.4225590229034424, + "449": 2.476780891418457, + "450": 2.424055814743042, + "451": 2.422682046890259, + "452": 2.3920207023620605, + "453": 2.4021143913269043, + "454": 2.383765697479248, + "455": 2.4077653884887695, + "456": 2.377314329147339, + "457": 2.380800485610962, + "458": 2.3656201362609863, + "459": 2.37318754196167, + "460": 2.369588851928711, + "461": 2.3998665809631348, + "462": 2.369065761566162, + "463": 2.3675756454467773, + "464": 2.356987476348877, + "465": 2.3940508365631104, + "466": 2.352421522140503, + "467": 2.3660664558410645, + "468": 2.373870372772217, + "469": 2.375763416290283, + "470": 2.3583180904388428, + "471": 2.4042859077453613, + "472": 2.3731842041015625, + "473": 2.3786895275115967, + "474": 2.3778910636901855, + "475": 2.380321502685547, + "476": 2.3639659881591797, + "477": 2.400440216064453, + "478": 2.3682382106781006, + "479": 2.3443045616149902, + "480": 2.3650808334350586, + "481": 2.353428840637207, + "482": 2.377392053604126, + "483": 2.3923966884613037, + "484": 2.367537021636963, + "485": 2.3758602142333984 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "train_epoch_time": 4.792607307434082, + "train_loss": 2.3498305032037763, + "train_score": 0.3071209200656568, + "val_loss": 2.394291750462266, + "val_score": 0.2975522028250207 + }, + { + "epoch": 9, + "grad_norm": 1.1305856704711914, + "learning_rate": 0.1, + "model_norm": 87.52873992919922, + "step_logs": { + "grad_norm": { + "486": 1.1678463220596313, + "487": 1.1428059339523315, + "488": 1.0742453336715698, + "489": 1.1098260879516602, + "490": 1.2321646213531494, + "491": 1.35171639919281, + "492": 1.4551805257797241, + "493": 1.3502675294876099, + "494": 1.155698537826538, + "495": 1.1915760040283203, + "496": 1.2877423763275146, + "497": 1.3450067043304443, + "498": 1.28325617313385, + "499": 1.1674617528915405, + "500": 1.1079142093658447, + "501": 1.1063071489334106, + "502": 1.0905908346176147, + "503": 1.164880394935608, + "504": 1.2494115829467773, + "505": 1.2591413259506226, + "506": 1.3494834899902344, + "507": 1.3859891891479492, + "508": 1.3739418983459473, + "509": 1.3578109741210938, + "510": 1.335060954093933, + "511": 1.3035852909088135, + "512": 1.2293521165847778, + "513": 1.2602473497390747, + "514": 1.3364858627319336, + "515": 1.342375636100769, + "516": 1.1868771314620972, + "517": 1.070024013519287, + "518": 1.0482691526412964, + "519": 1.089015245437622, + "520": 1.2586957216262817, + "521": 1.3905738592147827, + "522": 1.5018579959869385, + "523": 1.577476143836975, + "524": 1.4661107063293457, + "525": 1.3028267621994019, + "526": 1.1227914094924927, + "527": 1.041851282119751, + "528": 1.1096352338790894, + "529": 1.17355215549469, + "530": 1.167136311531067, + "531": 1.151084303855896, + "532": 1.0863591432571411, + "533": 1.0845279693603516, + "534": 1.1236761808395386, + "535": 1.1533441543579102, + "536": 1.123315453529358, + "537": 1.0647786855697632, + "538": 1.0899544954299927, + "539": 1.1305856704711914 + }, + "loss": { + "486": 2.3578438758850098, + "487": 2.3592309951782227, + "488": 2.3549351692199707, + "489": 2.3621273040771484, + "490": 2.34975004196167, + "491": 2.368321180343628, + "492": 2.3742294311523438, + "493": 2.383321762084961, + "494": 2.345890998840332, + "495": 2.3425047397613525, + "496": 2.355344772338867, + "497": 2.3632798194885254, + "498": 2.3482437133789062, + "499": 2.353148937225342, + "500": 2.348996162414551, + "501": 2.339188575744629, + "502": 2.323655605316162, + "503": 2.349989891052246, + "504": 2.3400676250457764, + "505": 2.351712226867676, + "506": 2.3578600883483887, + "507": 2.350525379180908, + "508": 2.3507909774780273, + "509": 2.35506010055542, + "510": 2.3364036083221436, + "511": 2.357384204864502, + "512": 2.3415093421936035, + "513": 2.354396343231201, + "514": 2.3620967864990234, + "515": 2.3538894653320312, + "516": 2.3279030323028564, + "517": 2.30672025680542, + "518": 2.32662296295166, + "519": 2.3261046409606934, + "520": 2.326359748840332, + "521": 2.333083152770996, + "522": 2.329730749130249, + "523": 2.346832275390625, + "524": 2.361660957336426, + "525": 2.361846446990967, + "526": 2.3219640254974365, + "527": 2.3159098625183105, + "528": 2.334743022918701, + "529": 2.3389053344726562, + "530": 2.342283248901367, + "531": 2.3142433166503906, + "532": 2.300330400466919, + "533": 2.3037781715393066, + "534": 2.3184280395507812, + "535": 2.323700428009033, + "536": 2.326554775238037, + "537": 2.3354570865631104, + "538": 2.3264319896698, + "539": 2.3157200813293457 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "train_epoch_time": 4.792439699172974, + "train_loss": 2.309127076558095, + "train_score": 0.3164589310393614, + "val_loss": 2.35623586602107, + "val_score": 0.30715861878904216 + }, + { + "epoch": 10, + "grad_norm": 1.1796280145645142, + "learning_rate": 0.1, + "model_norm": 87.544189453125, + "step_logs": { + "grad_norm": { + "540": 1.161305546760559, + "541": 1.2283931970596313, + "542": 1.317402720451355, + "543": 1.3057178258895874, + "544": 1.2032058238983154, + "545": 1.14738929271698, + "546": 1.1586660146713257, + "547": 1.1149215698242188, + "548": 1.1587846279144287, + "549": 1.2024911642074585, + "550": 1.255492091178894, + "551": 1.1892679929733276, + "552": 1.1933448314666748, + "553": 1.3068780899047852, + "554": 1.3164958953857422, + "555": 1.3122694492340088, + "556": 1.2884422540664673, + "557": 1.221727728843689, + "558": 1.1230424642562866, + "559": 1.0691163539886475, + "560": 1.0665291547775269, + "561": 1.2649699449539185, + "562": 1.3877015113830566, + "563": 1.4894722700119019, + "564": 1.588516354560852, + "565": 1.434905767440796, + "566": 1.312994122505188, + "567": 1.328588843345642, + "568": 1.2876503467559814, + "569": 1.170223593711853, + "570": 1.1069793701171875, + "571": 1.1823673248291016, + "572": 1.4810973405838013, + "573": 1.5807106494903564, + "574": 1.4761574268341064, + "575": 1.3216222524642944, + "576": 1.1939986944198608, + "577": 1.1861799955368042, + "578": 1.235101342201233, + "579": 1.3082369565963745, + "580": 1.3869751691818237, + "581": 1.5206942558288574, + "582": 1.6433238983154297, + "583": 1.858879804611206, + "584": 1.9497158527374268, + "585": 1.6261554956436157, + "586": 1.1508755683898926, + "587": 1.0212258100509644, + "588": 0.9676312208175659, + "589": 1.0243943929672241, + "590": 1.1439132690429688, + "591": 1.2129299640655518, + "592": 1.2235816717147827, + "593": 1.1796280145645142 + }, + "loss": { + "540": 2.3171210289001465, + "541": 2.308004379272461, + "542": 2.315589427947998, + "543": 2.3284170627593994, + "544": 2.294079542160034, + "545": 2.32733154296875, + "546": 2.3148040771484375, + "547": 2.3080525398254395, + "548": 2.285910129547119, + "549": 2.3130908012390137, + "550": 2.3004648685455322, + "551": 2.328625202178955, + "552": 2.2756528854370117, + "553": 2.3235809803009033, + "554": 2.3086225986480713, + "555": 2.2931509017944336, + "556": 2.327099323272705, + "557": 2.2975168228149414, + "558": 2.2790915966033936, + "559": 2.289891242980957, + "560": 2.2908947467803955, + "561": 2.2981479167938232, + "562": 2.3118560314178467, + "563": 2.3111345767974854, + "564": 2.3224036693573, + "565": 2.306122303009033, + "566": 2.3056812286376953, + "567": 2.306670904159546, + "568": 2.3035149574279785, + "569": 2.2847745418548584, + "570": 2.2881369590759277, + "571": 2.282701253890991, + "572": 2.3043665885925293, + "573": 2.30226469039917, + "574": 2.3024446964263916, + "575": 2.2997875213623047, + "576": 2.3021397590637207, + "577": 2.295419216156006, + "578": 2.2787160873413086, + "579": 2.3125319480895996, + "580": 2.286283016204834, + "581": 2.335730791091919, + "582": 2.3143105506896973, + "583": 2.3098740577697754, + "584": 2.3531603813171387, + "585": 2.3374156951904297, + "586": 2.3087737560272217, + "587": 2.2460923194885254, + "588": 2.2396931648254395, + "589": 2.286303997039795, + "590": 2.27905011177063, + "591": 2.2785191535949707, + "592": 2.275733232498169, + "593": 2.2709147930145264 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "train_epoch_time": 4.792525291442871, + "train_loss": 2.263752429351916, + "train_score": 0.3371917592829235, + "val_loss": 2.308534987895278, + "val_score": 0.32576151765309846 + }, + { + "epoch": 11, + "grad_norm": 1.3685996532440186, + "learning_rate": 0.1, + "model_norm": 87.5596694946289, + "step_logs": { + "grad_norm": { + "594": 1.0865164995193481, + "595": 1.1329095363616943, + "596": 1.3010756969451904, + "597": 1.328014850616455, + "598": 1.3275071382522583, + "599": 1.3220125436782837, + "600": 1.1146235466003418, + "601": 1.0263105630874634, + "602": 1.0661321878433228, + "603": 1.1506239175796509, + "604": 1.1343413591384888, + "605": 1.104317545890808, + "606": 1.261232614517212, + "607": 1.3966481685638428, + "608": 1.2947789430618286, + "609": 1.2160342931747437, + "610": 1.1448036432266235, + "611": 1.0876213312149048, + "612": 0.9403430223464966, + "613": 0.954963207244873, + "614": 1.0463155508041382, + "615": 1.185558795928955, + "616": 1.34348464012146, + "617": 1.4730113744735718, + "618": 1.4631580114364624, + "619": 1.4056711196899414, + "620": 1.1971206665039062, + "621": 1.133603572845459, + "622": 1.261331558227539, + "623": 1.2839696407318115, + "624": 1.3165996074676514, + "625": 1.2759069204330444, + "626": 1.237928032875061, + "627": 1.2392375469207764, + "628": 1.3599416017532349, + "629": 1.5133132934570312, + "630": 1.6796135902404785, + "631": 2.099416732788086, + "632": 1.7489235401153564, + "633": 1.3765060901641846, + "634": 1.2964673042297363, + "635": 1.2260470390319824, + "636": 1.1240711212158203, + "637": 1.0787630081176758, + "638": 1.0385583639144897, + "639": 1.084102749824524, + "640": 1.1306864023208618, + "641": 1.2291929721832275, + "642": 1.3389650583267212, + "643": 1.4280952215194702, + "644": 1.4507155418395996, + "645": 1.438639760017395, + "646": 1.4191477298736572, + "647": 1.3685996532440186 + }, + "loss": { + "594": 2.279766321182251, + "595": 2.262509822845459, + "596": 2.25968861579895, + "597": 2.303629159927368, + "598": 2.248288154602051, + "599": 2.2764763832092285, + "600": 2.2789015769958496, + "601": 2.2551932334899902, + "602": 2.2632646560668945, + "603": 2.2653019428253174, + "604": 2.2451963424682617, + "605": 2.2576050758361816, + "606": 2.2759532928466797, + "607": 2.2747607231140137, + "608": 2.2684969902038574, + "609": 2.262915849685669, + "610": 2.242417335510254, + "611": 2.262439727783203, + "612": 2.2382800579071045, + "613": 2.2459542751312256, + "614": 2.2378830909729004, + "615": 2.2267580032348633, + "616": 2.27055287361145, + "617": 2.2495005130767822, + "618": 2.265409231185913, + "619": 2.2847390174865723, + "620": 2.252580165863037, + "621": 2.2570343017578125, + "622": 2.260960102081299, + "623": 2.264223098754883, + "624": 2.264275550842285, + "625": 2.252000331878662, + "626": 2.242096424102783, + "627": 2.2638721466064453, + "628": 2.2463698387145996, + "629": 2.2558932304382324, + "630": 2.2472801208496094, + "631": 2.301028251647949, + "632": 2.3061435222625732, + "633": 2.2686140537261963, + "634": 2.248030662536621, + "635": 2.2571120262145996, + "636": 2.221653938293457, + "637": 2.231825590133667, + "638": 2.227450132369995, + "639": 2.2359278202056885, + "640": 2.2498085498809814, + "641": 2.2468912601470947, + "642": 2.258052349090576, + "643": 2.229947566986084, + "644": 2.249079704284668, + "645": 2.262083053588867, + "646": 2.243039131164551, + "647": 2.2438366413116455 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "train_epoch_time": 4.792309522628784, + "train_loss": 2.23927902587004, + "train_score": 0.34341485840689334, + "val_loss": 2.2831107362950847, + "val_score": 0.33204918891088836 + }, + { + "epoch": 12, + "grad_norm": 0.6759992241859436, + "learning_rate": 0.1, + "model_norm": 87.57398986816406, + "step_logs": { + "grad_norm": { + "648": 1.3157293796539307, + "649": 1.2831898927688599, + "650": 1.242431879043579, + "651": 1.1514537334442139, + "652": 1.016585350036621, + "653": 0.9744150042533875, + "654": 1.1358592510223389, + "655": 1.2114596366882324, + "656": 1.149278163909912, + "657": 1.1524993181228638, + "658": 1.1944392919540405, + "659": 1.1589891910552979, + "660": 0.9806221127510071, + "661": 0.8652613162994385, + "662": 0.8761599063873291, + "663": 0.765261173248291, + "664": 0.7722328901290894, + "665": 0.782863199710846, + "666": 0.7870795130729675, + "667": 0.8094477653503418, + "668": 0.7380000352859497, + "669": 0.7013874053955078, + "670": 0.7496308088302612, + "671": 0.7957020401954651, + "672": 0.8425513505935669, + "673": 0.8213631510734558, + "674": 0.9047280550003052, + "675": 0.9603910446166992, + "676": 0.9602252244949341, + "677": 1.0297188758850098, + "678": 1.132676362991333, + "679": 1.2204123735427856, + "680": 1.1892904043197632, + "681": 1.067742943763733, + "682": 1.0063056945800781, + "683": 0.9673774242401123, + "684": 0.9493207335472107, + "685": 0.9398552775382996, + "686": 0.8061246871948242, + "687": 0.7620973587036133, + "688": 0.7281740307807922, + "689": 0.7557730078697205, + "690": 0.8111618757247925, + "691": 0.7774203419685364, + "692": 0.7169901728630066, + "693": 0.7558302879333496, + "694": 0.8023281097412109, + "695": 0.7568250894546509, + "696": 0.6141039729118347, + "697": 0.6380651593208313, + "698": 0.6953994035720825, + "699": 0.7113130688667297, + "700": 0.704087495803833, + "701": 0.6759992241859436 + }, + "loss": { + "648": 2.261910915374756, + "649": 2.2563204765319824, + "650": 2.244753837585449, + "651": 2.2408347129821777, + "652": 2.2249274253845215, + "653": 2.2067465782165527, + "654": 2.2221713066101074, + "655": 2.235966682434082, + "656": 2.222716808319092, + "657": 2.214606761932373, + "658": 2.2392733097076416, + "659": 2.2052741050720215, + "660": 2.205606698989868, + "661": 2.208386182785034, + "662": 2.2034988403320312, + "663": 2.1935505867004395, + "664": 2.197619915008545, + "665": 2.2146215438842773, + "666": 2.194058418273926, + "667": 2.215623378753662, + "668": 2.1855344772338867, + "669": 2.190002918243408, + "670": 2.205420970916748, + "671": 2.1891214847564697, + "672": 2.1700940132141113, + "673": 2.1814677715301514, + "674": 2.188023090362549, + "675": 2.1961395740509033, + "676": 2.1827468872070312, + "677": 2.205850601196289, + "678": 2.208791971206665, + "679": 2.1849048137664795, + "680": 2.1808977127075195, + "681": 2.1970322132110596, + "682": 2.2265472412109375, + "683": 2.189401388168335, + "684": 2.190937042236328, + "685": 2.194042682647705, + "686": 2.1837193965911865, + "687": 2.188961982727051, + "688": 2.1881442070007324, + "689": 2.1811366081237793, + "690": 2.1552915573120117, + "691": 2.179030418395996, + "692": 2.1814942359924316, + "693": 2.18145489692688, + "694": 2.1861605644226074, + "695": 2.2093653678894043, + "696": 2.1812477111816406, + "697": 2.1637752056121826, + "698": 2.185196876525879, + "699": 2.1589841842651367, + "700": 2.18300461769104, + "701": 2.1738545894622803 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "train_epoch_time": 4.791497468948364, + "train_loss": 2.169372323116921, + "train_score": 0.36573260401721663, + "val_loss": 2.2238867980878196, + "val_score": 0.3526747271538328 + }, + { + "epoch": 13, + "grad_norm": 0.49907785654067993, + "learning_rate": 0.06666666666666668, + "model_norm": 87.5830307006836, + "step_logs": { + "grad_norm": { + "702": 0.6368564963340759, + "703": 0.6076768040657043, + "704": 0.5592228770256042, + "705": 0.5513622760772705, + "706": 0.6080222725868225, + "707": 0.577868640422821, + "708": 0.6604942679405212, + "709": 0.6498759388923645, + "710": 0.665377140045166, + "711": 0.7255287766456604, + "712": 0.7604535818099976, + "713": 0.8307657837867737, + "714": 0.8735730648040771, + "715": 0.8782442212104797, + "716": 0.7650600075721741, + "717": 0.6790304780006409, + "718": 0.6845667362213135, + "719": 0.66389000415802, + "720": 0.6423768997192383, + "721": 0.5914143919944763, + "722": 0.532931923866272, + "723": 0.4888806641101837, + "724": 0.5077181458473206, + "725": 0.5264216661453247, + "726": 0.5784741640090942, + "727": 0.5645992159843445, + "728": 0.5294567942619324, + "729": 0.48763424158096313, + "730": 0.5541208386421204, + "731": 0.5630053877830505, + "732": 0.5812532901763916, + "733": 0.5510789752006531, + "734": 0.5447343587875366, + "735": 0.5688351988792419, + "736": 0.4924420118331909, + "737": 0.4757325053215027, + "738": 0.5324620604515076, + "739": 0.5173011422157288, + "740": 0.5379522442817688, + "741": 0.592404842376709, + "742": 0.5272946953773499, + "743": 0.5140841603279114, + "744": 0.5539537668228149, + "745": 0.5049985647201538, + "746": 0.5906404852867126, + "747": 0.5667707920074463, + "748": 0.5557032823562622, + "749": 0.5358092784881592, + "750": 0.49783727526664734, + "751": 0.5133266448974609, + "752": 0.5335384011268616, + "753": 0.5760951042175293, + "754": 0.5938552021980286, + "755": 0.49907785654067993 + }, + "loss": { + "702": 2.1720659732818604, + "703": 2.162989854812622, + "704": 2.1660091876983643, + "705": 2.1644506454467773, + "706": 2.175673484802246, + "707": 2.1532578468322754, + "708": 2.160410165786743, + "709": 2.147061824798584, + "710": 2.158083915710449, + "711": 2.156714916229248, + "712": 2.14506196975708, + "713": 2.169018268585205, + "714": 2.182749032974243, + "715": 2.1624374389648438, + "716": 2.1641082763671875, + "717": 2.172196388244629, + "718": 2.1627159118652344, + "719": 2.181286573410034, + "720": 2.1652560234069824, + "721": 2.151679754257202, + "722": 2.1626691818237305, + "723": 2.167447090148926, + "724": 2.1818296909332275, + "725": 2.151371955871582, + "726": 2.1428914070129395, + "727": 2.1450066566467285, + "728": 2.178149700164795, + "729": 2.150766372680664, + "730": 2.1568174362182617, + "731": 2.1730165481567383, + "732": 2.164250373840332, + "733": 2.153724193572998, + "734": 2.1604862213134766, + "735": 2.1655867099761963, + "736": 2.1356310844421387, + "737": 2.1577720642089844, + "738": 2.1455540657043457, + "739": 2.1675660610198975, + "740": 2.1476006507873535, + "741": 2.1399364471435547, + "742": 2.159788131713867, + "743": 2.153350830078125, + "744": 2.1600308418273926, + "745": 2.1429643630981445, + "746": 2.1565303802490234, + "747": 2.1383442878723145, + "748": 2.1562609672546387, + "749": 2.1520001888275146, + "750": 2.158112049102783, + "751": 2.134209394454956, + "752": 2.1595122814178467, + "753": 2.1421687602996826, + "754": 2.135690927505493, + "755": 2.1421546936035156 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "train_epoch_time": 4.79168963432312, + "train_loss": 2.143818064844249, + "train_score": 0.37153761659667345, + "val_loss": 2.199015952142043, + "val_score": 0.3556705655104531 + }, + { + "epoch": 14, + "grad_norm": 0.4956870675086975, + "learning_rate": 0.03333333333333334, + "model_norm": 87.58612823486328, + "step_logs": { + "grad_norm": { + "756": 0.49513155221939087, + "757": 0.5022679567337036, + "758": 0.5577079057693481, + "759": 0.515231728553772, + "760": 0.5745097398757935, + "761": 0.5916004776954651, + "762": 0.536749541759491, + "763": 0.5285218954086304, + "764": 0.541519284248352, + "765": 0.5248390436172485, + "766": 0.512719988822937, + "767": 0.5196036696434021, + "768": 0.4581640362739563, + "769": 0.5233047604560852, + "770": 0.5259172916412354, + "771": 0.5370526909828186, + "772": 0.4967109262943268, + "773": 0.5193302035331726, + "774": 0.5008170008659363, + "775": 0.5132573246955872, + "776": 0.5467756390571594, + "777": 0.5054141283035278, + "778": 0.5527999997138977, + "779": 0.5361348986625671, + "780": 0.49893510341644287, + "781": 0.5206290483474731, + "782": 0.5152865648269653, + "783": 0.5393097400665283, + "784": 0.5165121555328369, + "785": 0.48669740557670593, + "786": 0.5691501498222351, + "787": 0.45931774377822876, + "788": 0.5266578793525696, + "789": 0.5444711446762085, + "790": 0.5368290543556213, + "791": 0.5440704822540283, + "792": 0.4691430330276489, + "793": 0.47556206583976746, + "794": 0.5098633766174316, + "795": 0.5082263350486755, + "796": 0.48889461159706116, + "797": 0.5263949036598206, + "798": 0.458205908536911, + "799": 0.47433793544769287, + "800": 0.5163528323173523, + "801": 0.5056624412536621, + "802": 0.512822687625885, + "803": 0.46433424949645996, + "804": 0.5228627920150757, + "805": 0.4945848882198334, + "806": 0.47502943873405457, + "807": 0.5427560210227966, + "808": 0.48139405250549316, + "809": 0.4956870675086975 + }, + "loss": { + "756": 2.1292901039123535, + "757": 2.13224458694458, + "758": 2.156043767929077, + "759": 2.1326234340667725, + "760": 2.1132423877716064, + "761": 2.1552364826202393, + "762": 2.1544415950775146, + "763": 2.134969472885132, + "764": 2.147430419921875, + "765": 2.1315479278564453, + "766": 2.146841049194336, + "767": 2.156494140625, + "768": 2.139413356781006, + "769": 2.1242666244506836, + "770": 2.160593271255493, + "771": 2.150193214416504, + "772": 2.155601739883423, + "773": 2.139258861541748, + "774": 2.14971923828125, + "775": 2.1444602012634277, + "776": 2.1487326622009277, + "777": 2.1447412967681885, + "778": 2.1246066093444824, + "779": 2.1411261558532715, + "780": 2.1247427463531494, + "781": 2.1267518997192383, + "782": 2.150043249130249, + "783": 2.1429920196533203, + "784": 2.158691883087158, + "785": 2.129425287246704, + "786": 2.132561445236206, + "787": 2.1354730129241943, + "788": 2.1702489852905273, + "789": 2.1353976726531982, + "790": 2.1300811767578125, + "791": 2.145681381225586, + "792": 2.147080898284912, + "793": 2.1334328651428223, + "794": 2.1277124881744385, + "795": 2.1520814895629883, + "796": 2.121870517730713, + "797": 2.1433753967285156, + "798": 2.135822296142578, + "799": 2.1297056674957275, + "800": 2.145019769668579, + "801": 2.147047758102417, + "802": 2.1173782348632812, + "803": 2.122710943222046, + "804": 2.139861583709717, + "805": 2.134819507598877, + "806": 2.1377744674682617, + "807": 2.142080307006836, + "808": 2.116997241973877, + "809": 2.141411542892456 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "train_epoch_time": 4.79171347618103, + "train_loss": 2.135158681527443, + "train_score": 0.3732671270839111, + "val_loss": 2.1931466248771763, + "val_score": 0.35748690503609853 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:24:48.598058", + "final_model_norm": 87.58612823486328, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:23:07.792803", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 3.928489923477173, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.40995025634766, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 8.492406845092773, + "3": 5.133657455444336, + "4": 4.3554368019104, + "5": 8.35051441192627, + "6": 20.943737030029297, + "7": 8.242918014526367, + "8": 5.08157205581665, + "9": 3.359898567199707, + "10": 3.038463592529297, + "11": 3.604124069213867, + "12": 5.520622253417969, + "13": 6.1087188720703125, + "14": 4.094149112701416, + "15": 41.904884338378906, + "16": 3.4324355125427246, + "17": 13.723320007324219, + "18": 3.9538044929504395, + "19": 5.832334518432617, + "20": 3.944932699203491, + "21": 2.1189889907836914, + "22": 2.870297908782959, + "23": 3.1965341567993164, + "24": 5.530642986297607, + "25": 3.9083855152130127, + "26": 2.926786422729492, + "27": 4.4148430824279785, + "28": 4.216004848480225, + "29": 3.2344162464141846, + "30": 4.084825038909912, + "31": 19.7644100189209, + "32": 6.071712017059326, + "33": 2.593540668487549, + "34": 3.893601655960083, + "35": 4.604806900024414, + "36": 2.552725076675415, + "37": 3.1886115074157715, + "38": 8.129566192626953, + "39": 3.764469861984253, + "40": 4.238050937652588, + "41": 2.458381175994873, + "42": 9.120431900024414, + "43": 6.467569351196289, + "44": 4.2845048904418945, + "45": 4.342476844787598, + "46": 9.561224937438965, + "47": 5.701088905334473, + "48": 4.729899883270264, + "49": 3.9542646408081055, + "50": 11.983695030212402, + "51": 5.041728973388672, + "52": 5.196239948272705, + "53": 3.928489923477173 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.9338831901550293, + "3": 3.7350575923919678, + "4": 3.591062068939209, + "5": 3.5876054763793945, + "6": 4.030679702758789, + "7": 4.3120293617248535, + "8": 3.7534420490264893, + "9": 3.4793989658355713, + "10": 3.3799009323120117, + "11": 3.3830275535583496, + "12": 3.336364269256592, + "13": 3.4640421867370605, + "14": 3.2779111862182617, + "15": 3.5040996074676514, + "16": 3.3695948123931885, + "17": 6.5667901039123535, + "18": 3.649301528930664, + "19": 3.5469374656677246, + "20": 3.8657937049865723, + "21": 3.4264955520629883, + "22": 3.388160228729248, + "23": 3.492974281311035, + "24": 3.455472469329834, + "25": 3.8936333656311035, + "26": 3.418292999267578, + "27": 3.435255527496338, + "28": 3.6847920417785645, + "29": 3.361020088195801, + "30": 3.3433456420898438, + "31": 6.951195240020752, + "32": 5.358872413635254, + "33": 3.6707730293273926, + "34": 3.5337109565734863, + "35": 3.961641311645508, + "36": 3.6996350288391113, + "37": 3.5062618255615234, + "38": 3.81649112701416, + "39": 4.745336532592773, + "40": 4.115941047668457, + "41": 3.507040500640869, + "42": 3.8155136108398438, + "43": 5.306589126586914, + "44": 5.048976898193359, + "45": 4.098546981811523, + "46": 4.041048049926758, + "47": 5.62568473815918, + "48": 5.116580963134766, + "49": 4.063141822814941, + "50": 4.201789379119873, + "51": 6.328237533569336, + "52": 5.198306560516357, + "53": 3.7437965869903564 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "train_epoch_time": 4.793660640716553, + "train_loss": 7.0373294337750165, + "train_score": 0.1526093973619182, + "val_loss": 7.05570218647936, + "val_score": 0.15117322052182067 + }, + { + "epoch": 1, + "grad_norm": 3.1967504024505615, + "learning_rate": 0.1, + "model_norm": 87.35136413574219, + "step_logs": { + "grad_norm": { + "54": 20.8322696685791, + "55": 5.69713020324707, + "56": 4.769029140472412, + "57": 2.6155433654785156, + "58": 21.443958282470703, + "59": 4.483689785003662, + "60": 4.094512462615967, + "61": 4.553788661956787, + "62": 3.568303346633911, + "63": 3.3428092002868652, + "64": 3.310009241104126, + "65": 10.45778751373291, + "66": 3.8152976036071777, + "67": 4.708316326141357, + "68": 3.467155933380127, + "69": 1.7733898162841797, + "70": 2.766688585281372, + "71": 2.9359614849090576, + "72": 4.071628570556641, + "73": 3.1312408447265625, + "74": 2.2443907260894775, + "75": 8.656401634216309, + "76": 3.2381207942962646, + "77": 3.167780876159668, + "78": 3.455178737640381, + "79": 9.005704879760742, + "80": 3.1383450031280518, + "81": 3.4697580337524414, + "82": 3.2605271339416504, + "83": 5.5265421867370605, + "84": 3.0896403789520264, + "85": 3.085052251815796, + "86": 5.85503625869751, + "87": 3.1433417797088623, + "88": 3.538795232772827, + "89": 4.574154376983643, + "90": 2.9779162406921387, + "91": 2.368004560470581, + "92": 9.186367988586426, + "93": 3.1846976280212402, + "94": 2.9605798721313477, + "95": 2.9102890491485596, + "96": 3.2285008430480957, + "97": 3.100440740585327, + "98": 1.6425262689590454, + "99": 2.2215452194213867, + "100": 9.593896865844727, + "101": 3.614487648010254, + "102": 3.2893424034118652, + "103": 2.878619432449341, + "104": 0.9948781728744507, + "105": 1.8634400367736816, + "106": 7.743080139160156, + "107": 3.1967504024505615 + }, + "loss": { + "54": 7.0251545906066895, + "55": 6.5295729637146, + "56": 5.340699195861816, + "57": 3.5550639629364014, + "58": 4.719113826751709, + "59": 8.22577953338623, + "60": 6.953283786773682, + "61": 6.079184532165527, + "62": 5.362835884094238, + "63": 4.218271732330322, + "64": 3.491568088531494, + "65": 4.205788612365723, + "66": 5.975528717041016, + "67": 5.122035503387451, + "68": 4.456905364990234, + "69": 3.4620778560638428, + "70": 3.4128365516662598, + "71": 3.813060998916626, + "72": 3.4397783279418945, + "73": 4.167916297912598, + "74": 3.4135727882385254, + "75": 3.848520517349243, + "76": 5.363252639770508, + "77": 4.492628574371338, + "78": 3.6933460235595703, + "79": 3.979837417602539, + "80": 5.384007453918457, + "81": 4.589946269989014, + "82": 3.8715908527374268, + "83": 3.53947377204895, + "84": 4.503907203674316, + "85": 3.727509021759033, + "86": 3.5863893032073975, + "87": 4.5610175132751465, + "88": 3.7871623039245605, + "89": 3.5952672958374023, + "90": 4.161242961883545, + "91": 3.4317400455474854, + "92": 3.900672435760498, + "93": 5.428202152252197, + "94": 4.576373100280762, + "95": 3.8025903701782227, + "96": 3.4560348987579346, + "97": 3.8929941654205322, + "98": 3.376396894454956, + "99": 3.422096014022827, + "100": 3.8941197395324707, + "101": 5.518061637878418, + "102": 4.7969970703125, + "103": 3.895348310470581, + "104": 3.356802225112915, + "105": 3.353306770324707, + "106": 3.750880241394043, + "107": 5.0138325691223145 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "train_epoch_time": 4.791884183883667, + "train_loss": 4.236121246222957, + "train_score": 0.058572453354489344, + "val_loss": 4.240466636850421, + "val_score": 0.061486438019404316 + }, + { + "epoch": 2, + "grad_norm": 3.2641994953155518, + "learning_rate": 0.1, + "model_norm": 87.329833984375, + "step_logs": { + "grad_norm": { + "108": 3.091353178024292, + "109": 2.8348827362060547, + "110": 9.003742218017578, + "111": 2.9350898265838623, + "112": 2.8313305377960205, + "113": 2.802385091781616, + "114": 2.3809354305267334, + "115": 2.76755428314209, + "116": 5.0462212562561035, + "117": 2.8333096504211426, + "118": 2.7225873470306396, + "119": 6.37400484085083, + "120": 2.846667528152466, + "121": 2.7285897731781006, + "122": 1.2651418447494507, + "123": 2.024562120437622, + "124": 2.5707547664642334, + "125": 6.322628021240234, + "126": 2.8623361587524414, + "127": 2.727850914001465, + "128": 0.9342372417449951, + "129": 1.2566787004470825, + "130": 2.0622317790985107, + "131": 6.882542610168457, + "132": 3.343644380569458, + "133": 2.787292957305908, + "134": 1.7388594150543213, + "135": 5.157271385192871, + "136": 2.7840070724487305, + "137": 2.4878382682800293, + "138": 3.4821557998657227, + "139": 2.5549707412719727, + "140": 1.742350459098816, + "141": 2.621386766433716, + "142": 2.7173802852630615, + "143": 2.51566481590271, + "144": 2.563798666000366, + "145": 3.376544237136841, + "146": 2.679647922515869, + "147": 2.15069317817688, + "148": 2.3656318187713623, + "149": 1.2963240146636963, + "150": 3.189444065093994, + "151": 2.649932384490967, + "152": 1.1049697399139404, + "153": 1.5990302562713623, + "154": 1.2787617444992065, + "155": 1.2961159944534302, + "156": 3.5000383853912354, + "157": 2.621171236038208, + "158": 1.2562835216522217, + "159": 3.0938045978546143, + "160": 2.4794678688049316, + "161": 3.2641994953155518 + }, + "loss": { + "108": 4.230429649353027, + "109": 3.4804975986480713, + "110": 3.9522042274475098, + "111": 5.217118263244629, + "112": 4.460448741912842, + "113": 3.7666289806365967, + "114": 3.398249864578247, + "115": 3.635561466217041, + "116": 3.5293471813201904, + "117": 4.248546123504639, + "118": 3.544987440109253, + "119": 3.6407437324523926, + "120": 4.547821044921875, + "121": 3.848416805267334, + "122": 3.346789836883545, + "123": 3.369158983230591, + "124": 3.5375986099243164, + "125": 3.647054672241211, + "126": 4.53694486618042, + "127": 3.825772285461426, + "128": 3.331212282180786, + "129": 3.3430371284484863, + "130": 3.3759121894836426, + "131": 3.6432371139526367, + "132": 4.652709484100342, + "133": 3.9427027702331543, + "134": 3.3405842781066895, + "135": 3.507436513900757, + "136": 4.2222466468811035, + "137": 3.5759730339050293, + "138": 3.399026393890381, + "139": 3.8177061080932617, + "140": 3.261749267578125, + "141": 3.3000082969665527, + "142": 3.634598970413208, + "143": 3.3618576526641846, + "144": 3.549241542816162, + "145": 3.3747048377990723, + "146": 3.745640277862549, + "147": 3.2384514808654785, + "148": 3.2761054039001465, + "149": 3.289032459259033, + "150": 3.3667030334472656, + "151": 3.7193541526794434, + "152": 3.256500244140625, + "153": 3.2474305629730225, + "154": 3.222397804260254, + "155": 3.2512006759643555, + "156": 3.3207297325134277, + "157": 3.6948719024658203, + "158": 3.165006160736084, + "159": 3.186288833618164, + "160": 3.476701498031616, + "161": 3.25138258934021 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "train_epoch_time": 4.7920238971710205, + "train_loss": 3.588182293428068, + "train_score": 0.11997063305319809, + "val_loss": 3.607787262832256, + "val_score": 0.11555503753583822 + }, + { + "epoch": 3, + "grad_norm": 1.3324508666992188, + "learning_rate": 0.1, + "model_norm": 87.85812377929688, + "step_logs": { + "grad_norm": { + "162": 2.824432134628296, + "163": 0.671918511390686, + "164": 0.6593815684318542, + "165": 1.7013866901397705, + "166": 0.708214521408081, + "167": 0.6305429935455322, + "168": 0.8797547817230225, + "169": 2.1628949642181396, + "170": 2.7668163776397705, + "171": 2.6776111125946045, + "172": 8.3070068359375, + "173": 1.7021747827529907, + "174": 2.527327537536621, + "175": 3.311972141265869, + "176": 4.841958999633789, + "177": 3.6141247749328613, + "178": 3.2142221927642822, + "179": 4.1299824714660645, + "180": 2.6084084510803223, + "181": 28.179595947265625, + "182": 1.637022852897644, + "183": 89.59008026123047, + "184": 3.3377041816711426, + "185": 3.0138769149780273, + "186": 5.9667534828186035, + "187": 3.6820621490478516, + "188": 3.018510103225708, + "189": 2.218217372894287, + "190": 2.8095602989196777, + "191": 4.044013977050781, + "192": 2.6811869144439697, + "193": 1.3078272342681885, + "194": 2.956500768661499, + "195": 2.7388901710510254, + "196": 1.413649559020996, + "197": 1.7727783918380737, + "198": 4.475114822387695, + "199": 2.8780641555786133, + "200": 2.163017511367798, + "201": 3.7061965465545654, + "202": 2.895972967147827, + "203": 1.4888063669204712, + "204": 1.859369158744812, + "205": 2.2908899784088135, + "206": 3.5580227375030518, + "207": 2.626107931137085, + "208": 1.303713321685791, + "209": 1.16728937625885, + "210": 1.6314259767532349, + "211": 3.220306634902954, + "212": 2.8753163814544678, + "213": 1.4265707731246948, + "214": 0.87506502866745, + "215": 1.3324508666992188 + }, + "loss": { + "162": 3.6087803840637207, + "163": 3.1152126789093018, + "164": 3.039452075958252, + "165": 3.073056697845459, + "166": 3.0707786083221436, + "167": 3.107726812362671, + "168": 3.0354068279266357, + "169": 3.02984619140625, + "170": 3.068946361541748, + "171": 3.366084575653076, + "172": 3.1058127880096436, + "173": 3.1386094093322754, + "174": 3.136183738708496, + "175": 3.2795944213867188, + "176": 3.295351505279541, + "177": 3.6594529151916504, + "178": 3.1454052925109863, + "179": 3.2168116569519043, + "180": 3.4305734634399414, + "181": 3.4626173973083496, + "182": 3.180250644683838, + "183": 6.397340774536133, + "184": 3.869750499725342, + "185": 3.7022652626037598, + "186": 3.723776340484619, + "187": 4.366543769836426, + "188": 3.800497055053711, + "189": 3.2572691440582275, + "190": 3.449812889099121, + "191": 3.408133029937744, + "192": 3.788762092590332, + "193": 3.215562343597412, + "194": 3.25341796875, + "195": 3.5887646675109863, + "196": 3.187283992767334, + "197": 3.2056093215942383, + "198": 3.386817455291748, + "199": 3.8443965911865234, + "200": 3.2804689407348633, + "201": 3.272469997406006, + "202": 3.664766788482666, + "203": 3.2230629920959473, + "204": 3.1280860900878906, + "205": 3.25093674659729, + "206": 3.2604851722717285, + "207": 3.567448616027832, + "208": 3.109919548034668, + "209": 3.0726699829101562, + "210": 3.1369528770446777, + "211": 3.1474156379699707, + "212": 3.4623429775238037, + "213": 3.111325979232788, + "214": 3.050870895385742, + "215": 3.0334606170654297 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "train_epoch_time": 4.788628578186035, + "train_loss": 3.0937729036448847, + "train_score": 0.14534164279314504, + "val_loss": 3.116196415591322, + "val_score": 0.14355805131310573 + }, + { + "epoch": 4, + "grad_norm": 1.835491418838501, + "learning_rate": 0.1, + "model_norm": 87.8695297241211, + "step_logs": { + "grad_norm": { + "216": 1.8210769891738892, + "217": 3.1851627826690674, + "218": 2.7567389011383057, + "219": 1.3343204259872437, + "220": 1.0685020685195923, + "221": 1.6193506717681885, + "222": 2.0600414276123047, + "223": 2.947606086730957, + "224": 2.5700862407684326, + "225": 1.65345299243927, + "226": 1.679834246635437, + "227": 2.455029249191284, + "228": 2.3733439445495605, + "229": 1.955888271331787, + "230": 1.9781748056411743, + "231": 2.608536720275879, + "232": 2.326899766921997, + "233": 1.7234989404678345, + "234": 1.6420572996139526, + "235": 2.279515266418457, + "236": 2.1274642944335938, + "237": 1.75114107131958, + "238": 1.8417428731918335, + "239": 2.201796531677246, + "240": 2.1251626014709473, + "241": 1.738152265548706, + "242": 1.7395765781402588, + "243": 1.9702775478363037, + "244": 2.001551628112793, + "245": 2.015592575073242, + "246": 2.0610828399658203, + "247": 2.0354769229888916, + "248": 1.9832501411437988, + "249": 1.9828920364379883, + "250": 1.8790068626403809, + "251": 1.7271981239318848, + "252": 1.811844825744629, + "253": 1.9391674995422363, + "254": 1.960034966468811, + "255": 1.9865453243255615, + "256": 1.986283540725708, + "257": 1.9122625589370728, + "258": 1.892019510269165, + "259": 1.8487002849578857, + "260": 1.751713514328003, + "261": 1.7176132202148438, + "262": 1.8376508951187134, + "263": 2.0424368381500244, + "264": 1.9755114316940308, + "265": 1.726958990097046, + "266": 1.7347079515457153, + "267": 1.7232612371444702, + "268": 1.720772624015808, + "269": 1.835491418838501 + }, + "loss": { + "216": 3.0981698036193848, + "217": 3.1354355812072754, + "218": 3.3637356758117676, + "219": 3.0626955032348633, + "220": 3.0227108001708984, + "221": 3.0190038681030273, + "222": 3.092155933380127, + "223": 3.1007096767425537, + "224": 3.285460948944092, + "225": 2.9957432746887207, + "226": 3.0422234535217285, + "227": 3.03903865814209, + "228": 3.1799209117889404, + "229": 3.0033092498779297, + "230": 3.0493030548095703, + "231": 3.0282745361328125, + "232": 3.1725730895996094, + "233": 2.973391056060791, + "234": 2.9965219497680664, + "235": 2.9718401432037354, + "236": 3.095028877258301, + "237": 2.9446334838867188, + "238": 2.9990029335021973, + "239": 2.9554476737976074, + "240": 3.0799272060394287, + "241": 2.9496467113494873, + "242": 2.9689407348632812, + "243": 2.9472358226776123, + "244": 3.0029308795928955, + "245": 2.9216151237487793, + "246": 3.0016770362854004, + "247": 2.920720338821411, + "248": 3.013965606689453, + "249": 2.942023754119873, + "250": 2.96042799949646, + "251": 2.8988468647003174, + "252": 2.9738144874572754, + "253": 2.889207124710083, + "254": 2.954322338104248, + "255": 2.903627395629883, + "256": 2.9620203971862793, + "257": 2.906479835510254, + "258": 2.93631649017334, + "259": 2.8859314918518066, + "260": 2.903928756713867, + "261": 2.8633675575256348, + "262": 2.911755084991455, + "263": 2.8941762447357178, + "264": 2.9624555110931396, + "265": 2.855073928833008, + "266": 2.8961992263793945, + "267": 2.866568088531494, + "268": 2.891225576400757, + "269": 2.8544578552246094 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "train_epoch_time": 4.789099454879761, + "train_loss": 2.8964361739466487, + "train_score": 0.1781754394087251, + "val_loss": 2.9152807860916434, + "val_score": 0.17769176977441725 + }, + { + "epoch": 5, + "grad_norm": 1.263764500617981, + "learning_rate": 0.1, + "model_norm": 87.87779235839844, + "step_logs": { + "grad_norm": { + "270": 1.8306066989898682, + "271": 1.689879059791565, + "272": 1.6475777626037598, + "273": 1.7084754705429077, + "274": 1.7558374404907227, + "275": 1.646780014038086, + "276": 1.6305023431777954, + "277": 1.7640175819396973, + "278": 1.7945770025253296, + "279": 1.5457614660263062, + "280": 1.477850317955017, + "281": 1.6258735656738281, + "282": 1.700717806816101, + "283": 1.5992242097854614, + "284": 1.53714120388031, + "285": 1.4478787183761597, + "286": 1.4500188827514648, + "287": 1.526047706604004, + "288": 1.487168550491333, + "289": 1.3725082874298096, + "290": 1.361341953277588, + "291": 1.4069572687149048, + "292": 1.4598197937011719, + "293": 1.640318512916565, + "294": 1.6395676136016846, + "295": 1.4420651197433472, + "296": 1.4571055173873901, + "297": 1.5050864219665527, + "298": 1.4598448276519775, + "299": 1.4728896617889404, + "300": 1.5455671548843384, + "301": 1.7616255283355713, + "302": 1.6545928716659546, + "303": 1.3951133489608765, + "304": 1.4193669557571411, + "305": 1.414574384689331, + "306": 1.4064468145370483, + "307": 1.388370394706726, + "308": 1.4153351783752441, + "309": 1.5999380350112915, + "310": 1.6131445169448853, + "311": 1.439826250076294, + "312": 1.421922206878662, + "313": 1.4740266799926758, + "314": 1.471842646598816, + "315": 1.5004557371139526, + "316": 1.4952611923217773, + "317": 1.4506856203079224, + "318": 1.4390641450881958, + "319": 1.3931269645690918, + "320": 1.4123622179031372, + "321": 1.3545799255371094, + "322": 1.3484094142913818, + "323": 1.263764500617981 + }, + "loss": { + "270": 2.9038162231445312, + "271": 2.847886562347412, + "272": 2.8594350814819336, + "273": 2.8455896377563477, + "274": 2.875256061553955, + "275": 2.827949047088623, + "276": 2.830427885055542, + "277": 2.841245174407959, + "278": 2.8708105087280273, + "279": 2.8325061798095703, + "280": 2.8047170639038086, + "281": 2.814955711364746, + "282": 2.865180253982544, + "283": 2.8153252601623535, + "284": 2.822352886199951, + "285": 2.7974014282226562, + "286": 2.809227466583252, + "287": 2.782064437866211, + "288": 2.813408851623535, + "289": 2.772481679916382, + "290": 2.7815897464752197, + "291": 2.772855758666992, + "292": 2.793900489807129, + "293": 2.775840997695923, + "294": 2.843992233276367, + "295": 2.772657871246338, + "296": 2.777970552444458, + "297": 2.790752410888672, + "298": 2.803579568862915, + "299": 2.7560462951660156, + "300": 2.7929444313049316, + "301": 2.8014888763427734, + "302": 2.831510543823242, + "303": 2.758927345275879, + "304": 2.78273606300354, + "305": 2.7754967212677, + "306": 2.7944374084472656, + "307": 2.7461819648742676, + "308": 2.778132200241089, + "309": 2.7810895442962646, + "310": 2.79830002784729, + "311": 2.773873805999756, + "312": 2.770893096923828, + "313": 2.755842685699463, + "314": 2.7996699810028076, + "315": 2.762727975845337, + "316": 2.759260654449463, + "317": 2.7637434005737305, + "318": 2.7653982639312744, + "319": 2.744609832763672, + "320": 2.7802443504333496, + "321": 2.743741750717163, + "322": 2.754742383956909, + "323": 2.7466845512390137 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "train_epoch_time": 4.7897584438323975, + "train_loss": 2.7485697853000812, + "train_score": 0.23732738516252044, + "val_loss": 2.765530650569158, + "val_score": 0.23109662033249673 + }, + { + "epoch": 6, + "grad_norm": 1.1934714317321777, + "learning_rate": 0.1, + "model_norm": 87.8837890625, + "step_logs": { + "grad_norm": { + "324": 1.212211012840271, + "325": 1.3444231748580933, + "326": 1.3861666917800903, + "327": 1.4266451597213745, + "328": 1.470704197883606, + "329": 1.569632649421692, + "330": 1.5838505029678345, + "331": 1.5241219997406006, + "332": 1.497305989265442, + "333": 1.4816832542419434, + "334": 1.462012767791748, + "335": 1.363344430923462, + "336": 1.3326537609100342, + "337": 1.3990378379821777, + "338": 1.3750602006912231, + "339": 1.2829521894454956, + "340": 1.2796564102172852, + "341": 1.3192286491394043, + "342": 1.3997341394424438, + "343": 1.3237347602844238, + "344": 1.2693843841552734, + "345": 1.2844293117523193, + "346": 1.3697872161865234, + "347": 1.5104541778564453, + "348": 1.4591901302337646, + "349": 1.3731815814971924, + "350": 1.3856337070465088, + "351": 1.3679394721984863, + "352": 1.299026370048523, + "353": 1.2451119422912598, + "354": 1.2331608533859253, + "355": 1.387474536895752, + "356": 1.4008287191390991, + "357": 1.3250088691711426, + "358": 1.2733795642852783, + "359": 1.1614477634429932, + "360": 1.172279953956604, + "361": 1.2362662553787231, + "362": 1.28485107421875, + "363": 1.3219964504241943, + "364": 1.359808325767517, + "365": 1.4506912231445312, + "366": 1.502313256263733, + "367": 1.53559410572052, + "368": 1.5011998414993286, + "369": 1.4331663846969604, + "370": 1.4088222980499268, + "371": 1.4210195541381836, + "372": 1.372637152671814, + "373": 1.2787890434265137, + "374": 1.3293178081512451, + "375": 1.4793283939361572, + "376": 1.3737612962722778, + "377": 1.1934714317321777 + }, + "loss": { + "324": 2.7605576515197754, + "325": 2.768967390060425, + "326": 2.7654802799224854, + "327": 2.748854398727417, + "328": 2.7728044986724854, + "329": 2.743345260620117, + "330": 2.7956578731536865, + "331": 2.756959915161133, + "332": 2.7566184997558594, + "333": 2.7495181560516357, + "334": 2.7496824264526367, + "335": 2.7624990940093994, + "336": 2.744152069091797, + "337": 2.737061023712158, + "338": 2.7730588912963867, + "339": 2.727717161178589, + "340": 2.7479336261749268, + "341": 2.7435193061828613, + "342": 2.738661289215088, + "343": 2.731874465942383, + "344": 2.736388683319092, + "345": 2.722999095916748, + "346": 2.7389166355133057, + "347": 2.747027635574341, + "348": 2.76369309425354, + "349": 2.735213041305542, + "350": 2.745793342590332, + "351": 2.737874984741211, + "352": 2.736140727996826, + "353": 2.7206907272338867, + "354": 2.719184398651123, + "355": 2.7062129974365234, + "356": 2.752830982208252, + "357": 2.724489688873291, + "358": 2.7392067909240723, + "359": 2.7085227966308594, + "360": 2.713923454284668, + "361": 2.7098302841186523, + "362": 2.7200355529785156, + "363": 2.7300682067871094, + "364": 2.7280852794647217, + "365": 2.7351455688476562, + "366": 2.733548164367676, + "367": 2.7467041015625, + "368": 2.7625515460968018, + "369": 2.743311882019043, + "370": 2.7539706230163574, + "371": 2.7306618690490723, + "372": 2.736286163330078, + "373": 2.7176260948181152, + "374": 2.7371134757995605, + "375": 2.718600273132324, + "376": 2.752563953399658, + "377": 2.703352451324463 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "train_epoch_time": 4.7895894050598145, + "train_loss": 2.720357865685199, + "train_score": 0.24113948169021387, + "val_loss": 2.739208456843099, + "val_score": 0.23535268352581903 + }, + { + "epoch": 7, + "grad_norm": 1.2527161836624146, + "learning_rate": 0.1, + "model_norm": 87.88997650146484, + "step_logs": { + "grad_norm": { + "378": 1.1807222366333008, + "379": 1.0899643898010254, + "380": 1.0149189233779907, + "381": 1.024212121963501, + "382": 1.056444764137268, + "383": 1.1337379217147827, + "384": 1.2161318063735962, + "385": 1.2631144523620605, + "386": 1.2693703174591064, + "387": 1.309577226638794, + "388": 1.4319877624511719, + "389": 1.4681731462478638, + "390": 1.3460105657577515, + "391": 1.213484764099121, + "392": 1.1490771770477295, + "393": 1.1601296663284302, + "394": 1.2059359550476074, + "395": 1.244826078414917, + "396": 1.192064881324768, + "397": 1.1367443799972534, + "398": 1.1892452239990234, + "399": 1.3832355737686157, + "400": 1.4787708520889282, + "401": 1.5027453899383545, + "402": 1.4790802001953125, + "403": 1.4165902137756348, + "404": 1.4307981729507446, + "405": 1.406058430671692, + "406": 1.4000561237335205, + "407": 1.5383082628250122, + "408": 1.4830294847488403, + "409": 1.154307246208191, + "410": 1.07868492603302, + "411": 1.0703961849212646, + "412": 1.0097122192382812, + "413": 0.9366562366485596, + "414": 0.9800765514373779, + "415": 1.0790947675704956, + "416": 1.1231735944747925, + "417": 1.156981348991394, + "418": 1.1893287897109985, + "419": 1.2710999250411987, + "420": 1.3180030584335327, + "421": 1.2941913604736328, + "422": 1.3021388053894043, + "423": 1.4313437938690186, + "424": 1.3214612007141113, + "425": 1.124120831489563, + "426": 1.1646090745925903, + "427": 1.2193467617034912, + "428": 1.1915640830993652, + "429": 1.2326442003250122, + "430": 1.2742655277252197, + "431": 1.2527161836624146 + }, + "loss": { + "378": 2.7315673828125, + "379": 2.6979758739471436, + "380": 2.7075541019439697, + "381": 2.6885218620300293, + "382": 2.697160243988037, + "383": 2.706719398498535, + "384": 2.719973087310791, + "385": 2.691807270050049, + "386": 2.728854179382324, + "387": 2.708617687225342, + "388": 2.716881275177002, + "389": 2.7154035568237305, + "390": 2.751321792602539, + "391": 2.706925868988037, + "392": 2.703634738922119, + "393": 2.7008211612701416, + "394": 2.69789719581604, + "395": 2.689706802368164, + "396": 2.716561794281006, + "397": 2.6960606575012207, + "398": 2.7129335403442383, + "399": 2.7062387466430664, + "400": 2.7242417335510254, + "401": 2.724435329437256, + "402": 2.740177631378174, + "403": 2.713056802749634, + "404": 2.7444705963134766, + "405": 2.7041895389556885, + "406": 2.7081923484802246, + "407": 2.706737518310547, + "408": 2.7492432594299316, + "409": 2.7043237686157227, + "410": 2.6978580951690674, + "411": 2.698662757873535, + "412": 2.7073848247528076, + "413": 2.6918282508850098, + "414": 2.6825029850006104, + "415": 2.6828842163085938, + "416": 2.6918365955352783, + "417": 2.688469171524048, + "418": 2.7077364921569824, + "419": 2.6961145401000977, + "420": 2.710808277130127, + "421": 2.6898560523986816, + "422": 2.7140374183654785, + "423": 2.717118263244629, + "424": 2.709707736968994, + "425": 2.677877902984619, + "426": 2.682298183441162, + "427": 2.6855239868164062, + "428": 2.684274673461914, + "429": 2.6895804405212402, + "430": 2.6932625770568848, + "431": 2.704105854034424 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "train_epoch_time": 4.789607048034668, + "train_loss": 2.698752693671578, + "train_score": 0.24680662656275065, + "val_loss": 2.717042813481748, + "val_score": 0.24142060147429442 + }, + { + "epoch": 8, + "grad_norm": 1.1504567861557007, + "learning_rate": 0.1, + "model_norm": 87.89695739746094, + "step_logs": { + "grad_norm": { + "432": 1.2470049858093262, + "433": 1.289585828781128, + "434": 1.2592664957046509, + "435": 1.1862597465515137, + "436": 1.1151883602142334, + "437": 0.989433765411377, + "438": 1.0614880323410034, + "439": 1.133050560951233, + "440": 1.1500543355941772, + "441": 1.1999155282974243, + "442": 1.3118451833724976, + "443": 1.3277556896209717, + "444": 1.2784500122070312, + "445": 1.2585184574127197, + "446": 1.187974452972412, + "447": 1.1775364875793457, + "448": 1.1956790685653687, + "449": 1.1144866943359375, + "450": 0.97844398021698, + "451": 0.8708642721176147, + "452": 0.8820230960845947, + "453": 0.9199129939079285, + "454": 1.0073786973953247, + "455": 1.1868746280670166, + "456": 1.2747303247451782, + "457": 1.4342340230941772, + "458": 1.608930230140686, + "459": 1.78340744972229, + "460": 1.8361918926239014, + "461": 1.6384437084197998, + "462": 1.328623652458191, + "463": 1.1527019739151, + "464": 1.1041613817214966, + "465": 1.1172404289245605, + "466": 1.0322593450546265, + "467": 0.9617031216621399, + "468": 0.9166104197502136, + "469": 0.9106884598731995, + "470": 0.9628452062606812, + "471": 1.0301531553268433, + "472": 0.9843339323997498, + "473": 0.9334664344787598, + "474": 0.9433060884475708, + "475": 0.9796757102012634, + "476": 1.0614765882492065, + "477": 1.0917699337005615, + "478": 1.0538979768753052, + "479": 1.1657098531723022, + "480": 1.2200924158096313, + "481": 1.1883965730667114, + "482": 1.1627167463302612, + "483": 1.1036138534545898, + "484": 1.1021698713302612, + "485": 1.1504567861557007 + }, + "loss": { + "432": 2.713374614715576, + "433": 2.671968460083008, + "434": 2.7096199989318848, + "435": 2.6906604766845703, + "436": 2.68302321434021, + "437": 2.6825802326202393, + "438": 2.670771837234497, + "439": 2.689758777618408, + "440": 2.6748640537261963, + "441": 2.6890697479248047, + "442": 2.692525625228882, + "443": 2.696845531463623, + "444": 2.7210817337036133, + "445": 2.6859681606292725, + "446": 2.6838626861572266, + "447": 2.6948540210723877, + "448": 2.692143440246582, + "449": 2.6692957878112793, + "450": 2.6747798919677734, + "451": 2.663140058517456, + "452": 2.659381151199341, + "453": 2.6430397033691406, + "454": 2.6683623790740967, + "455": 2.675079822540283, + "456": 2.6917381286621094, + "457": 2.6812214851379395, + "458": 2.715550422668457, + "459": 2.704169750213623, + "460": 2.742527484893799, + "461": 2.7332534790039062, + "462": 2.68533992767334, + "463": 2.67154860496521, + "464": 2.663553476333618, + "465": 2.6680541038513184, + "466": 2.658794641494751, + "467": 2.66171932220459, + "468": 2.677323341369629, + "469": 2.660294771194458, + "470": 2.6684353351593018, + "471": 2.659008741378784, + "472": 2.65541934967041, + "473": 2.661707878112793, + "474": 2.662236452102661, + "475": 2.660902500152588, + "476": 2.6765389442443848, + "477": 2.6452293395996094, + "478": 2.660407781600952, + "479": 2.6676993370056152, + "480": 2.6760501861572266, + "481": 2.648056745529175, + "482": 2.684999704360962, + "483": 2.657564878463745, + "484": 2.650477409362793, + "485": 2.6608033180236816 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "train_epoch_time": 4.79047703742981, + "train_loss": 2.6679309994794718, + "train_score": 0.2502947901080294, + "val_loss": 2.6905030358398823, + "val_score": 0.24513849013845354 + }, + { + "epoch": 9, + "grad_norm": 1.0001332759857178, + "learning_rate": 0.1, + "model_norm": 87.90438842773438, + "step_logs": { + "grad_norm": { + "486": 1.1612510681152344, + "487": 1.062957525253296, + "488": 1.078001618385315, + "489": 1.1932837963104248, + "490": 1.1981325149536133, + "491": 1.1555547714233398, + "492": 1.1137977838516235, + "493": 1.1161729097366333, + "494": 1.1411128044128418, + "495": 1.2163426876068115, + "496": 1.2052350044250488, + "497": 1.0639222860336304, + "498": 0.9671124815940857, + "499": 0.9194662570953369, + "500": 0.9014149308204651, + "501": 0.8693838119506836, + "502": 0.910906195640564, + "503": 0.8739429712295532, + "504": 0.8550460934638977, + "505": 0.9850298166275024, + "506": 1.0261270999908447, + "507": 1.009405493736267, + "508": 0.9884840250015259, + "509": 1.0212205648422241, + "510": 1.177075982093811, + "511": 1.287395715713501, + "512": 1.207298994064331, + "513": 1.0814963579177856, + "514": 1.014443039894104, + "515": 1.0257301330566406, + "516": 1.0380085706710815, + "517": 1.0529162883758545, + "518": 1.0382617712020874, + "519": 1.0139249563217163, + "520": 0.9397279620170593, + "521": 0.9486550688743591, + "522": 0.9386362433433533, + "523": 0.8969469666481018, + "524": 0.8797743916511536, + "525": 0.8516401052474976, + "526": 0.8083978891372681, + "527": 0.7750649452209473, + "528": 0.7665368914604187, + "529": 0.8149704933166504, + "530": 0.8175081014633179, + "531": 0.8571537137031555, + "532": 0.8864110708236694, + "533": 0.9360243678092957, + "534": 1.0125856399536133, + "535": 1.0238449573516846, + "536": 1.0681442022323608, + "537": 1.0233536958694458, + "538": 0.9476478099822998, + "539": 1.0001332759857178 + }, + "loss": { + "486": 2.6660804748535156, + "487": 2.6628355979919434, + "488": 2.666457414627075, + "489": 2.6530027389526367, + "490": 2.646252393722534, + "491": 2.6584932804107666, + "492": 2.654371738433838, + "493": 2.6585822105407715, + "494": 2.6463801860809326, + "495": 2.648942708969116, + "496": 2.662529468536377, + "497": 2.64328932762146, + "498": 2.6459012031555176, + "499": 2.6344614028930664, + "500": 2.6539177894592285, + "501": 2.6388461589813232, + "502": 2.6546387672424316, + "503": 2.635089874267578, + "504": 2.638031482696533, + "505": 2.646827459335327, + "506": 2.6532065868377686, + "507": 2.6290059089660645, + "508": 2.6561617851257324, + "509": 2.64959979057312, + "510": 2.648609161376953, + "511": 2.654461622238159, + "512": 2.674318313598633, + "513": 2.6441245079040527, + "514": 2.6411540508270264, + "515": 2.6309056282043457, + "516": 2.6483817100524902, + "517": 2.6443352699279785, + "518": 2.6486361026763916, + "519": 2.6380200386047363, + "520": 2.62508487701416, + "521": 2.643843650817871, + "522": 2.6435084342956543, + "523": 2.6433770656585693, + "524": 2.670839309692383, + "525": 2.6234757900238037, + "526": 2.6334450244903564, + "527": 2.6266393661499023, + "528": 2.6260743141174316, + "529": 2.6110000610351562, + "530": 2.621337413787842, + "531": 2.614391803741455, + "532": 2.635751485824585, + "533": 2.6307735443115234, + "534": 2.6422643661499023, + "535": 2.640986919403076, + "536": 2.653642177581787, + "537": 2.622880458831787, + "538": 2.6292803287506104, + "539": 2.642066717147827 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "train_epoch_time": 4.789014577865601, + "train_loss": 2.636936020885342, + "train_score": 0.25737647956378834, + "val_loss": 2.6576797067092564, + "val_score": 0.251358890779804 + }, + { + "epoch": 10, + "grad_norm": 1.3132293224334717, + "learning_rate": 0.1, + "model_norm": 87.91010284423828, + "step_logs": { + "grad_norm": { + "540": 1.0130642652511597, + "541": 1.026869535446167, + "542": 1.059008002281189, + "543": 1.1466665267944336, + "544": 1.166563868522644, + "545": 1.1260216236114502, + "546": 1.1005512475967407, + "547": 1.0809563398361206, + "548": 1.065432071685791, + "549": 0.8896036148071289, + "550": 0.764809787273407, + "551": 0.7022968530654907, + "552": 0.8188730478286743, + "553": 0.9863724708557129, + "554": 1.1375645399093628, + "555": 1.349520206451416, + "556": 1.4614402055740356, + "557": 1.4614065885543823, + "558": 1.5128509998321533, + "559": 1.4895579814910889, + "560": 1.3585960865020752, + "561": 1.3409488201141357, + "562": 1.341147541999817, + "563": 1.2098031044006348, + "564": 1.110071063041687, + "565": 1.2046527862548828, + "566": 1.2403206825256348, + "567": 1.3031007051467896, + "568": 1.327358365058899, + "569": 1.2581506967544556, + "570": 1.1368353366851807, + "571": 1.0242462158203125, + "572": 1.040698528289795, + "573": 1.0046101808547974, + "574": 0.9578930139541626, + "575": 0.9442002177238464, + "576": 0.9903170466423035, + "577": 0.9429300427436829, + "578": 0.946691632270813, + "579": 0.9224731922149658, + "580": 0.8999906778335571, + "581": 0.8905678391456604, + "582": 0.888059675693512, + "583": 0.9678447246551514, + "584": 1.045638084411621, + "585": 1.0738847255706787, + "586": 1.1046018600463867, + "587": 1.049461841583252, + "588": 0.9720369577407837, + "589": 0.988822877407074, + "590": 1.061402440071106, + "591": 1.174731969833374, + "592": 1.2586510181427002, + "593": 1.3132293224334717 + }, + "loss": { + "540": 2.638904333114624, + "541": 2.6240127086639404, + "542": 2.6381092071533203, + "543": 2.6202807426452637, + "544": 2.6436290740966797, + "545": 2.6638031005859375, + "546": 2.64585542678833, + "547": 2.6289055347442627, + "548": 2.6339850425720215, + "549": 2.632993221282959, + "550": 2.6294941902160645, + "551": 2.6152708530426025, + "552": 2.6239538192749023, + "553": 2.6080827713012695, + "554": 2.635983943939209, + "555": 2.6620101928710938, + "556": 2.659130334854126, + "557": 2.6474387645721436, + "558": 2.649404525756836, + "559": 2.6575660705566406, + "560": 2.647495746612549, + "561": 2.6552162170410156, + "562": 2.656172752380371, + "563": 2.6511611938476562, + "564": 2.621337652206421, + "565": 2.610480546951294, + "566": 2.6637675762176514, + "567": 2.63095760345459, + "568": 2.660881996154785, + "569": 2.635842800140381, + "570": 2.6520895957946777, + "571": 2.6140847206115723, + "572": 2.6337008476257324, + "573": 2.582170248031616, + "574": 2.617434024810791, + "575": 2.6204416751861572, + "576": 2.615417003631592, + "577": 2.604065418243408, + "578": 2.621476650238037, + "579": 2.6046745777130127, + "580": 2.6206703186035156, + "581": 2.607785701751709, + "582": 2.598924160003662, + "583": 2.608166217803955, + "584": 2.630323886871338, + "585": 2.616121530532837, + "586": 2.642490863800049, + "587": 2.6267971992492676, + "588": 2.629271984100342, + "589": 2.6087536811828613, + "590": 2.621145725250244, + "591": 2.6333248615264893, + "592": 2.625886917114258, + "593": 2.6297783851623535 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "train_epoch_time": 4.7885589599609375, + "train_loss": 2.6399824152717972, + "train_score": 0.24474757884119983, + "val_loss": 2.6626385922546913, + "val_score": 0.2361061278426962 + }, + { + "epoch": 11, + "grad_norm": 0.9859684109687805, + "learning_rate": 0.1, + "model_norm": 87.91594696044922, + "step_logs": { + "grad_norm": { + "594": 1.232816219329834, + "595": 1.0501583814620972, + "596": 0.9624819755554199, + "597": 0.8268987536430359, + "598": 0.8406144976615906, + "599": 0.9287706613540649, + "600": 0.9681162238121033, + "601": 0.9388473629951477, + "602": 0.9350690245628357, + "603": 0.9709102511405945, + "604": 1.012136459350586, + "605": 0.9263124465942383, + "606": 0.8129522204399109, + "607": 0.7992943525314331, + "608": 0.8032410740852356, + "609": 0.7831953167915344, + "610": 0.7809920310974121, + "611": 0.7741714119911194, + "612": 0.8221547603607178, + "613": 0.9485293626785278, + "614": 1.0550944805145264, + "615": 1.223487138748169, + "616": 1.3416365385055542, + "617": 1.3670084476470947, + "618": 1.4177489280700684, + "619": 1.445788025856018, + "620": 1.424302101135254, + "621": 1.3975396156311035, + "622": 1.313917636871338, + "623": 1.2091777324676514, + "624": 1.088362693786621, + "625": 0.9875993728637695, + "626": 0.860623300075531, + "627": 0.7900123000144958, + "628": 0.6933360695838928, + "629": 0.6035811305046082, + "630": 0.5959075093269348, + "631": 0.569287121295929, + "632": 0.5972772836685181, + "633": 0.6479539275169373, + "634": 0.6718242764472961, + "635": 0.708493709564209, + "636": 0.7453250885009766, + "637": 0.8379040956497192, + "638": 0.9716309905052185, + "639": 1.1598995923995972, + "640": 1.145552635192871, + "641": 1.0906648635864258, + "642": 1.1012121438980103, + "643": 1.130255937576294, + "644": 1.1508382558822632, + "645": 1.1003869771957397, + "646": 1.045069932937622, + "647": 0.9859684109687805 + }, + "loss": { + "594": 2.6449217796325684, + "595": 2.603557825088501, + "596": 2.6232662200927734, + "597": 2.5892586708068848, + "598": 2.616605281829834, + "599": 2.5903077125549316, + "600": 2.6083695888519287, + "601": 2.6061222553253174, + "602": 2.6089248657226562, + "603": 2.6281466484069824, + "604": 2.6093266010284424, + "605": 2.604705810546875, + "606": 2.6213996410369873, + "607": 2.6040000915527344, + "608": 2.6044650077819824, + "609": 2.5897090435028076, + "610": 2.597846031188965, + "611": 2.6044387817382812, + "612": 2.5937461853027344, + "613": 2.6094236373901367, + "614": 2.620765209197998, + "615": 2.635242462158203, + "616": 2.6388814449310303, + "617": 2.633263349533081, + "618": 2.62026047706604, + "619": 2.647897481918335, + "620": 2.6325912475585938, + "621": 2.622979164123535, + "622": 2.64648175239563, + "623": 2.6118288040161133, + "624": 2.628385543823242, + "625": 2.6140294075012207, + "626": 2.607260227203369, + "627": 2.5733721256256104, + "628": 2.5911970138549805, + "629": 2.5724167823791504, + "630": 2.598172426223755, + "631": 2.573568820953369, + "632": 2.5882842540740967, + "633": 2.5935275554656982, + "634": 2.6073246002197266, + "635": 2.5996737480163574, + "636": 2.589401960372925, + "637": 2.5997109413146973, + "638": 2.611051082611084, + "639": 2.6028225421905518, + "640": 2.623549461364746, + "641": 2.5914037227630615, + "642": 2.6109440326690674, + "643": 2.6036734580993652, + "644": 2.6183109283447266, + "645": 2.618478775024414, + "646": 2.618661880493164, + "647": 2.6153457164764404 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "train_epoch_time": 4.7887489795684814, + "train_loss": 2.604097114095045, + "train_score": 0.260620292255834, + "val_loss": 2.6265195924296854, + "val_score": 0.2520719714608176 + }, + { + "epoch": 12, + "grad_norm": 0.3209591507911682, + "learning_rate": 0.1, + "model_norm": 87.92118072509766, + "step_logs": { + "grad_norm": { + "648": 1.000657558441162, + "649": 1.0662273168563843, + "650": 1.115991473197937, + "651": 1.1540179252624512, + "652": 1.1653028726577759, + "653": 1.1926831007003784, + "654": 1.2208694219589233, + "655": 1.0660916566848755, + "656": 1.0124804973602295, + "657": 0.9115723967552185, + "658": 0.8166388869285583, + "659": 0.8163293600082397, + "660": 0.806090772151947, + "661": 0.6859253644943237, + "662": 0.5863920450210571, + "663": 0.5947525501251221, + "664": 0.5988678336143494, + "665": 0.5380848050117493, + "666": 0.5694063901901245, + "667": 0.5971958041191101, + "668": 0.5856954455375671, + "669": 0.681048572063446, + "670": 0.7289005517959595, + "671": 0.7114271521568298, + "672": 0.7229717969894409, + "673": 0.6649057269096375, + "674": 0.6589520573616028, + "675": 0.606964111328125, + "676": 0.5510775446891785, + "677": 0.5911911725997925, + "678": 0.5577713251113892, + "679": 0.46715670824050903, + "680": 0.5220733880996704, + "681": 0.49557799100875854, + "682": 0.4635457694530487, + "683": 0.46117931604385376, + "684": 0.366115003824234, + "685": 0.3324587941169739, + "686": 0.30120137333869934, + "687": 0.3033061623573303, + "688": 0.27584969997406006, + "689": 0.3256341218948364, + "690": 0.306515634059906, + "691": 0.27305927872657776, + "692": 0.32025620341300964, + "693": 0.30496740341186523, + "694": 0.32456541061401367, + "695": 0.3657022714614868, + "696": 0.28745922446250916, + "697": 0.3022083342075348, + "698": 0.28058817982673645, + "699": 0.298888236284256, + "700": 0.38229817152023315, + "701": 0.3209591507911682 + }, + "loss": { + "648": 2.592336654663086, + "649": 2.6028923988342285, + "650": 2.6087141036987305, + "651": 2.626216173171997, + "652": 2.6205203533172607, + "653": 2.601165771484375, + "654": 2.613736629486084, + "655": 2.6018567085266113, + "656": 2.6031391620635986, + "657": 2.589202880859375, + "658": 2.589087724685669, + "659": 2.5789053440093994, + "660": 2.5687503814697266, + "661": 2.599064826965332, + "662": 2.584836006164551, + "663": 2.5851101875305176, + "664": 2.5766913890838623, + "665": 2.580737352371216, + "666": 2.5948710441589355, + "667": 2.557612657546997, + "668": 2.5727901458740234, + "669": 2.593672275543213, + "670": 2.5946192741394043, + "671": 2.5791306495666504, + "672": 2.5738112926483154, + "673": 2.5791189670562744, + "674": 2.580991506576538, + "675": 2.5737242698669434, + "676": 2.594982147216797, + "677": 2.581864833831787, + "678": 2.581547737121582, + "679": 2.5753448009490967, + "680": 2.5702717304229736, + "681": 2.56905460357666, + "682": 2.5909414291381836, + "683": 2.578022003173828, + "684": 2.5715603828430176, + "685": 2.572442054748535, + "686": 2.5702497959136963, + "687": 2.574857711791992, + "688": 2.5609073638916016, + "689": 2.5848307609558105, + "690": 2.562727928161621, + "691": 2.5779733657836914, + "692": 2.5541539192199707, + "693": 2.5602164268493652, + "694": 2.5699291229248047, + "695": 2.5719079971313477, + "696": 2.5735368728637695, + "697": 2.565096855163574, + "698": 2.555540084838867, + "699": 2.5925405025482178, + "700": 2.568173885345459, + "701": 2.562692403793335 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "train_epoch_time": 4.789390802383423, + "train_loss": 2.5664006419982255, + "train_score": 0.26575838418054787, + "val_loss": 2.590764033123765, + "val_score": 0.25639979217958503 + }, + { + "epoch": 13, + "grad_norm": 0.23563528060913086, + "learning_rate": 0.06666666666666668, + "model_norm": 87.9243392944336, + "step_logs": { + "grad_norm": { + "702": 0.3117081820964813, + "703": 0.28111910820007324, + "704": 0.29745182394981384, + "705": 0.36087968945503235, + "706": 0.2960343658924103, + "707": 0.2654924690723419, + "708": 0.2641964554786682, + "709": 0.30734601616859436, + "710": 0.3318740129470825, + "711": 0.3192457854747772, + "712": 0.3369113504886627, + "713": 0.30742621421813965, + "714": 0.2744104564189911, + "715": 0.3230270445346832, + "716": 0.31964409351348877, + "717": 0.2801240384578705, + "718": 0.35099443793296814, + "719": 0.3327133059501648, + "720": 0.3149246573448181, + "721": 0.3623065650463104, + "722": 0.30663007497787476, + "723": 0.2702566981315613, + "724": 0.32905909419059753, + "725": 0.2621152400970459, + "726": 0.25548678636550903, + "727": 0.24502606689929962, + "728": 0.34794163703918457, + "729": 0.25389766693115234, + "730": 0.28882837295532227, + "731": 0.28239139914512634, + "732": 0.3363243341445923, + "733": 0.33072951436042786, + "734": 0.2719234228134155, + "735": 0.2361488938331604, + "736": 0.25554588437080383, + "737": 0.2818159759044647, + "738": 0.3646887242794037, + "739": 0.34500768780708313, + "740": 0.30105453729629517, + "741": 0.277146577835083, + "742": 0.26946109533309937, + "743": 0.279104620218277, + "744": 0.2642884850502014, + "745": 0.3399738073348999, + "746": 0.27590250968933105, + "747": 0.25552427768707275, + "748": 0.2481745183467865, + "749": 0.250038206577301, + "750": 0.24432861804962158, + "751": 0.24622569978237152, + "752": 0.2436780482530594, + "753": 0.25488924980163574, + "754": 0.29004406929016113, + "755": 0.23563528060913086 + }, + "loss": { + "702": 2.565304756164551, + "703": 2.542484998703003, + "704": 2.554048538208008, + "705": 2.5680859088897705, + "706": 2.568817615509033, + "707": 2.5685794353485107, + "708": 2.569775104522705, + "709": 2.5595221519470215, + "710": 2.5770986080169678, + "711": 2.57230806350708, + "712": 2.538276433944702, + "713": 2.5701746940612793, + "714": 2.5652430057525635, + "715": 2.5728981494903564, + "716": 2.5630252361297607, + "717": 2.553982734680176, + "718": 2.564180850982666, + "719": 2.5603227615356445, + "720": 2.573831081390381, + "721": 2.5586624145507812, + "722": 2.565708637237549, + "723": 2.5375452041625977, + "724": 2.5520243644714355, + "725": 2.5846681594848633, + "726": 2.5612869262695312, + "727": 2.5715951919555664, + "728": 2.544262647628784, + "729": 2.55293345451355, + "730": 2.5561492443084717, + "731": 2.584156036376953, + "732": 2.5557851791381836, + "733": 2.572971820831299, + "734": 2.5559208393096924, + "735": 2.574418067932129, + "736": 2.549515724182129, + "737": 2.5522515773773193, + "738": 2.575105667114258, + "739": 2.562318801879883, + "740": 2.560502767562866, + "741": 2.581686019897461, + "742": 2.5498270988464355, + "743": 2.549985885620117, + "744": 2.5514426231384277, + "745": 2.549697160720825, + "746": 2.5701656341552734, + "747": 2.575108766555786, + "748": 2.5700650215148926, + "749": 2.556898593902588, + "750": 2.5626792907714844, + "751": 2.5578603744506836, + "752": 2.566579818725586, + "753": 2.565284490585327, + "754": 2.5585641860961914, + "755": 2.562068223953247 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "train_epoch_time": 4.788929224014282, + "train_loss": 2.5584420603009175, + "train_score": 0.2657057029530513, + "val_loss": 2.583310950911004, + "val_score": 0.2565433047669079 + }, + { + "epoch": 14, + "grad_norm": 0.2518008351325989, + "learning_rate": 0.03333333333333334, + "model_norm": 87.92538452148438, + "step_logs": { + "grad_norm": { + "756": 0.2775435149669647, + "757": 0.29300960898399353, + "758": 0.3221818506717682, + "759": 0.24240811169147491, + "760": 0.26610898971557617, + "761": 0.30956459045410156, + "762": 0.25148969888687134, + "763": 0.2645169496536255, + "764": 0.328706294298172, + "765": 0.24311308562755585, + "766": 0.24176357686519623, + "767": 0.21268054842948914, + "768": 0.27396246790885925, + "769": 0.254978746175766, + "770": 0.2865082621574402, + "771": 0.24055102467536926, + "772": 0.23608236014842987, + "773": 0.23270298540592194, + "774": 0.22181636095046997, + "775": 0.27683961391448975, + "776": 0.25761187076568604, + "777": 0.20783394575119019, + "778": 0.24660271406173706, + "779": 0.3313913941383362, + "780": 0.279691219329834, + "781": 0.25982221961021423, + "782": 0.22880993783473969, + "783": 0.2237672507762909, + "784": 0.2610894739627838, + "785": 0.2801312506198883, + "786": 0.22010964155197144, + "787": 0.2497442215681076, + "788": 0.2843281626701355, + "789": 0.23611755669116974, + "790": 0.28263595700263977, + "791": 0.23977956175804138, + "792": 0.24657556414604187, + "793": 0.24455679953098297, + "794": 0.2648468613624573, + "795": 0.22714628279209137, + "796": 0.24095964431762695, + "797": 0.22900207340717316, + "798": 0.25532257556915283, + "799": 0.22062742710113525, + "800": 0.246209055185318, + "801": 0.24342700839042664, + "802": 0.25300362706184387, + "803": 0.29027682542800903, + "804": 0.22983090579509735, + "805": 0.23163969814777374, + "806": 0.23459257185459137, + "807": 0.23948901891708374, + "808": 0.2343757599592209, + "809": 0.2518008351325989 + }, + "loss": { + "756": 2.5660817623138428, + "757": 2.5577492713928223, + "758": 2.5660839080810547, + "759": 2.5521488189697266, + "760": 2.554875612258911, + "761": 2.570044994354248, + "762": 2.559504747390747, + "763": 2.546598434448242, + "764": 2.5554494857788086, + "765": 2.552934169769287, + "766": 2.5620369911193848, + "767": 2.5694453716278076, + "768": 2.5737318992614746, + "769": 2.546405553817749, + "770": 2.5901038646698, + "771": 2.5474853515625, + "772": 2.5522875785827637, + "773": 2.557750701904297, + "774": 2.548905849456787, + "775": 2.547020435333252, + "776": 2.5597705841064453, + "777": 2.5656325817108154, + "778": 2.552678346633911, + "779": 2.5623927116394043, + "780": 2.5527384281158447, + "781": 2.5446033477783203, + "782": 2.5598649978637695, + "783": 2.5354132652282715, + "784": 2.5684633255004883, + "785": 2.5687317848205566, + "786": 2.5529067516326904, + "787": 2.5547072887420654, + "788": 2.5824577808380127, + "789": 2.5537703037261963, + "790": 2.547931432723999, + "791": 2.5651164054870605, + "792": 2.5557239055633545, + "793": 2.5407819747924805, + "794": 2.5403971672058105, + "795": 2.5537753105163574, + "796": 2.5584399700164795, + "797": 2.537245273590088, + "798": 2.558483600616455, + "799": 2.5595879554748535, + "800": 2.54250431060791, + "801": 2.546520471572876, + "802": 2.557851791381836, + "803": 2.5625834465026855, + "804": 2.5644705295562744, + "805": 2.568164348602295, + "806": 2.562441349029541, + "807": 2.5585927963256836, + "808": 2.563767910003662, + "809": 2.542076349258423 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "train_epoch_time": 4.789141893386841, + "train_loss": 2.5557412305555522, + "train_score": 0.2661686243422575, + "val_loss": 2.5808811688669513, + "val_score": 0.25705008597105705 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:26:29.478094", + "final_model_norm": 87.92538452148438, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:24:48.737793", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 4.768748760223389, + "learning_rate": 2.15e-11, + "model_norm": 91.1038818359375, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.512977123260498, + "3": 7.69253396987915, + "4": 21.81309700012207, + "5": 5.747983455657959, + "6": 5.2405686378479, + "7": 4.423764705657959, + "8": 4.456271648406982, + "9": 3.984340190887451, + "10": 4.62080192565918, + "11": 4.915428638458252, + "12": 56.408966064453125, + "13": 467.4852294921875, + "14": 8.032468795776367, + "15": 3.1247873306274414, + "16": 3.5135746002197266, + "17": 4.337405681610107, + "18": 7.781325817108154, + "19": 4.247313499450684, + "20": 5.036606311798096, + "21": 3.811758279800415, + "22": 2.94175386428833, + "23": 8.17628288269043, + "24": 3.4523274898529053, + "25": 5.4133124351501465, + "26": 3.348008632659912, + "27": 14.790365219116211, + "28": 4.12067985534668, + "29": 8.541983604431152, + "30": 3.94637393951416, + "31": 2.8650941848754883, + "32": 4.193454742431641, + "33": 17.27073097229004, + "34": 7.146099090576172, + "35": 4.75208044052124, + "36": 11.106724739074707, + "37": 4.310473918914795, + "38": 2.791454792022705, + "39": 3.749058961868286, + "40": 4.987452030181885, + "41": 3.3146536350250244, + "42": 4.122133731842041, + "43": 12.054593086242676, + "44": 6.384027004241943, + "45": 12.094640731811523, + "46": 4.997376441955566, + "47": 5.145849227905273, + "48": 9.49466609954834, + "49": 4.682456016540527, + "50": 4.668734550476074, + "51": 3.8795957565307617, + "52": 2.3955156803131104, + "53": 4.768748760223389 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.8393378257751465, + "3": 3.6987521648406982, + "4": 4.2365193367004395, + "5": 4.385725975036621, + "6": 3.8917675018310547, + "7": 3.6118927001953125, + "8": 3.4665393829345703, + "9": 3.543761730194092, + "10": 3.3938088417053223, + "11": 3.585111141204834, + "12": 3.5599217414855957, + "13": 5.370888710021973, + "14": 4.655491828918457, + "15": 3.761195659637451, + "16": 3.7823736667633057, + "17": 3.613765239715576, + "18": 4.473066329956055, + "19": 4.02977991104126, + "20": 3.686736583709717, + "21": 4.340450286865234, + "22": 3.4855732917785645, + "23": 3.9735124111175537, + "24": 5.338529586791992, + "25": 4.520724773406982, + "26": 4.07056999206543, + "27": 5.221320152282715, + "28": 7.9278364181518555, + "29": 6.7389140129089355, + "30": 5.49710750579834, + "31": 3.967289924621582, + "32": 4.2562665939331055, + "33": 5.819311141967773, + "34": 9.422431945800781, + "35": 8.984328269958496, + "36": 7.551095485687256, + "37": 7.422612190246582, + "38": 5.4401750564575195, + "39": 5.10221529006958, + "40": 5.74896240234375, + "41": 4.661088943481445, + "42": 4.535565376281738, + "43": 4.940070152282715, + "44": 9.825352668762207, + "45": 9.690672874450684, + "46": 9.897573471069336, + "47": 7.447362899780273, + "48": 6.910978317260742, + "49": 10.103110313415527, + "50": 8.139686584472656, + "51": 6.164586067199707, + "52": 3.775649309158325, + "53": 3.7716851234436035 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "train_epoch_time": 4.790753364562988, + "train_loss": 5.288014858524974, + "train_score": 0.04657236370371609, + "val_loss": 5.269995704173494, + "val_score": 0.04376255736251102 + }, + { + "epoch": 1, + "grad_norm": 1.4773788452148438, + "learning_rate": 0.215, + "model_norm": 90.87535095214844, + "step_logs": { + "grad_norm": { + "54": 3.777482509613037, + "55": 12.372153282165527, + "56": 5.544676303863525, + "57": 4.050521373748779, + "58": 3.751547336578369, + "59": 3.5061357021331787, + "60": 5.444124221801758, + "61": 10.985054969787598, + "62": 4.180367469787598, + "63": 3.997178077697754, + "64": 3.327848434448242, + "65": 5.355532169342041, + "66": 2.1705005168914795, + "67": 11.513355255126953, + "68": 3.6953213214874268, + "69": 2.9278368949890137, + "70": 4.160811901092529, + "71": 2.607463836669922, + "72": 2.622835874557495, + "73": 5.121217250823975, + "74": 3.033362865447998, + "75": 2.4544143676757812, + "76": 1.2142291069030762, + "77": 4.076670169830322, + "78": 2.422085762023926, + "79": 3.275275945663452, + "80": 4.8507914543151855, + "81": 2.527409076690674, + "82": 2.189275026321411, + "83": 0.9653934240341187, + "84": 1.6801722049713135, + "85": 2.077773332595825, + "86": 7.9734320640563965, + "87": 3.171640396118164, + "88": 2.356574296951294, + "89": 2.090884208679199, + "90": 2.095611572265625, + "91": 3.5968568325042725, + "92": 1.9283243417739868, + "93": 1.6904977560043335, + "94": 3.743948459625244, + "95": 2.1623547077178955, + "96": 1.9533060789108276, + "97": 2.341980457305908, + "98": 1.8053877353668213, + "99": 0.530434250831604, + "100": 1.4637936353683472, + "101": 1.803134799003601, + "102": 3.1357672214508057, + "103": 1.8519048690795898, + "104": 1.4711562395095825, + "105": 3.2019054889678955, + "106": 1.7563107013702393, + "107": 1.4773788452148438 + }, + "loss": { + "54": 5.308948993682861, + "55": 5.660882949829102, + "56": 10.037200927734375, + "57": 8.961925506591797, + "58": 7.4425435066223145, + "59": 5.633369445800781, + "60": 4.434139251708984, + "61": 5.658693313598633, + "62": 9.487836837768555, + "63": 7.790412902832031, + "64": 6.259358882904053, + "65": 5.018208980560303, + "66": 4.425844192504883, + "67": 5.726444244384766, + "68": 8.743014335632324, + "69": 7.259713649749756, + "70": 6.063974380493164, + "71": 5.477700233459473, + "72": 4.26163911819458, + "73": 3.714019298553467, + "74": 5.2956132888793945, + "75": 4.524656772613525, + "76": 3.433161735534668, + "77": 3.573251962661743, + "78": 4.77878475189209, + "79": 3.908001184463501, + "80": 3.990009307861328, + "81": 5.180383682250977, + "82": 4.075482368469238, + "83": 3.406607151031494, + "84": 3.387862205505371, + "85": 3.63771390914917, + "86": 4.154426574707031, + "87": 6.24276065826416, + "88": 5.5883002281188965, + "89": 4.471851825714111, + "90": 3.7065000534057617, + "91": 3.6209402084350586, + "92": 4.350677013397217, + "93": 3.6024441719055176, + "94": 3.5279650688171387, + "95": 4.4583210945129395, + "96": 3.7381820678710938, + "97": 3.440885066986084, + "98": 3.8653812408447266, + "99": 3.3540751934051514, + "100": 3.383711338043213, + "101": 3.5982909202575684, + "102": 3.517383575439453, + "103": 4.132829189300537, + "104": 3.5085182189941406, + "105": 3.5050244331359863, + "106": 4.13048791885376, + "107": 3.5065300464630127 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "train_epoch_time": 4.7885589599609375, + "train_loss": 3.4609267206753, + "train_score": 0.1526105183055103, + "val_loss": 3.480834239659709, + "val_score": 0.15114182700861745 + }, + { + "epoch": 2, + "grad_norm": 0.8592105507850647, + "learning_rate": 0.215, + "model_norm": 90.87315368652344, + "step_logs": { + "grad_norm": { + "108": 2.629751443862915, + "109": 1.7028534412384033, + "110": 1.0531786680221558, + "111": 2.445246696472168, + "112": 1.618475317955017, + "113": 0.8111779093742371, + "114": 1.7762572765350342, + "115": 1.5149307250976562, + "116": 0.5596305727958679, + "117": 0.8351070284843445, + "118": 1.8120121955871582, + "119": 1.493711233139038, + "120": 0.4410189986228943, + "121": 0.5326366424560547, + "122": 1.0765730142593384, + "123": 1.1864361763000488, + "124": 1.5656208992004395, + "125": 1.3925890922546387, + "126": 0.7641895413398743, + "127": 0.9468544721603394, + "128": 1.540255069732666, + "129": 1.360662579536438, + "130": 0.7268709540367126, + "131": 0.904043972492218, + "132": 1.4499551057815552, + "133": 1.2613534927368164, + "134": 0.6526756882667542, + "135": 0.7883232235908508, + "136": 1.2192714214324951, + "137": 1.1647212505340576, + "138": 0.9443559646606445, + "139": 0.9905188679695129, + "140": 1.0919480323791504, + "141": 1.0841748714447021, + "142": 1.0498263835906982, + "143": 1.015977144241333, + "144": 0.8942776322364807, + "145": 0.9320101141929626, + "146": 1.0307154655456543, + "147": 0.9822448492050171, + "148": 0.8373734951019287, + "149": 0.8981814384460449, + "150": 1.056370496749878, + "151": 0.9678896069526672, + "152": 0.7213351726531982, + "153": 0.7626000642776489, + "154": 0.8951994776725769, + "155": 0.9377069473266602, + "156": 0.9571329951286316, + "157": 0.9304360151290894, + "158": 0.8673065900802612, + "159": 0.858120858669281, + "160": 0.8175886273384094, + "161": 0.8592105507850647 + }, + "loss": { + "108": 3.4423701763153076, + "109": 3.8930225372314453, + "110": 3.380098819732666, + "111": 3.4664440155029297, + "112": 3.824242115020752, + "113": 3.367769718170166, + "114": 3.4301443099975586, + "115": 3.6164727210998535, + "116": 3.3217084407806396, + "117": 3.3393912315368652, + "118": 3.4138548374176025, + "119": 3.62984561920166, + "120": 3.296369791030884, + "121": 3.2922067642211914, + "122": 3.375063180923462, + "123": 3.457331657409668, + "124": 3.425962448120117, + "125": 3.554396629333496, + "126": 3.363281011581421, + "127": 3.384377956390381, + "128": 3.377870559692383, + "129": 3.49696683883667, + "130": 3.3201117515563965, + "131": 3.3843016624450684, + "132": 3.382411003112793, + "133": 3.4777114391326904, + "134": 3.3196568489074707, + "135": 3.3683416843414307, + "136": 3.3693923950195312, + "137": 3.434567451477051, + "138": 3.3395345211029053, + "139": 3.402350664138794, + "140": 3.3532817363739014, + "141": 3.3957369327545166, + "142": 3.3668744564056396, + "143": 3.4562594890594482, + "144": 3.3420727252960205, + "145": 3.389443874359131, + "146": 3.3524951934814453, + "147": 3.4206604957580566, + "148": 3.3603012561798096, + "149": 3.378995180130005, + "150": 3.3784937858581543, + "151": 3.403463125228882, + "152": 3.3319382667541504, + "153": 3.3676114082336426, + "154": 3.319920539855957, + "155": 3.4096570014953613, + "156": 3.3418140411376953, + "157": 3.3852076530456543, + "158": 3.3415472507476807, + "159": 3.381582260131836, + "160": 3.3125195503234863, + "161": 3.35613751411438 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "train_epoch_time": 4.788424730300903, + "train_loss": 3.3468939742193675, + "train_score": 0.15264974893678723, + "val_loss": 3.365585728709925, + "val_score": 0.15115528137141884 + }, + { + "epoch": 3, + "grad_norm": 0.6087993383407593, + "learning_rate": 0.215, + "model_norm": 90.8847427368164, + "step_logs": { + "grad_norm": { + "162": 0.9361050724983215, + "163": 0.880911111831665, + "164": 0.776106595993042, + "165": 0.7700256705284119, + "166": 0.7481832504272461, + "167": 0.7968769669532776, + "168": 0.8789586424827576, + "169": 0.8540317416191101, + "170": 0.7815343141555786, + "171": 0.7702912092208862, + "172": 0.7692718505859375, + "173": 0.769106924533844, + "174": 0.7554870247840881, + "175": 0.7641161680221558, + "176": 0.7776530981063843, + "177": 0.7733656167984009, + "178": 0.7354782819747925, + "179": 0.7410027384757996, + "180": 0.7778562307357788, + "181": 0.7700233459472656, + "182": 0.7346639633178711, + "183": 0.7208408117294312, + "184": 0.6835162043571472, + "185": 0.6808939576148987, + "186": 0.6911190748214722, + "187": 0.7192779183387756, + "188": 0.731620728969574, + "189": 0.7152380347251892, + "190": 0.7036978006362915, + "191": 0.6851203441619873, + "192": 0.64600670337677, + "193": 0.6500484943389893, + "194": 0.6319776177406311, + "195": 0.6150934100151062, + "196": 0.5809333920478821, + "197": 0.5438845753669739, + "198": 0.43556106090545654, + "199": 0.4299978017807007, + "200": 0.47716495394706726, + "201": 0.4903109073638916, + "202": 0.48185867071151733, + "203": 0.5080997347831726, + "204": 0.5930094122886658, + "205": 0.6333677172660828, + "206": 0.700766384601593, + "207": 0.706038236618042, + "208": 0.6878578066825867, + "209": 0.6737412810325623, + "210": 0.6068543195724487, + "211": 0.5982890129089355, + "212": 0.617000937461853, + "213": 0.6061030626296997, + "214": 0.5725844502449036, + "215": 0.6087993383407593 + }, + "loss": { + "162": 3.330514430999756, + "163": 3.390855312347412, + "164": 3.3226938247680664, + "165": 3.357847213745117, + "166": 3.3357012271881104, + "167": 3.3582639694213867, + "168": 3.369551181793213, + "169": 3.3661460876464844, + "170": 3.3172967433929443, + "171": 3.3735604286193848, + "172": 3.3187832832336426, + "173": 3.358832836151123, + "174": 3.3696632385253906, + "175": 3.3532443046569824, + "176": 3.3249213695526123, + "177": 3.338181257247925, + "178": 3.315990686416626, + "179": 3.335751533508301, + "180": 3.3583521842956543, + "181": 3.349484443664551, + "182": 3.311713457107544, + "183": 3.3350706100463867, + "184": 3.3260340690612793, + "185": 3.3500959873199463, + "186": 3.3126401901245117, + "187": 3.346989393234253, + "188": 3.3468844890594482, + "189": 3.341794490814209, + "190": 3.3432457447052, + "191": 3.358865737915039, + "192": 3.308229684829712, + "193": 3.341928243637085, + "194": 3.314577579498291, + "195": 3.3355393409729004, + "196": 3.3360981941223145, + "197": 3.359072208404541, + "198": 3.2878284454345703, + "199": 3.304107189178467, + "200": 3.3157496452331543, + "201": 3.3576459884643555, + "202": 3.2921295166015625, + "203": 3.3005096912384033, + "204": 3.3229928016662598, + "205": 3.3249733448028564, + "206": 3.323307752609253, + "207": 3.322645664215088, + "208": 3.3047432899475098, + "209": 3.3143444061279297, + "210": 3.282731533050537, + "211": 3.304281711578369, + "212": 3.322359561920166, + "213": 3.3297743797302246, + "214": 3.271000385284424, + "215": 3.283421277999878 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "train_epoch_time": 4.788079738616943, + "train_loss": 3.2949323500246024, + "train_score": 0.15291315464241434, + "val_loss": 3.311749011859828, + "val_score": 0.15153200334166825 + }, + { + "epoch": 4, + "grad_norm": 0.9821807146072388, + "learning_rate": 0.215, + "model_norm": 90.92161560058594, + "step_logs": { + "grad_norm": { + "216": 0.7106828093528748, + "217": 0.7289441227912903, + "218": 0.7488495707511902, + "219": 0.7892098426818848, + "220": 0.8758535385131836, + "221": 0.8916842341423035, + "222": 0.7863014936447144, + "223": 0.7079423069953918, + "224": 0.644995927810669, + "225": 0.6450706124305725, + "226": 0.6772181391716003, + "227": 0.7630394101142883, + "228": 0.8540375828742981, + "229": 0.8938946723937988, + "230": 0.8704829216003418, + "231": 0.7382178902626038, + "232": 0.5688614845275879, + "233": 0.5606990456581116, + "234": 0.6594306230545044, + "235": 0.8704900741577148, + "236": 1.026406168937683, + "237": 1.0585143566131592, + "238": 1.070932149887085, + "239": 1.240985631942749, + "240": 1.182536005973816, + "241": 1.1327226161956787, + "242": 1.0874539613723755, + "243": 1.2034251689910889, + "244": 1.2215259075164795, + "245": 1.2296887636184692, + "246": 1.3253308534622192, + "247": 1.3344638347625732, + "248": 1.2188246250152588, + "249": 1.0702893733978271, + "250": 1.048462152481079, + "251": 1.0182101726531982, + "252": 1.0540645122528076, + "253": 1.1424111127853394, + "254": 1.3030123710632324, + "255": 1.4511394500732422, + "256": 1.334547758102417, + "257": 1.1782286167144775, + "258": 1.0350970029830933, + "259": 1.0300384759902954, + "260": 1.1178781986236572, + "261": 1.012025237083435, + "262": 0.8595377206802368, + "263": 0.873961329460144, + "264": 0.8866082429885864, + "265": 0.9155511260032654, + "266": 0.9512255191802979, + "267": 0.9299663305282593, + "268": 0.9550856351852417, + "269": 0.9821807146072388 + }, + "loss": { + "216": 3.3120460510253906, + "217": 3.2978439331054688, + "218": 3.3191962242126465, + "219": 3.2930498123168945, + "220": 3.3162074089050293, + "221": 3.280308723449707, + "222": 3.2659242153167725, + "223": 3.270326614379883, + "224": 3.2244462966918945, + "225": 3.248737335205078, + "226": 3.21547532081604, + "227": 3.19978666305542, + "228": 3.222573757171631, + "229": 3.186828851699829, + "230": 3.167494773864746, + "231": 3.1894612312316895, + "232": 3.1455721855163574, + "233": 3.1205992698669434, + "234": 3.141139507293701, + "235": 3.115446090698242, + "236": 3.124189853668213, + "237": 3.144502639770508, + "238": 3.1282873153686523, + "239": 3.1211657524108887, + "240": 3.110682487487793, + "241": 3.0861527919769287, + "242": 3.0409159660339355, + "243": 3.0720391273498535, + "244": 3.0308175086975098, + "245": 3.0496511459350586, + "246": 3.045163631439209, + "247": 3.0699145793914795, + "248": 3.0300710201263428, + "249": 3.0218124389648438, + "250": 2.9954240322113037, + "251": 2.9963502883911133, + "252": 2.971977710723877, + "253": 2.99910306930542, + "254": 2.996168375015259, + "255": 3.0508017539978027, + "256": 3.0176072120666504, + "257": 3.0055220127105713, + "258": 2.955789566040039, + "259": 2.9714255332946777, + "260": 2.964890956878662, + "261": 2.98284912109375, + "262": 2.948190689086914, + "263": 2.9432339668273926, + "264": 2.919694423675537, + "265": 2.944222927093506, + "266": 2.935487747192383, + "267": 2.9397125244140625, + "268": 2.9095535278320312, + "269": 2.956695079803467 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "train_epoch_time": 4.788573503494263, + "train_loss": 2.929616554293092, + "train_score": 0.2061939561854476, + "val_loss": 2.9433626835710283, + "val_score": 0.20191859199861162 + }, + { + "epoch": 5, + "grad_norm": 0.7710897922515869, + "learning_rate": 0.215, + "model_norm": 90.93704223632812, + "step_logs": { + "grad_norm": { + "270": 0.9826538562774658, + "271": 0.9667792320251465, + "272": 0.9048634171485901, + "273": 0.9344331622123718, + "274": 0.9651849269866943, + "275": 1.1015145778656006, + "276": 1.1377453804016113, + "277": 1.186976671218872, + "278": 0.9845960736274719, + "279": 0.8418571352958679, + "280": 0.7251552939414978, + "281": 0.6260077357292175, + "282": 0.6355652809143066, + "283": 0.6870912909507751, + "284": 0.7565816640853882, + "285": 0.8322285413742065, + "286": 0.9116373062133789, + "287": 0.9543244242668152, + "288": 0.9194244742393494, + "289": 0.8983859419822693, + "290": 0.942709743976593, + "291": 0.9613945484161377, + "292": 0.956580400466919, + "293": 0.8938926458358765, + "294": 0.8112130761146545, + "295": 0.7788195610046387, + "296": 0.7702645063400269, + "297": 0.7575488686561584, + "298": 0.7975647449493408, + "299": 0.7941131591796875, + "300": 1.005330204963684, + "301": 1.058396816253662, + "302": 1.203217625617981, + "303": 1.216032862663269, + "304": 1.217220425605774, + "305": 1.472736120223999, + "306": 1.3118972778320312, + "307": 1.011157512664795, + "308": 0.8079589605331421, + "309": 0.6990920901298523, + "310": 0.7661309242248535, + "311": 0.8004953265190125, + "312": 1.0540677309036255, + "313": 1.0774898529052734, + "314": 0.9952903985977173, + "315": 1.055246114730835, + "316": 1.1704283952713013, + "317": 1.2923685312271118, + "318": 1.196557879447937, + "319": 1.065063238143921, + "320": 0.9533786177635193, + "321": 0.8603740930557251, + "322": 0.7936952114105225, + "323": 0.7710897922515869 + }, + "loss": { + "270": 2.921260356903076, + "271": 2.9505672454833984, + "272": 2.942061185836792, + "273": 2.927546977996826, + "274": 2.9226222038269043, + "275": 2.938413143157959, + "276": 2.965902328491211, + "277": 2.9459402561187744, + "278": 2.964310646057129, + "279": 2.9288299083709717, + "280": 2.925896644592285, + "281": 2.8850784301757812, + "282": 2.899108409881592, + "283": 2.900360107421875, + "284": 2.909640312194824, + "285": 2.8972275257110596, + "286": 2.9204258918762207, + "287": 2.9014711380004883, + "288": 2.9194884300231934, + "289": 2.910529613494873, + "290": 2.9192309379577637, + "291": 2.9204721450805664, + "292": 2.9077541828155518, + "293": 2.9187631607055664, + "294": 2.888162612915039, + "295": 2.9116618633270264, + "296": 2.892401933670044, + "297": 2.889254093170166, + "298": 2.897447347640991, + "299": 2.885918617248535, + "300": 2.89274263381958, + "301": 2.937957525253296, + "302": 2.9188385009765625, + "303": 2.963953971862793, + "304": 2.934126853942871, + "305": 2.9722161293029785, + "306": 2.9473910331726074, + "307": 2.921459436416626, + "308": 2.894925832748413, + "309": 2.881929397583008, + "310": 2.8746695518493652, + "311": 2.894906759262085, + "312": 2.867159366607666, + "313": 2.92246413230896, + "314": 2.875049114227295, + "315": 2.908360004425049, + "316": 2.9193291664123535, + "317": 2.9422414302825928, + "318": 2.925055503845215, + "319": 2.9113144874572754, + "320": 2.8835608959198, + "321": 2.886540651321411, + "322": 2.868971109390259, + "323": 2.871776580810547 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "train_epoch_time": 4.788305759429932, + "train_loss": 2.8671649144745968, + "train_score": 0.21053174318507892, + "val_loss": 2.889315739564863, + "val_score": 0.204286559834546 + }, + { + "epoch": 6, + "grad_norm": 1.035821557044983, + "learning_rate": 0.215, + "model_norm": 90.9523696899414, + "step_logs": { + "grad_norm": { + "324": 0.7981342077255249, + "325": 0.8987910747528076, + "326": 1.0302822589874268, + "327": 1.0341377258300781, + "328": 0.9448365569114685, + "329": 0.9385917782783508, + "330": 0.9752305150032043, + "331": 1.0387269258499146, + "332": 1.0405452251434326, + "333": 0.977863609790802, + "334": 0.8374691009521484, + "335": 0.7338889241218567, + "336": 0.7356550693511963, + "337": 0.7309194207191467, + "338": 0.7133876085281372, + "339": 0.7473136186599731, + "340": 0.8495583534240723, + "341": 0.9066926836967468, + "342": 0.8879075050354004, + "343": 0.8375911116600037, + "344": 0.8218190670013428, + "345": 0.8624690175056458, + "346": 0.8988082408905029, + "347": 0.9910452365875244, + "348": 0.9790074229240417, + "349": 0.8766512274742126, + "350": 0.8103423714637756, + "351": 0.7999845147132874, + "352": 0.8938480615615845, + "353": 0.9749035835266113, + "354": 1.0497792959213257, + "355": 1.1329431533813477, + "356": 1.0805407762527466, + "357": 1.132259488105774, + "358": 1.0434494018554688, + "359": 0.9649423956871033, + "360": 1.0336040258407593, + "361": 1.1711769104003906, + "362": 1.0905392169952393, + "363": 0.8944039940834045, + "364": 0.9082133173942566, + "365": 0.9736754298210144, + "366": 0.9713060855865479, + "367": 0.9027257561683655, + "368": 0.8863203525543213, + "369": 0.9012054800987244, + "370": 0.8935128450393677, + "371": 0.8855329751968384, + "372": 0.8607180714607239, + "373": 0.9539946913719177, + "374": 1.0477733612060547, + "375": 1.0993893146514893, + "376": 1.1203137636184692, + "377": 1.035821557044983 + }, + "loss": { + "324": 2.857433319091797, + "325": 2.8956727981567383, + "326": 2.8817408084869385, + "327": 2.9009511470794678, + "328": 2.8811378479003906, + "329": 2.864175319671631, + "330": 2.8641550540924072, + "331": 2.880978584289551, + "332": 2.891833543777466, + "333": 2.8694396018981934, + "334": 2.8575727939605713, + "335": 2.8614745140075684, + "336": 2.860243558883667, + "337": 2.850189685821533, + "338": 2.844123363494873, + "339": 2.8437130451202393, + "340": 2.8458852767944336, + "341": 2.877870559692383, + "342": 2.8635194301605225, + "343": 2.841906785964966, + "344": 2.8393115997314453, + "345": 2.8548948764801025, + "346": 2.852095127105713, + "347": 2.8916561603546143, + "348": 2.874770164489746, + "349": 2.8520350456237793, + "350": 2.843285083770752, + "351": 2.847202777862549, + "352": 2.8495430946350098, + "353": 2.861645460128784, + "354": 2.860210418701172, + "355": 2.8598785400390625, + "356": 2.8754961490631104, + "357": 2.8878116607666016, + "358": 2.9009838104248047, + "359": 2.836493968963623, + "360": 2.843789577484131, + "361": 2.8523473739624023, + "362": 2.9021668434143066, + "363": 2.836028814315796, + "364": 2.8244357109069824, + "365": 2.844738483428955, + "366": 2.8751652240753174, + "367": 2.8390631675720215, + "368": 2.847855567932129, + "369": 2.8226897716522217, + "370": 2.8287134170532227, + "371": 2.8229715824127197, + "372": 2.8411402702331543, + "373": 2.8220102787017822, + "374": 2.872851848602295, + "375": 2.8293468952178955, + "376": 2.8655569553375244, + "377": 2.8372063636779785 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "train_epoch_time": 4.78901481628418, + "train_loss": 2.8365759218098274, + "train_score": 0.2067824157443929, + "val_loss": 2.86207695779242, + "val_score": 0.2005552170642071 + }, + { + "epoch": 7, + "grad_norm": 0.7384330034255981, + "learning_rate": 0.215, + "model_norm": 90.97216033935547, + "step_logs": { + "grad_norm": { + "378": 0.9507484436035156, + "379": 0.810348391532898, + "380": 0.716852605342865, + "381": 0.6468346118927002, + "382": 0.5992889404296875, + "383": 0.58887779712677, + "384": 0.6116159558296204, + "385": 0.7137957215309143, + "386": 0.7953575253486633, + "387": 0.972137987613678, + "388": 1.0486242771148682, + "389": 1.1355589628219604, + "390": 1.0920562744140625, + "391": 1.0073076486587524, + "392": 1.081760287284851, + "393": 1.103289246559143, + "394": 1.0707428455352783, + "395": 0.975640058517456, + "396": 0.9401341080665588, + "397": 0.891375720500946, + "398": 0.7809472680091858, + "399": 0.735560953617096, + "400": 0.6912685036659241, + "401": 0.7386559247970581, + "402": 0.7470145225524902, + "403": 0.8052874207496643, + "404": 0.8702596426010132, + "405": 1.0296536684036255, + "406": 1.056732416152954, + "407": 1.0242395401000977, + "408": 0.9787147045135498, + "409": 0.8223461508750916, + "410": 0.7620035409927368, + "411": 0.7719262838363647, + "412": 0.8005836009979248, + "413": 0.8055107593536377, + "414": 0.7846072912216187, + "415": 0.7065761685371399, + "416": 0.7001236081123352, + "417": 0.7254408597946167, + "418": 0.7898728847503662, + "419": 0.9493966102600098, + "420": 0.9976383447647095, + "421": 1.0153205394744873, + "422": 1.0433634519577026, + "423": 1.0602290630340576, + "424": 1.0212446451187134, + "425": 0.9672016501426697, + "426": 1.1142154932022095, + "427": 1.3501923084259033, + "428": 1.3754948377609253, + "429": 1.0360045433044434, + "430": 0.8087738752365112, + "431": 0.7384330034255981 + }, + "loss": { + "378": 2.824772357940674, + "379": 2.805464744567871, + "380": 2.8015615940093994, + "381": 2.783661127090454, + "382": 2.800049066543579, + "383": 2.7849767208099365, + "384": 2.780452251434326, + "385": 2.786926507949829, + "386": 2.793444871902466, + "387": 2.7944281101226807, + "388": 2.842393398284912, + "389": 2.8049488067626953, + "390": 2.8402552604675293, + "391": 2.796936273574829, + "392": 2.8165361881256104, + "393": 2.8305273056030273, + "394": 2.831249713897705, + "395": 2.7945737838745117, + "396": 2.802100658416748, + "397": 2.797694683074951, + "398": 2.794318199157715, + "399": 2.7647833824157715, + "400": 2.7864327430725098, + "401": 2.766012668609619, + "402": 2.7869274616241455, + "403": 2.777254104614258, + "404": 2.7917895317077637, + "405": 2.770083427429199, + "406": 2.818441867828369, + "407": 2.788850784301758, + "408": 2.7916901111602783, + "409": 2.780367851257324, + "410": 2.7838335037231445, + "411": 2.7763659954071045, + "412": 2.772993564605713, + "413": 2.7483928203582764, + "414": 2.7762808799743652, + "415": 2.7538294792175293, + "416": 2.738293409347534, + "417": 2.7267959117889404, + "418": 2.7461116313934326, + "419": 2.752950668334961, + "420": 2.770277500152588, + "421": 2.736666440963745, + "422": 2.769049882888794, + "423": 2.767879009246826, + "424": 2.765526533126831, + "425": 2.7341904640197754, + "426": 2.7779548168182373, + "427": 2.778820514678955, + "428": 2.8092403411865234, + "429": 2.765235424041748, + "430": 2.736257553100586, + "431": 2.7105939388275146 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "train_epoch_time": 4.788388729095459, + "train_loss": 2.7166390637927282, + "train_score": 0.24429698710889694, + "val_loss": 2.7314381952537596, + "val_score": 0.2374381098113569 + }, + { + "epoch": 8, + "grad_norm": 1.186734914779663, + "learning_rate": 0.215, + "model_norm": 90.99371337890625, + "step_logs": { + "grad_norm": { + "432": 0.682357907295227, + "433": 0.7879934310913086, + "434": 0.839699923992157, + "435": 0.9034207463264465, + "436": 0.9427834749221802, + "437": 1.0318924188613892, + "438": 1.0097386837005615, + "439": 0.8537572026252747, + "440": 0.7883947491645813, + "441": 0.7391544580459595, + "442": 0.7961458563804626, + "443": 0.8726483583450317, + "444": 0.8910840749740601, + "445": 0.9529770612716675, + "446": 1.0102248191833496, + "447": 1.102903962135315, + "448": 1.0989457368850708, + "449": 1.0225383043289185, + "450": 0.9723678231239319, + "451": 0.8849908113479614, + "452": 0.8795887231826782, + "453": 0.9208425879478455, + "454": 0.923948347568512, + "455": 0.9137952327728271, + "456": 0.8775184750556946, + "457": 0.8621120452880859, + "458": 0.9566969871520996, + "459": 1.244726538658142, + "460": 1.4193376302719116, + "461": 1.4288270473480225, + "462": 1.2880305051803589, + "463": 1.0228943824768066, + "464": 0.90586918592453, + "465": 0.8441426753997803, + "466": 0.7748436331748962, + "467": 0.7632634043693542, + "468": 0.9062524437904358, + "469": 0.9382572174072266, + "470": 1.0792484283447266, + "471": 1.1229135990142822, + "472": 1.1008241176605225, + "473": 1.0941704511642456, + "474": 1.0628392696380615, + "475": 1.113686203956604, + "476": 1.03805410861969, + "477": 0.9889453053474426, + "478": 0.99018394947052, + "479": 0.9048968553543091, + "480": 0.762435257434845, + "481": 0.7004373669624329, + "482": 0.6968955993652344, + "483": 0.8532578349113464, + "484": 1.0391627550125122, + "485": 1.186734914779663 + }, + "loss": { + "432": 2.711761713027954, + "433": 2.701874256134033, + "434": 2.7173707485198975, + "435": 2.717919111251831, + "436": 2.7440590858459473, + "437": 2.716951847076416, + "438": 2.7477917671203613, + "439": 2.711336612701416, + "440": 2.734084129333496, + "441": 2.686770439147949, + "442": 2.7162363529205322, + "443": 2.704514503479004, + "444": 2.718574047088623, + "445": 2.7034876346588135, + "446": 2.7333061695098877, + "447": 2.7030677795410156, + "448": 2.744272232055664, + "449": 2.7094311714172363, + "450": 2.731375217437744, + "451": 2.689774990081787, + "452": 2.6910858154296875, + "453": 2.6939282417297363, + "454": 2.726001262664795, + "455": 2.691908121109009, + "456": 2.6913273334503174, + "457": 2.6898093223571777, + "458": 2.703033924102783, + "459": 2.7084052562713623, + "460": 2.780323028564453, + "461": 2.760359287261963, + "462": 2.7674484252929688, + "463": 2.703690528869629, + "464": 2.688377618789673, + "465": 2.705472469329834, + "466": 2.6742730140686035, + "467": 2.677372932434082, + "468": 2.6668734550476074, + "469": 2.688608407974243, + "470": 2.682274103164673, + "471": 2.7363436222076416, + "472": 2.7071564197540283, + "473": 2.720700979232788, + "474": 2.679595470428467, + "475": 2.7048511505126953, + "476": 2.6746039390563965, + "477": 2.6931827068328857, + "478": 2.669553518295288, + "479": 2.6869726181030273, + "480": 2.6779532432556152, + "481": 2.6584787368774414, + "482": 2.6361300945281982, + "483": 2.6422805786132812, + "484": 2.6790413856506348, + "485": 2.7129361629486084 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "train_epoch_time": 4.790045499801636, + "train_loss": 2.6991676622689025, + "train_score": 0.2482413468564429, + "val_loss": 2.714708259814642, + "val_score": 0.2427346445320668 + }, + { + "epoch": 9, + "grad_norm": 1.203169822692871, + "learning_rate": 0.215, + "model_norm": 91.01960754394531, + "step_logs": { + "grad_norm": { + "486": 1.0850011110305786, + "487": 0.9896923899650574, + "488": 0.9088544845581055, + "489": 0.8589906692504883, + "490": 0.7930198907852173, + "491": 0.6774317622184753, + "492": 0.6814072728157043, + "493": 0.7896897792816162, + "494": 0.9204282760620117, + "495": 1.031484842300415, + "496": 1.2368268966674805, + "497": 1.2173197269439697, + "498": 1.0859532356262207, + "499": 1.083764910697937, + "500": 1.0810452699661255, + "501": 1.0741888284683228, + "502": 0.927306592464447, + "503": 0.8310501575469971, + "504": 0.7653688192367554, + "505": 0.7235044836997986, + "506": 0.7619850039482117, + "507": 0.8009397983551025, + "508": 0.8710378408432007, + "509": 0.8227996826171875, + "510": 0.8100309371948242, + "511": 0.8168901801109314, + "512": 0.7745519876480103, + "513": 0.7738197445869446, + "514": 0.6978654265403748, + "515": 0.7017480731010437, + "516": 0.7891150712966919, + "517": 0.8656883835792542, + "518": 0.9118044376373291, + "519": 1.0882283449172974, + "520": 1.1870903968811035, + "521": 1.3700114488601685, + "522": 1.319442629814148, + "523": 1.0366289615631104, + "524": 0.9920868277549744, + "525": 0.9393367171287537, + "526": 0.9754626154899597, + "527": 0.9165051579475403, + "528": 0.8340020179748535, + "529": 0.8424265384674072, + "530": 0.8531990051269531, + "531": 0.7943776249885559, + "532": 0.7697798609733582, + "533": 0.7897421717643738, + "534": 0.815986156463623, + "535": 0.8025452494621277, + "536": 0.7521207928657532, + "537": 0.7967057228088379, + "538": 0.901458740234375, + "539": 1.203169822692871 + }, + "loss": { + "486": 2.709219455718994, + "487": 2.6682960987091064, + "488": 2.6564416885375977, + "489": 2.671679973602295, + "490": 2.6597859859466553, + "491": 2.6467785835266113, + "492": 2.6194868087768555, + "493": 2.6616547107696533, + "494": 2.656613349914551, + "495": 2.6583967208862305, + "496": 2.6574866771698, + "497": 2.7099177837371826, + "498": 2.663578510284424, + "499": 2.6736714839935303, + "500": 2.646918296813965, + "501": 2.6775763034820557, + "502": 2.6462886333465576, + "503": 2.6455416679382324, + "504": 2.642223834991455, + "505": 2.601130485534668, + "506": 2.603966236114502, + "507": 2.6414854526519775, + "508": 2.6106271743774414, + "509": 2.635369300842285, + "510": 2.6214137077331543, + "511": 2.624495029449463, + "512": 2.6106646060943604, + "513": 2.629624366760254, + "514": 2.63250732421875, + "515": 2.5941827297210693, + "516": 2.599414348602295, + "517": 2.603377342224121, + "518": 2.6217551231384277, + "519": 2.630720615386963, + "520": 2.655055522918701, + "521": 2.645128011703491, + "522": 2.7116947174072266, + "523": 2.637521743774414, + "524": 2.634775400161743, + "525": 2.626157283782959, + "526": 2.625753164291382, + "527": 2.6087613105773926, + "528": 2.592386484146118, + "529": 2.5810465812683105, + "530": 2.6116766929626465, + "531": 2.584160327911377, + "532": 2.5971899032592773, + "533": 2.58386492729187, + "534": 2.578446388244629, + "535": 2.567394733428955, + "536": 2.559359312057495, + "537": 2.540008306503296, + "538": 2.5873641967773438, + "539": 2.5817277431488037 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "train_epoch_time": 4.78992486000061, + "train_loss": 2.6492610668007237, + "train_score": 0.22535083389265123, + "val_loss": 2.668723805489962, + "val_score": 0.220593247584032 + }, + { + "epoch": 10, + "grad_norm": 0.7871867418289185, + "learning_rate": 0.215, + "model_norm": 91.04473876953125, + "step_logs": { + "grad_norm": { + "540": 1.2602779865264893, + "541": 1.2112749814987183, + "542": 1.174929141998291, + "543": 0.983267605304718, + "544": 1.0428472757339478, + "545": 0.9895178079605103, + "546": 0.9512880444526672, + "547": 0.9754201769828796, + "548": 0.9579707384109497, + "549": 0.9788139462471008, + "550": 0.9489416480064392, + "551": 0.8465482592582703, + "552": 0.7796411514282227, + "553": 0.8723862171173096, + "554": 1.021549105644226, + "555": 1.3432765007019043, + "556": 1.3512208461761475, + "557": 1.0133731365203857, + "558": 0.9505003690719604, + "559": 1.0170173645019531, + "560": 1.0611085891723633, + "561": 0.9612119197845459, + "562": 0.8281588554382324, + "563": 0.7536771893501282, + "564": 0.672906219959259, + "565": 0.6428030729293823, + "566": 0.7232397794723511, + "567": 0.9086180329322815, + "568": 0.9008992910385132, + "569": 0.832752525806427, + "570": 0.866485595703125, + "571": 0.9066757559776306, + "572": 0.9216229319572449, + "573": 0.955962598323822, + "574": 0.9388243556022644, + "575": 0.8178623914718628, + "576": 0.801074743270874, + "577": 0.8566450476646423, + "578": 1.083534598350525, + "579": 1.1811403036117554, + "580": 1.2144383192062378, + "581": 1.2031227350234985, + "582": 1.0034691095352173, + "583": 0.9011889100074768, + "584": 0.8310750722885132, + "585": 0.7468541264533997, + "586": 0.7037428021430969, + "587": 0.646641194820404, + "588": 0.5984599590301514, + "589": 0.6355694532394409, + "590": 0.6957169771194458, + "591": 0.7257423400878906, + "592": 0.7625904083251953, + "593": 0.7871867418289185 + }, + "loss": { + "540": 2.6534512042999268, + "541": 2.593578815460205, + "542": 2.648163318634033, + "543": 2.620131731033325, + "544": 2.587038278579712, + "545": 2.581254720687866, + "546": 2.5697569847106934, + "547": 2.5983409881591797, + "548": 2.571627616882324, + "549": 2.575474739074707, + "550": 2.579862117767334, + "551": 2.562931776046753, + "552": 2.5702977180480957, + "553": 2.5301671028137207, + "554": 2.5761911869049072, + "555": 2.6057610511779785, + "556": 2.6591172218322754, + "557": 2.569087028503418, + "558": 2.5528788566589355, + "559": 2.587589740753174, + "560": 2.551438093185425, + "561": 2.59968638420105, + "562": 2.567417621612549, + "563": 2.5396628379821777, + "564": 2.5480270385742188, + "565": 2.5135445594787598, + "566": 2.529240608215332, + "567": 2.5173680782318115, + "568": 2.565337657928467, + "569": 2.5562033653259277, + "570": 2.5528626441955566, + "571": 2.5378787517547607, + "572": 2.5373783111572266, + "573": 2.5506067276000977, + "574": 2.535454273223877, + "575": 2.527514934539795, + "576": 2.522322416305542, + "577": 2.531142234802246, + "578": 2.5362415313720703, + "579": 2.5890583992004395, + "580": 2.5655345916748047, + "581": 2.574313163757324, + "582": 2.550959587097168, + "583": 2.52030611038208, + "584": 2.530621290206909, + "585": 2.499192714691162, + "586": 2.503462791442871, + "587": 2.510725259780884, + "588": 2.48935866355896, + "589": 2.4809839725494385, + "590": 2.508025646209717, + "591": 2.504624843597412, + "592": 2.4841713905334473, + "593": 2.5031657218933105 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "train_epoch_time": 4.7895379066467285, + "train_loss": 2.5049408083495655, + "train_score": 0.29937791432677585, + "val_loss": 2.524848353027887, + "val_score": 0.2920314301100445 + }, + { + "epoch": 11, + "grad_norm": 1.018774390220642, + "learning_rate": 0.215, + "model_norm": 91.06373596191406, + "step_logs": { + "grad_norm": { + "594": 0.7413973212242126, + "595": 0.6726373434066772, + "596": 0.6823249459266663, + "597": 0.8069524168968201, + "598": 1.0592670440673828, + "599": 1.1828581094741821, + "600": 1.2749831676483154, + "601": 1.3434191942214966, + "602": 1.2307745218276978, + "603": 0.9597514867782593, + "604": 0.6865532994270325, + "605": 0.5563322305679321, + "606": 0.48271387815475464, + "607": 0.4907934069633484, + "608": 0.5307155251502991, + "609": 0.5439010858535767, + "610": 0.5960483551025391, + "611": 0.7206151485443115, + "612": 0.8573791980743408, + "613": 0.9597889184951782, + "614": 1.0009410381317139, + "615": 1.0134549140930176, + "616": 1.0042377710342407, + "617": 0.9839712381362915, + "618": 0.9389065504074097, + "619": 0.8162915706634521, + "620": 0.7601591944694519, + "621": 0.6697289347648621, + "622": 0.6222559213638306, + "623": 0.5901453495025635, + "624": 0.5396410226821899, + "625": 0.567928671836853, + "626": 0.6483227610588074, + "627": 0.7782958149909973, + "628": 0.9330300092697144, + "629": 1.0496703386306763, + "630": 1.0337681770324707, + "631": 1.0632206201553345, + "632": 1.0099393129348755, + "633": 0.9314202070236206, + "634": 0.9370139241218567, + "635": 0.8868887424468994, + "636": 0.9204682111740112, + "637": 0.958300769329071, + "638": 0.9064491987228394, + "639": 0.790294349193573, + "640": 0.7852020263671875, + "641": 0.7949424386024475, + "642": 0.7599385976791382, + "643": 0.8014470338821411, + "644": 0.8337162733078003, + "645": 0.886461615562439, + "646": 0.9523445963859558, + "647": 1.018774390220642 + }, + "loss": { + "594": 2.5035481452941895, + "595": 2.5226058959960938, + "596": 2.4986114501953125, + "597": 2.476590156555176, + "598": 2.530712127685547, + "599": 2.539583683013916, + "600": 2.5293524265289307, + "601": 2.5718021392822266, + "602": 2.563169002532959, + "603": 2.5441746711730957, + "604": 2.479951858520508, + "605": 2.4913501739501953, + "606": 2.483147382736206, + "607": 2.4557957649230957, + "608": 2.462026596069336, + "609": 2.488463878631592, + "610": 2.495217800140381, + "611": 2.477020740509033, + "612": 2.5005712509155273, + "613": 2.4987125396728516, + "614": 2.522017478942871, + "615": 2.518216609954834, + "616": 2.5110747814178467, + "617": 2.5315303802490234, + "618": 2.5348381996154785, + "619": 2.504847526550293, + "620": 2.493001937866211, + "621": 2.4996390342712402, + "622": 2.4671339988708496, + "623": 2.4714293479919434, + "624": 2.472681999206543, + "625": 2.44960880279541, + "626": 2.457479953765869, + "627": 2.498391628265381, + "628": 2.495114326477051, + "629": 2.4832682609558105, + "630": 2.492354393005371, + "631": 2.5123190879821777, + "632": 2.5073142051696777, + "633": 2.4813597202301025, + "634": 2.4902448654174805, + "635": 2.45639705657959, + "636": 2.5060043334960938, + "637": 2.5054495334625244, + "638": 2.5122714042663574, + "639": 2.452633857727051, + "640": 2.446535587310791, + "641": 2.4790456295013428, + "642": 2.4660730361938477, + "643": 2.4774904251098633, + "644": 2.4870049953460693, + "645": 2.4797635078430176, + "646": 2.5009849071502686, + "647": 2.48178768157959 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "train_epoch_time": 4.790262937545776, + "train_loss": 2.498673621551207, + "train_score": 0.2863712786944046, + "val_loss": 2.522901584305528, + "val_score": 0.27530317147071 + }, + { + "epoch": 12, + "grad_norm": 0.33665525913238525, + "learning_rate": 0.215, + "model_norm": 91.08087158203125, + "step_logs": { + "grad_norm": { + "648": 0.9625231623649597, + "649": 0.8293014168739319, + "650": 0.7890959978103638, + "651": 0.7312183976173401, + "652": 0.7107807397842407, + "653": 0.734476625919342, + "654": 0.7616944909095764, + "655": 0.8186032772064209, + "656": 0.8154640197753906, + "657": 0.6950883865356445, + "658": 0.5980730056762695, + "659": 0.5517351031303406, + "660": 0.49539831280708313, + "661": 0.46470922231674194, + "662": 0.5018994808197021, + "663": 0.4947921633720398, + "664": 0.4292982220649719, + "665": 0.40979939699172974, + "666": 0.43825864791870117, + "667": 0.3937656879425049, + "668": 0.37566396594047546, + "669": 0.4169904589653015, + "670": 0.561857283115387, + "671": 0.536394476890564, + "672": 0.37791213393211365, + "673": 0.35284048318862915, + "674": 0.33954349160194397, + "675": 0.3375079035758972, + "676": 0.35773971676826477, + "677": 0.36039799451828003, + "678": 0.3521564304828644, + "679": 0.3392495810985565, + "680": 0.31043189764022827, + "681": 0.3435986340045929, + "682": 0.37561917304992676, + "683": 0.44684740900993347, + "684": 0.41413599252700806, + "685": 0.3778912425041199, + "686": 0.34594035148620605, + "687": 0.3463926613330841, + "688": 0.31765952706336975, + "689": 0.2990760803222656, + "690": 0.32804200053215027, + "691": 0.3435922861099243, + "692": 0.2888297140598297, + "693": 0.28180789947509766, + "694": 0.24968475103378296, + "695": 0.23767679929733276, + "696": 0.29190051555633545, + "697": 0.3467346429824829, + "698": 0.43864163756370544, + "699": 0.45127299427986145, + "700": 0.36476215720176697, + "701": 0.33665525913238525 + }, + "loss": { + "648": 2.490993022918701, + "649": 2.4783997535705566, + "650": 2.4501852989196777, + "651": 2.4527153968811035, + "652": 2.4886221885681152, + "653": 2.463770866394043, + "654": 2.4613871574401855, + "655": 2.4705238342285156, + "656": 2.463290214538574, + "657": 2.446429491043091, + "658": 2.4535303115844727, + "659": 2.4461350440979004, + "660": 2.4382517337799072, + "661": 2.4183411598205566, + "662": 2.437877893447876, + "663": 2.4607834815979004, + "664": 2.441830635070801, + "665": 2.453338384628296, + "666": 2.4214272499084473, + "667": 2.422410488128662, + "668": 2.4281363487243652, + "669": 2.4038569927215576, + "670": 2.429398536682129, + "671": 2.4363975524902344, + "672": 2.4295310974121094, + "673": 2.4072105884552, + "674": 2.4294848442077637, + "675": 2.414706230163574, + "676": 2.413327217102051, + "677": 2.4182066917419434, + "678": 2.4071362018585205, + "679": 2.4100942611694336, + "680": 2.4204585552215576, + "681": 2.4130163192749023, + "682": 2.4163315296173096, + "683": 2.4212396144866943, + "684": 2.4241511821746826, + "685": 2.422750949859619, + "686": 2.4250311851501465, + "687": 2.4284510612487793, + "688": 2.413712501525879, + "689": 2.398535966873169, + "690": 2.420346736907959, + "691": 2.3918254375457764, + "692": 2.394608497619629, + "693": 2.3774867057800293, + "694": 2.4036495685577393, + "695": 2.425084114074707, + "696": 2.3976783752441406, + "697": 2.418344736099243, + "698": 2.3837039470672607, + "699": 2.4117681980133057, + "700": 2.4184937477111816, + "701": 2.3901455402374268 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "train_epoch_time": 4.788414716720581, + "train_loss": 2.4019142271969547, + "train_score": 0.32063531212006274, + "val_loss": 2.4292206153803932, + "val_score": 0.312114308984901 + }, + { + "epoch": 13, + "grad_norm": 0.25203627347946167, + "learning_rate": 0.14333333333333334, + "model_norm": 91.0897216796875, + "step_logs": { + "grad_norm": { + "702": 0.3300390839576721, + "703": 0.322391152381897, + "704": 0.30782660841941833, + "705": 0.2713398039340973, + "706": 0.3217991590499878, + "707": 0.3222607672214508, + "708": 0.2914176285266876, + "709": 0.27889591455459595, + "710": 0.27837029099464417, + "711": 0.3076034188270569, + "712": 0.28038936853408813, + "713": 0.356416791677475, + "714": 0.4073687791824341, + "715": 0.3918367326259613, + "716": 0.3988860845565796, + "717": 0.3759671747684479, + "718": 0.3604384660720825, + "719": 0.29835426807403564, + "720": 0.24581210315227509, + "721": 0.24378716945648193, + "722": 0.26598456501960754, + "723": 0.2564581036567688, + "724": 0.276718407869339, + "725": 0.2902151644229889, + "726": 0.3073812425136566, + "727": 0.27006322145462036, + "728": 0.24807585775852203, + "729": 0.23630265891551971, + "730": 0.23328597843647003, + "731": 0.2882058918476105, + "732": 0.27470651268959045, + "733": 0.22985504567623138, + "734": 0.224942147731781, + "735": 0.2476918250322342, + "736": 0.22961129248142242, + "737": 0.2383800446987152, + "738": 0.25636932253837585, + "739": 0.25010326504707336, + "740": 0.2347656637430191, + "741": 0.2664845585823059, + "742": 0.23746050894260406, + "743": 0.2609820067882538, + "744": 0.2527933418750763, + "745": 0.21657411754131317, + "746": 0.2503909170627594, + "747": 0.2367532104253769, + "748": 0.23744042217731476, + "749": 0.22273008525371552, + "750": 0.26137059926986694, + "751": 0.2341543436050415, + "752": 0.2515299320220947, + "753": 0.23059368133544922, + "754": 0.25316309928894043, + "755": 0.25203627347946167 + }, + "loss": { + "702": 2.387542247772217, + "703": 2.39223575592041, + "704": 2.3969063758850098, + "705": 2.391695499420166, + "706": 2.4152441024780273, + "707": 2.4017415046691895, + "708": 2.4039173126220703, + "709": 2.407346248626709, + "710": 2.4016051292419434, + "711": 2.3947181701660156, + "712": 2.3945980072021484, + "713": 2.407212972640991, + "714": 2.405872344970703, + "715": 2.414902687072754, + "716": 2.4006147384643555, + "717": 2.420238971710205, + "718": 2.415431022644043, + "719": 2.39057993888855, + "720": 2.3832461833953857, + "721": 2.4011502265930176, + "722": 2.4127063751220703, + "723": 2.3960819244384766, + "724": 2.376065254211426, + "725": 2.3859333992004395, + "726": 2.398648738861084, + "727": 2.369152069091797, + "728": 2.383495807647705, + "729": 2.3993654251098633, + "730": 2.4021952152252197, + "731": 2.3936712741851807, + "732": 2.387683391571045, + "733": 2.3873724937438965, + "734": 2.3726930618286133, + "735": 2.3907036781311035, + "736": 2.3885369300842285, + "737": 2.3717007637023926, + "738": 2.3812522888183594, + "739": 2.3822555541992188, + "740": 2.384267568588257, + "741": 2.3919520378112793, + "742": 2.4204275608062744, + "743": 2.3882110118865967, + "744": 2.385528564453125, + "745": 2.389493227005005, + "746": 2.3675942420959473, + "747": 2.397498369216919, + "748": 2.3713765144348145, + "749": 2.3653931617736816, + "750": 2.3721046447753906, + "751": 2.3684959411621094, + "752": 2.3836441040039062, + "753": 2.388432741165161, + "754": 2.3816676139831543, + "755": 2.379958152770996 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "train_epoch_time": 4.789869785308838, + "train_loss": 2.3827254545739938, + "train_score": 0.3249237803595309, + "val_loss": 2.411594339134225, + "val_score": 0.3167201848240862 + }, + { + "epoch": 14, + "grad_norm": 0.2197117954492569, + "learning_rate": 0.07166666666666667, + "model_norm": 91.09257507324219, + "step_logs": { + "grad_norm": { + "756": 0.2761439085006714, + "757": 0.24706952273845673, + "758": 0.22961826622486115, + "759": 0.26658567786216736, + "760": 0.301471084356308, + "761": 0.2331073135137558, + "762": 0.2244357317686081, + "763": 0.23235604166984558, + "764": 0.23938170075416565, + "765": 0.23986923694610596, + "766": 0.24092619121074677, + "767": 0.27001503109931946, + "768": 0.2521958649158478, + "769": 0.21570946276187897, + "770": 0.2512192130088806, + "771": 0.2271350473165512, + "772": 0.2642590403556824, + "773": 0.25384005904197693, + "774": 0.24231238663196564, + "775": 0.22705785930156708, + "776": 0.23054121434688568, + "777": 0.2104002833366394, + "778": 0.2288323938846588, + "779": 0.2242511510848999, + "780": 0.2163362205028534, + "781": 0.20032843947410583, + "782": 0.24287815392017365, + "783": 0.2637001872062683, + "784": 0.20668497681617737, + "785": 0.25995853543281555, + "786": 0.2332717329263687, + "787": 0.27230343222618103, + "788": 0.2583862543106079, + "789": 0.2421560138463974, + "790": 0.2332403063774109, + "791": 0.22376008331775665, + "792": 0.22356942296028137, + "793": 0.2044358104467392, + "794": 0.23896855115890503, + "795": 0.20602770149707794, + "796": 0.23659393191337585, + "797": 0.21350815892219543, + "798": 0.23285506665706635, + "799": 0.2323119193315506, + "800": 0.20167531073093414, + "801": 0.21216879785060883, + "802": 0.23666132986545563, + "803": 0.22863946855068207, + "804": 0.227693572640419, + "805": 0.2767631709575653, + "806": 0.22245074808597565, + "807": 0.2513234615325928, + "808": 0.25680112838745117, + "809": 0.2197117954492569 + }, + "loss": { + "756": 2.3960371017456055, + "757": 2.393949508666992, + "758": 2.398939609527588, + "759": 2.3947665691375732, + "760": 2.3522768020629883, + "761": 2.397989273071289, + "762": 2.395103693008423, + "763": 2.3695473670959473, + "764": 2.3735008239746094, + "765": 2.3729841709136963, + "766": 2.398881435394287, + "767": 2.3834304809570312, + "768": 2.393691301345825, + "769": 2.378028392791748, + "770": 2.3891730308532715, + "771": 2.375607490539551, + "772": 2.381958484649658, + "773": 2.3937265872955322, + "774": 2.400770902633667, + "775": 2.3694400787353516, + "776": 2.3539044857025146, + "777": 2.3629379272460938, + "778": 2.3816232681274414, + "779": 2.3935296535491943, + "780": 2.3777899742126465, + "781": 2.3724355697631836, + "782": 2.372957468032837, + "783": 2.389726161956787, + "784": 2.374579429626465, + "785": 2.376941204071045, + "786": 2.3771867752075195, + "787": 2.385077714920044, + "788": 2.3737740516662598, + "789": 2.3993072509765625, + "790": 2.3594346046447754, + "791": 2.384274959564209, + "792": 2.375826835632324, + "793": 2.3713130950927734, + "794": 2.387220859527588, + "795": 2.37376070022583, + "796": 2.375234842300415, + "797": 2.3680973052978516, + "798": 2.3630502223968506, + "799": 2.3770058155059814, + "800": 2.354630470275879, + "801": 2.3690831661224365, + "802": 2.36740779876709, + "803": 2.371968984603882, + "804": 2.3626205921173096, + "805": 2.3998422622680664, + "806": 2.3764305114746094, + "807": 2.3635830879211426, + "808": 2.403512716293335, + "809": 2.376469850540161 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "train_epoch_time": 4.789149284362793, + "train_loss": 2.3769338460016454, + "train_score": 0.32556940454834676, + "val_loss": 2.4063635773007275, + "val_score": 0.31698030318923714 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:28:10.393864", + "final_model_norm": 91.09257507324219, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:26:29.625420", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 2.429553508758545, + "learning_rate": 2.15e-11, + "model_norm": 89.89427947998047, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.733994483947754, + "3": 8.656057357788086, + "4": 20.534622192382812, + "5": 6.440240859985352, + "6": 5.055216312408447, + "7": 3.8297367095947266, + "8": 4.107420444488525, + "9": 4.2545857429504395, + "10": 4.459016799926758, + "11": 5.157927513122559, + "12": 4.648646831512451, + "13": 71.50898742675781, + "14": 357.7301330566406, + "15": 3.219618320465088, + "16": 4.466315269470215, + "17": 3.305650472640991, + "18": 1.2785460948944092, + "19": 1.3744328022003174, + "20": 2.1724610328674316, + "21": 8.90324878692627, + "22": 3.4738388061523438, + "23": 3.4021096229553223, + "24": 4.153079032897949, + "25": 13.375604629516602, + "26": 4.175424575805664, + "27": 4.691089630126953, + "28": 3.8858461380004883, + "29": 3.612222909927368, + "30": 11.23609733581543, + "31": 4.208217144012451, + "32": 3.885915756225586, + "33": 3.747910499572754, + "34": 7.2109599113464355, + "35": 15.730438232421875, + "36": 6.669101238250732, + "37": 5.69589376449585, + "38": 4.972746849060059, + "39": 7.909084320068359, + "40": 4.504845142364502, + "41": 13.990959167480469, + "42": 6.469381332397461, + "43": 4.600672721862793, + "44": 4.368273735046387, + "45": 4.056450366973877, + "46": 3.6727654933929443, + "47": 14.389432907104492, + "48": 7.941073894500732, + "49": 4.657362461090088, + "50": 4.3772382736206055, + "51": 3.771397113800049, + "52": 3.2211754322052, + "53": 2.429553508758545 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.8340747356414795, + "3": 3.709585666656494, + "4": 4.2086381912231445, + "5": 4.403763771057129, + "6": 3.881211519241333, + "7": 3.5605177879333496, + "8": 3.4620606899261475, + "9": 3.549063205718994, + "10": 3.461212635040283, + "11": 3.5233871936798096, + "12": 3.3178398609161377, + "13": 3.916867256164551, + "14": 5.120562553405762, + "15": 4.033894062042236, + "16": 3.7446446418762207, + "17": 4.082850933074951, + "18": 3.466677188873291, + "19": 3.383971691131592, + "20": 3.504695415496826, + "21": 3.826809883117676, + "22": 5.354043960571289, + "23": 4.4896559715271, + "24": 3.637166738510132, + "25": 4.800013542175293, + "26": 7.301655292510986, + "27": 6.175267696380615, + "28": 5.3836870193481445, + "29": 4.045472145080566, + "30": 4.214983940124512, + "31": 7.163357734680176, + "32": 6.275996208190918, + "33": 4.783047199249268, + "34": 3.9009814262390137, + "35": 7.055430889129639, + "36": 11.038154602050781, + "37": 9.603250503540039, + "38": 7.779790878295898, + "39": 6.452442169189453, + "40": 5.659243583679199, + "41": 6.63803243637085, + "42": 10.90620231628418, + "43": 9.19009017944336, + "44": 7.223609924316406, + "45": 5.790656089782715, + "46": 4.32966423034668, + "47": 6.517666339874268, + "48": 10.89421272277832, + "49": 10.847827911376953, + "50": 8.655927658081055, + "51": 6.700267791748047, + "52": 4.685117721557617, + "53": 3.6778411865234375 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "train_epoch_time": 4.789998531341553, + "train_loss": 3.778417190007192, + "train_score": 0.13783514165296787, + "val_loss": 3.776106384126245, + "val_score": 0.13452568864097003 + }, + { + "epoch": 1, + "grad_norm": 0.6967712044715881, + "learning_rate": 0.215, + "model_norm": 89.84645080566406, + "step_logs": { + "grad_norm": { + "54": 1.7600692510604858, + "55": 6.95321798324585, + "56": 3.4420149326324463, + "57": 2.800166130065918, + "58": 2.639221668243408, + "59": 2.2791171073913574, + "60": 2.3787615299224854, + "61": 5.642912864685059, + "62": 2.712333917617798, + "63": 2.2643442153930664, + "64": 1.639472246170044, + "65": 4.633570671081543, + "66": 2.0411229133605957, + "67": 1.8406808376312256, + "68": 5.9409379959106445, + "69": 2.588007926940918, + "70": 1.9243590831756592, + "71": 1.6075596809387207, + "72": 0.7534778118133545, + "73": 1.6058335304260254, + "74": 9.571342468261719, + "75": 2.1735141277313232, + "76": 0.8565264344215393, + "77": 1.656238079071045, + "78": 9.02771282196045, + "79": 4.570710182189941, + "80": 2.4140424728393555, + "81": 1.9558131694793701, + "82": 1.6673396825790405, + "83": 1.4261865615844727, + "84": 0.6584086418151855, + "85": 1.119946837425232, + "86": 1.3256529569625854, + "87": 2.158568859100342, + "88": 1.4991145133972168, + "89": 0.5431514382362366, + "90": 0.9848825931549072, + "91": 1.1982654333114624, + "92": 2.015868902206421, + "93": 1.463441014289856, + "94": 0.33475884795188904, + "95": 0.46374404430389404, + "96": 0.6652947664260864, + "97": 1.3676626682281494, + "98": 1.327052354812622, + "99": 1.0718811750411987, + "100": 1.1613391637802124, + "101": 1.4255242347717285, + "102": 1.275913953781128, + "103": 0.7472560405731201, + "104": 0.8954002261161804, + "105": 1.3840980529785156, + "106": 1.2118228673934937, + "107": 0.6967712044715881 + }, + "loss": { + "54": 3.782855987548828, + "55": 3.918456554412842, + "56": 6.271005630493164, + "57": 5.484335899353027, + "58": 4.090142250061035, + "59": 3.589216709136963, + "60": 3.927196979522705, + "61": 3.7309350967407227, + "62": 5.297145843505859, + "63": 4.52878999710083, + "64": 3.600069046020508, + "65": 3.644352674484253, + "66": 4.423394680023193, + "67": 3.611009120941162, + "68": 3.770536184310913, + "69": 5.148294448852539, + "70": 4.328619956970215, + "71": 3.677562713623047, + "72": 3.3822851181030273, + "73": 3.479600429534912, + "74": 4.305522918701172, + "75": 4.096094131469727, + "76": 3.3589093685150146, + "77": 3.4561381340026855, + "78": 4.316786766052246, + "79": 6.879027366638184, + "80": 6.11417818069458, + "81": 5.050774097442627, + "82": 4.30275297164917, + "83": 3.7803187370300293, + "84": 3.4178764820098877, + "85": 3.3938331604003906, + "86": 3.49715518951416, + "87": 3.4492218494415283, + "88": 3.701573371887207, + "89": 3.3722610473632812, + "90": 3.3772826194763184, + "91": 3.4302945137023926, + "92": 3.4313745498657227, + "93": 3.6659979820251465, + "94": 3.3707165718078613, + "95": 3.333737373352051, + "96": 3.3669753074645996, + "97": 3.392742156982422, + "98": 3.5068230628967285, + "99": 3.3893918991088867, + "100": 3.437774658203125, + "101": 3.3910889625549316, + "102": 3.5048890113830566, + "103": 3.3261966705322266, + "104": 3.4144062995910645, + "105": 3.3719491958618164, + "106": 3.473080635070801, + "107": 3.329042911529541 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "train_epoch_time": 4.78846001625061, + "train_loss": 3.3715476909703126, + "train_score": 0.08542974356941377, + "val_loss": 3.3898103467085057, + "val_score": 0.08248869838657116 + }, + { + "epoch": 2, + "grad_norm": 0.6376702785491943, + "learning_rate": 0.215, + "model_norm": 89.86298370361328, + "step_logs": { + "grad_norm": { + "108": 0.863978922367096, + "109": 1.4014583826065063, + "110": 1.223598599433899, + "111": 0.5734009146690369, + "112": 0.6804056167602539, + "113": 1.1065216064453125, + "114": 1.067942500114441, + "115": 0.8983179926872253, + "116": 0.9495359063148499, + "117": 1.0689358711242676, + "118": 1.0224673748016357, + "119": 0.8617774248123169, + "120": 0.854465663433075, + "121": 0.8484242558479309, + "122": 0.8702432513237, + "123": 0.8752347230911255, + "124": 0.9114002585411072, + "125": 1.0401203632354736, + "126": 0.9841242432594299, + "127": 0.8059213757514954, + "128": 0.8353803157806396, + "129": 0.9692856669425964, + "130": 0.9037263989448547, + "131": 0.6930402517318726, + "132": 0.7376375794410706, + "133": 0.862440288066864, + "134": 0.8603819012641907, + "135": 0.8117675185203552, + "136": 0.7948665022850037, + "137": 0.7000136971473694, + "138": 0.7195701003074646, + "139": 0.8498217463493347, + "140": 0.8268479108810425, + "141": 0.7609023451805115, + "142": 0.7767173647880554, + "143": 0.7971246242523193, + "144": 0.7868272066116333, + "145": 0.8084514141082764, + "146": 0.8119480609893799, + "147": 0.808192253112793, + "148": 0.838414192199707, + "149": 0.8663260340690613, + "150": 0.8310330510139465, + "151": 0.7649414539337158, + "152": 0.7240082621574402, + "153": 0.6730470657348633, + "154": 0.6659411787986755, + "155": 0.6326982378959656, + "156": 0.6284421682357788, + "157": 0.5721337795257568, + "158": 0.5678961873054504, + "159": 0.570471465587616, + "160": 0.5975511074066162, + "161": 0.6376702785491943 + }, + "loss": { + "108": 3.346294641494751, + "109": 3.3726882934570312, + "110": 3.5092005729675293, + "111": 3.295762538909912, + "112": 3.3364429473876953, + "113": 3.3828094005584717, + "114": 3.4595792293548584, + "115": 3.351487636566162, + "116": 3.4365453720092773, + "117": 3.3558294773101807, + "118": 3.389944553375244, + "119": 3.3285160064697266, + "120": 3.3939123153686523, + "121": 3.3478825092315674, + "122": 3.3591840267181396, + "123": 3.3183021545410156, + "124": 3.3832719326019287, + "125": 3.3444571495056152, + "126": 3.416065216064453, + "127": 3.3270702362060547, + "128": 3.390368700027466, + "129": 3.3990607261657715, + "130": 3.425734043121338, + "131": 3.3351869583129883, + "132": 3.3583717346191406, + "133": 3.3464856147766113, + "134": 3.365583896636963, + "135": 3.3311767578125, + "136": 3.3878769874572754, + "137": 3.2899458408355713, + "138": 3.3204338550567627, + "139": 3.3566741943359375, + "140": 3.348726749420166, + "141": 3.3130178451538086, + "142": 3.327881336212158, + "143": 3.321107864379883, + "144": 3.3257904052734375, + "145": 3.3359487056732178, + "146": 3.3460803031921387, + "147": 3.312148094177246, + "148": 3.316803455352783, + "149": 3.3127455711364746, + "150": 3.350639581680298, + "151": 3.3072245121002197, + "152": 3.3171613216400146, + "153": 3.307497501373291, + "154": 3.3380799293518066, + "155": 3.293321132659912, + "156": 3.2975564002990723, + "157": 3.255033493041992, + "158": 3.2581634521484375, + "159": 3.2486581802368164, + "160": 3.240307092666626, + "161": 3.2430601119995117 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "train_epoch_time": 4.7881128787994385, + "train_loss": 3.2409612587226855, + "train_score": 0.15480182924691369, + "val_loss": 3.2546099600917846, + "val_score": 0.1502314148177783 + }, + { + "epoch": 3, + "grad_norm": 0.6691382527351379, + "learning_rate": 0.215, + "model_norm": 89.89545440673828, + "step_logs": { + "grad_norm": { + "162": 0.6216800212860107, + "163": 0.5795353651046753, + "164": 0.5379090905189514, + "165": 0.5064986944198608, + "166": 0.5122657418251038, + "167": 0.5290499329566956, + "168": 0.6136422157287598, + "169": 0.6900139451026917, + "170": 0.6494053602218628, + "171": 0.6496060490608215, + "172": 0.8002558946609497, + "173": 0.8278506398200989, + "174": 0.7854248285293579, + "175": 0.7602683305740356, + "176": 0.8265597224235535, + "177": 0.9743447303771973, + "178": 0.9849590063095093, + "179": 0.9550031423568726, + "180": 0.9158548712730408, + "181": 0.8523992300033569, + "182": 0.8869295120239258, + "183": 0.8968014121055603, + "184": 0.8628901839256287, + "185": 0.83315110206604, + "186": 0.7910624146461487, + "187": 0.7288994789123535, + "188": 0.7334252595901489, + "189": 0.7752164006233215, + "190": 0.7915940284729004, + "191": 0.8068891763687134, + "192": 0.8567906022071838, + "193": 0.8967894911766052, + "194": 0.8805527091026306, + "195": 0.8582144975662231, + "196": 0.7758252620697021, + "197": 0.6706097722053528, + "198": 0.6795414686203003, + "199": 0.7499302625656128, + "200": 0.735964834690094, + "201": 0.6751560568809509, + "202": 0.6224300861358643, + "203": 0.6208912134170532, + "204": 0.6566837430000305, + "205": 0.6400348544120789, + "206": 0.7055973410606384, + "207": 0.7964776754379272, + "208": 0.725046694278717, + "209": 0.6219080090522766, + "210": 0.6124748587608337, + "211": 0.6556366086006165, + "212": 0.6675980091094971, + "213": 0.6274852752685547, + "214": 0.611222505569458, + "215": 0.6691382527351379 + }, + "loss": { + "162": 3.2319462299346924, + "163": 3.219452381134033, + "164": 3.2075250148773193, + "165": 3.201167106628418, + "166": 3.1718106269836426, + "167": 3.144150972366333, + "168": 3.1475160121917725, + "169": 3.1607632637023926, + "170": 3.177461624145508, + "171": 3.130967140197754, + "172": 3.136554718017578, + "173": 3.1449737548828125, + "174": 3.1375277042388916, + "175": 3.099252223968506, + "176": 3.1178438663482666, + "177": 3.1382901668548584, + "178": 3.156012535095215, + "179": 3.0988309383392334, + "180": 3.131251811981201, + "181": 3.110795259475708, + "182": 3.096956253051758, + "183": 3.0759496688842773, + "184": 3.0825419425964355, + "185": 3.060507297515869, + "186": 3.076098918914795, + "187": 3.0639452934265137, + "188": 3.046398162841797, + "189": 3.065018653869629, + "190": 3.0406084060668945, + "191": 3.0320305824279785, + "192": 3.0403335094451904, + "193": 3.0507290363311768, + "194": 3.07023024559021, + "195": 3.052114486694336, + "196": 3.0559685230255127, + "197": 3.005636215209961, + "198": 3.0089898109436035, + "199": 3.009814500808716, + "200": 3.015291452407837, + "201": 2.9974365234375, + "202": 2.9953560829162598, + "203": 3.005653142929077, + "204": 2.987767219543457, + "205": 2.9744162559509277, + "206": 2.9824533462524414, + "207": 2.9882657527923584, + "208": 3.0003840923309326, + "209": 2.9617490768432617, + "210": 2.9827628135681152, + "211": 2.975161552429199, + "212": 2.96435546875, + "213": 2.9418067932128906, + "214": 2.985100030899048, + "215": 2.9532151222229004 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "train_epoch_time": 4.788455963134766, + "train_loss": 2.970902385328558, + "train_score": 0.17343077473609655, + "val_loss": 2.990309273471241, + "val_score": 0.17139512791494005 + }, + { + "epoch": 4, + "grad_norm": 0.7655919790267944, + "learning_rate": 0.215, + "model_norm": 89.91508483886719, + "step_logs": { + "grad_norm": { + "216": 0.6569374799728394, + "217": 0.5437264442443848, + "218": 0.48358988761901855, + "219": 0.4807252585887909, + "220": 0.4809775650501251, + "221": 0.4261274039745331, + "222": 0.4274457097053528, + "223": 0.43906083703041077, + "224": 0.4313525855541229, + "225": 0.5159060955047607, + "226": 0.6199490427970886, + "227": 0.6708369851112366, + "228": 0.7206170558929443, + "229": 0.8778797388076782, + "230": 1.0434292554855347, + "231": 1.012017011642456, + "232": 0.8054400086402893, + "233": 0.6809650659561157, + "234": 0.5923705101013184, + "235": 0.589709997177124, + "236": 0.643692672252655, + "237": 0.7189840078353882, + "238": 0.8345604538917542, + "239": 0.821356475353241, + "240": 0.758682370185852, + "241": 0.7388073801994324, + "242": 0.7493603229522705, + "243": 0.7750712633132935, + "244": 0.7239165902137756, + "245": 0.7388054132461548, + "246": 0.7830007672309875, + "247": 0.7371181845664978, + "248": 0.6785892248153687, + "249": 0.7478886842727661, + "250": 0.7603949904441833, + "251": 0.6679902076721191, + "252": 0.6268831491470337, + "253": 0.745229184627533, + "254": 0.8539769053459167, + "255": 0.849469780921936, + "256": 0.8347134590148926, + "257": 0.8746486306190491, + "258": 0.8160930275917053, + "259": 0.69259113073349, + "260": 0.5686430335044861, + "261": 0.5244439244270325, + "262": 0.4675756096839905, + "263": 0.42535269260406494, + "264": 0.39161190390586853, + "265": 0.414936900138855, + "266": 0.5057854652404785, + "267": 0.6051677465438843, + "268": 0.6840121746063232, + "269": 0.7655919790267944 + }, + "loss": { + "216": 2.959449052810669, + "217": 2.928818702697754, + "218": 2.9616847038269043, + "219": 2.955103635787964, + "220": 2.963385581970215, + "221": 2.9368133544921875, + "222": 2.9342844486236572, + "223": 2.9423367977142334, + "224": 2.9325857162475586, + "225": 2.9307456016540527, + "226": 2.9643044471740723, + "227": 2.920894145965576, + "228": 2.9425387382507324, + "229": 2.960052490234375, + "230": 2.969963788986206, + "231": 2.980578899383545, + "232": 2.957820415496826, + "233": 2.952867031097412, + "234": 2.9020159244537354, + "235": 2.9305548667907715, + "236": 2.9248666763305664, + "237": 2.936262369155884, + "238": 2.9247281551361084, + "239": 2.9441075325012207, + "240": 2.9048638343811035, + "241": 2.928342342376709, + "242": 2.9197912216186523, + "243": 2.9355950355529785, + "244": 2.9159674644470215, + "245": 2.9187963008880615, + "246": 2.9043824672698975, + "247": 2.91329288482666, + "248": 2.8938283920288086, + "249": 2.909095287322998, + "250": 2.889674425125122, + "251": 2.898662567138672, + "252": 2.888793468475342, + "253": 2.9009788036346436, + "254": 2.8788414001464844, + "255": 2.9252638816833496, + "256": 2.891234874725342, + "257": 2.902353286743164, + "258": 2.9111204147338867, + "259": 2.9013242721557617, + "260": 2.8581056594848633, + "261": 2.898707628250122, + "262": 2.8596737384796143, + "263": 2.858137845993042, + "264": 2.863687515258789, + "265": 2.8653366565704346, + "266": 2.884807586669922, + "267": 2.874598979949951, + "268": 2.8718433380126953, + "269": 2.878401279449463 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "train_epoch_time": 4.788722276687622, + "train_loss": 2.8829704161524945, + "train_score": 0.20100542499311, + "val_loss": 2.9084927285716673, + "val_score": 0.19775670960152053 + }, + { + "epoch": 5, + "grad_norm": 0.5735481381416321, + "learning_rate": 0.215, + "model_norm": 89.9293212890625, + "step_logs": { + "grad_norm": { + "270": 0.796120285987854, + "271": 0.8118538856506348, + "272": 0.7633218765258789, + "273": 0.6854509711265564, + "274": 0.6247511506080627, + "275": 0.6082665920257568, + "276": 0.6928306221961975, + "277": 0.7025632262229919, + "278": 0.6927878260612488, + "279": 0.7126378417015076, + "280": 0.6892486214637756, + "281": 0.6750990152359009, + "282": 0.7017691135406494, + "283": 0.770732581615448, + "284": 0.7970418334007263, + "285": 0.7432883381843567, + "286": 0.7211617231369019, + "287": 0.7396063804626465, + "288": 0.7051949501037598, + "289": 0.6790603995323181, + "290": 0.6845196485519409, + "291": 0.7265959978103638, + "292": 0.7176300883293152, + "293": 0.6816290020942688, + "294": 0.6067876815795898, + "295": 0.5847397446632385, + "296": 0.6183571219444275, + "297": 0.6058297753334045, + "298": 0.5975099205970764, + "299": 0.5996508002281189, + "300": 0.5979251861572266, + "301": 0.5816647410392761, + "302": 0.578787088394165, + "303": 0.5957953929901123, + "304": 0.6086187958717346, + "305": 0.671294093132019, + "306": 0.7799011468887329, + "307": 0.74623703956604, + "308": 0.6684860587120056, + "309": 0.6856642365455627, + "310": 0.7539567947387695, + "311": 0.6747231483459473, + "312": 0.6263933181762695, + "313": 0.6678944230079651, + "314": 0.7819444537162781, + "315": 0.7640763521194458, + "316": 0.644639790058136, + "317": 0.6193239688873291, + "318": 0.624279260635376, + "319": 0.6400131583213806, + "320": 0.6569095849990845, + "321": 0.661838173866272, + "322": 0.598699688911438, + "323": 0.5735481381416321 + }, + "loss": { + "270": 2.885068893432617, + "271": 2.892613649368286, + "272": 2.874335765838623, + "273": 2.882042407989502, + "274": 2.8603515625, + "275": 2.863640069961548, + "276": 2.8464183807373047, + "277": 2.8889899253845215, + "278": 2.8550949096679688, + "279": 2.880115509033203, + "280": 2.8660616874694824, + "281": 2.8706936836242676, + "282": 2.8713297843933105, + "283": 2.8797903060913086, + "284": 2.8528199195861816, + "285": 2.8667685985565186, + "286": 2.8620505332946777, + "287": 2.860910177230835, + "288": 2.847256660461426, + "289": 2.860405921936035, + "290": 2.8828535079956055, + "291": 2.8549137115478516, + "292": 2.8421316146850586, + "293": 2.835737705230713, + "294": 2.843327045440674, + "295": 2.849231719970703, + "296": 2.857534408569336, + "297": 2.848630905151367, + "298": 2.8949599266052246, + "299": 2.8469786643981934, + "300": 2.8290023803710938, + "301": 2.850537061691284, + "302": 2.8316574096679688, + "303": 2.8586585521698, + "304": 2.845435619354248, + "305": 2.846201181411743, + "306": 2.838244915008545, + "307": 2.85735821723938, + "308": 2.8405256271362305, + "309": 2.830526828765869, + "310": 2.8481059074401855, + "311": 2.8509984016418457, + "312": 2.828366279602051, + "313": 2.840717315673828, + "314": 2.8361728191375732, + "315": 2.869767665863037, + "316": 2.8425450325012207, + "317": 2.833724021911621, + "318": 2.8254990577697754, + "319": 2.840945243835449, + "320": 2.8350791931152344, + "321": 2.832787036895752, + "322": 2.8447256088256836, + "323": 2.8329739570617676 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "train_epoch_time": 4.788723707199097, + "train_loss": 2.822763564562695, + "train_score": 0.229455478807946, + "val_loss": 2.847779504467234, + "val_score": 0.22466543510162734 + }, + { + "epoch": 6, + "grad_norm": 0.721624493598938, + "learning_rate": 0.215, + "model_norm": 89.94625854492188, + "step_logs": { + "grad_norm": { + "324": 0.5964891314506531, + "325": 0.6091309189796448, + "326": 0.6144530773162842, + "327": 0.5864863395690918, + "328": 0.5359792113304138, + "329": 0.5493559241294861, + "330": 0.5981965661048889, + "331": 0.5725377202033997, + "332": 0.5112264752388, + "333": 0.5338640809059143, + "334": 0.595308244228363, + "335": 0.6108565926551819, + "336": 0.6135203242301941, + "337": 0.6320286989212036, + "338": 0.6825681924819946, + "339": 0.7148227095603943, + "340": 0.6866228580474854, + "341": 0.6864261031150818, + "342": 0.6876449584960938, + "343": 0.721110999584198, + "344": 0.7774609923362732, + "345": 0.8076304793357849, + "346": 0.8222858309745789, + "347": 0.8402779698371887, + "348": 0.8327051997184753, + "349": 0.8058454394340515, + "350": 0.723849356174469, + "351": 0.687146782875061, + "352": 0.8715532422065735, + "353": 0.731410801410675, + "354": 0.7635579705238342, + "355": 0.7563762068748474, + "356": 0.7085869312286377, + "357": 0.6681196689605713, + "358": 0.581696093082428, + "359": 0.6046619415283203, + "360": 0.6671164035797119, + "361": 0.6966533660888672, + "362": 0.7446925044059753, + "363": 0.7331202626228333, + "364": 0.696814775466919, + "365": 0.7409701943397522, + "366": 0.7478776574134827, + "367": 0.7537860870361328, + "368": 0.7984800934791565, + "369": 0.7961688041687012, + "370": 0.80507493019104, + "371": 0.7771061658859253, + "372": 0.7710264921188354, + "373": 0.7771677374839783, + "374": 0.7087388634681702, + "375": 0.6863659024238586, + "376": 0.67284095287323, + "377": 0.721624493598938 + }, + "loss": { + "324": 2.8285293579101562, + "325": 2.838834524154663, + "326": 2.8331103324890137, + "327": 2.8147382736206055, + "328": 2.8175084590911865, + "329": 2.8149991035461426, + "330": 2.823151111602783, + "331": 2.8318426609039307, + "332": 2.8063690662384033, + "333": 2.806149959564209, + "334": 2.8080615997314453, + "335": 2.794569492340088, + "336": 2.796664237976074, + "337": 2.8131580352783203, + "338": 2.815187454223633, + "339": 2.848173141479492, + "340": 2.8063628673553467, + "341": 2.833559036254883, + "342": 2.827437400817871, + "343": 2.821094274520874, + "344": 2.8275089263916016, + "345": 2.8373918533325195, + "346": 2.840057849884033, + "347": 2.8302066326141357, + "348": 2.8216428756713867, + "349": 2.822509288787842, + "350": 2.8014183044433594, + "351": 2.8154940605163574, + "352": 2.8080015182495117, + "353": 2.8117311000823975, + "354": 2.810521125793457, + "355": 2.8014698028564453, + "356": 2.785714864730835, + "357": 2.8231570720672607, + "358": 2.808838129043579, + "359": 2.8177099227905273, + "360": 2.7885584831237793, + "361": 2.806155204772949, + "362": 2.8029279708862305, + "363": 2.8152644634246826, + "364": 2.810152053833008, + "365": 2.8166346549987793, + "366": 2.797429323196411, + "367": 2.7932543754577637, + "368": 2.79209566116333, + "369": 2.8221168518066406, + "370": 2.7892518043518066, + "371": 2.804565906524658, + "372": 2.7962522506713867, + "373": 2.7812294960021973, + "374": 2.7683475017547607, + "375": 2.7984366416931152, + "376": 2.7574472427368164, + "377": 2.7779312133789062 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "train_epoch_time": 4.788815021514893, + "train_loss": 2.7804879842563888, + "train_score": 0.24339580349036236, + "val_loss": 2.8054805831164633, + "val_score": 0.23691787501427117 + }, + { + "epoch": 7, + "grad_norm": 0.8514337539672852, + "learning_rate": 0.215, + "model_norm": 89.96620178222656, + "step_logs": { + "grad_norm": { + "378": 0.7090153694152832, + "379": 0.6544874906539917, + "380": 0.6257517337799072, + "381": 0.658334493637085, + "382": 0.7453734874725342, + "383": 0.7668471336364746, + "384": 0.7865362167358398, + "385": 0.8101611137390137, + "386": 0.8240614533424377, + "387": 0.802543580532074, + "388": 0.7697931528091431, + "389": 0.745103120803833, + "390": 0.7492871284484863, + "391": 0.7381245493888855, + "392": 0.6822201013565063, + "393": 0.6685869097709656, + "394": 0.6939062476158142, + "395": 0.8011731505393982, + "396": 0.8890218734741211, + "397": 0.9615203142166138, + "398": 0.9776833057403564, + "399": 0.9332268238067627, + "400": 0.7564985752105713, + "401": 0.6738104820251465, + "402": 0.6722136735916138, + "403": 0.7064204216003418, + "404": 0.8017610311508179, + "405": 0.8405110239982605, + "406": 0.7676529884338379, + "407": 0.7388855218887329, + "408": 0.7953731417655945, + "409": 0.8301306366920471, + "410": 0.8346095085144043, + "411": 0.8095090389251709, + "412": 0.8254018425941467, + "413": 0.8188120722770691, + "414": 0.8567057847976685, + "415": 0.8541874289512634, + "416": 0.8077358603477478, + "417": 0.7974807620048523, + "418": 0.7672068476676941, + "419": 0.7511753439903259, + "420": 0.6982523202896118, + "421": 0.6772213578224182, + "422": 0.7334694862365723, + "423": 0.7740538716316223, + "424": 0.792039155960083, + "425": 0.8051450848579407, + "426": 0.8875683546066284, + "427": 0.8709783554077148, + "428": 0.7662844061851501, + "429": 0.7867828607559204, + "430": 0.8641607761383057, + "431": 0.8514337539672852 + }, + "loss": { + "378": 2.795520305633545, + "379": 2.774301767349243, + "380": 2.7756214141845703, + "381": 2.783689260482788, + "382": 2.8028249740600586, + "383": 2.7717487812042236, + "384": 2.7767343521118164, + "385": 2.800795555114746, + "386": 2.772435426712036, + "387": 2.792219400405884, + "388": 2.7631430625915527, + "389": 2.779628276824951, + "390": 2.7496178150177, + "391": 2.783785343170166, + "392": 2.7655348777770996, + "393": 2.7732787132263184, + "394": 2.762220859527588, + "395": 2.766599655151367, + "396": 2.7918457984924316, + "397": 2.785118579864502, + "398": 2.786693572998047, + "399": 2.8107094764709473, + "400": 2.7763500213623047, + "401": 2.7645668983459473, + "402": 2.738198757171631, + "403": 2.7353878021240234, + "404": 2.7589430809020996, + "405": 2.7586965560913086, + "406": 2.7454304695129395, + "407": 2.741732597351074, + "408": 2.742398738861084, + "409": 2.767534017562866, + "410": 2.760796546936035, + "411": 2.7551798820495605, + "412": 2.7555508613586426, + "413": 2.758802890777588, + "414": 2.7387495040893555, + "415": 2.760998249053955, + "416": 2.7372655868530273, + "417": 2.754990816116333, + "418": 2.7356529235839844, + "419": 2.7401700019836426, + "420": 2.7333812713623047, + "421": 2.7174830436706543, + "422": 2.735703706741333, + "423": 2.7497267723083496, + "424": 2.7364587783813477, + "425": 2.7370429039001465, + "426": 2.742906093597412, + "427": 2.7440524101257324, + "428": 2.7095909118652344, + "429": 2.7307381629943848, + "430": 2.7078957557678223, + "431": 2.7399444580078125 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "train_epoch_time": 4.78853440284729, + "train_loss": 2.717233853634324, + "train_score": 0.24648381458378932, + "val_loss": 2.7367622192636833, + "val_score": 0.24217853074985313 + }, + { + "epoch": 8, + "grad_norm": 0.7189271450042725, + "learning_rate": 0.215, + "model_norm": 89.98765563964844, + "step_logs": { + "grad_norm": { + "432": 0.7926420569419861, + "433": 0.7989777326583862, + "434": 0.8231915235519409, + "435": 0.8223428130149841, + "436": 0.777944803237915, + "437": 0.8000281453132629, + "438": 0.790794312953949, + "439": 0.8025705814361572, + "440": 0.9105353951454163, + "441": 0.9671114683151245, + "442": 0.963681697845459, + "443": 1.0239083766937256, + "444": 0.8694352507591248, + "445": 0.689085066318512, + "446": 0.6770842671394348, + "447": 0.7280488014221191, + "448": 0.7739311456680298, + "449": 0.795528769493103, + "450": 0.8310406804084778, + "451": 0.8493906855583191, + "452": 0.8626106381416321, + "453": 0.8665022850036621, + "454": 0.9031314253807068, + "455": 0.9011504054069519, + "456": 0.8754758834838867, + "457": 0.8766058683395386, + "458": 0.8711204528808594, + "459": 0.8336485624313354, + "460": 0.8183372020721436, + "461": 0.8496360778808594, + "462": 0.8110702633857727, + "463": 0.8053188323974609, + "464": 0.8417520523071289, + "465": 0.8457974791526794, + "466": 0.7822505831718445, + "467": 0.762436032295227, + "468": 0.7585875988006592, + "469": 0.7191105484962463, + "470": 0.7579717636108398, + "471": 0.8460325598716736, + "472": 0.7916812896728516, + "473": 0.7571032047271729, + "474": 0.8186747431755066, + "475": 0.833448052406311, + "476": 0.8681373000144958, + "477": 0.8861498236656189, + "478": 0.8977588415145874, + "479": 0.8764435052871704, + "480": 0.8431791663169861, + "481": 0.8432224988937378, + "482": 0.7808829545974731, + "483": 0.7327014207839966, + "484": 0.7027769088745117, + "485": 0.7189271450042725 + }, + "loss": { + "432": 2.726034164428711, + "433": 2.7230072021484375, + "434": 2.716484308242798, + "435": 2.7352614402770996, + "436": 2.7156851291656494, + "437": 2.7152185440063477, + "438": 2.6940643787384033, + "439": 2.694014072418213, + "440": 2.7165210247039795, + "441": 2.7441444396972656, + "442": 2.7323758602142334, + "443": 2.7198245525360107, + "444": 2.74222993850708, + "445": 2.678779363632202, + "446": 2.6904664039611816, + "447": 2.6978626251220703, + "448": 2.719388008117676, + "449": 2.7081289291381836, + "450": 2.6950571537017822, + "451": 2.71567440032959, + "452": 2.706803321838379, + "453": 2.720440626144409, + "454": 2.686661720275879, + "455": 2.707317590713501, + "456": 2.682138442993164, + "457": 2.7008464336395264, + "458": 2.6916580200195312, + "459": 2.696120500564575, + "460": 2.6658854484558105, + "461": 2.6969757080078125, + "462": 2.6969947814941406, + "463": 2.6731390953063965, + "464": 2.6696786880493164, + "465": 2.7056665420532227, + "466": 2.6599316596984863, + "467": 2.682149887084961, + "468": 2.683171272277832, + "469": 2.678713083267212, + "470": 2.658048629760742, + "471": 2.7085988521575928, + "472": 2.696321964263916, + "473": 2.6821703910827637, + "474": 2.6862521171569824, + "475": 2.6759538650512695, + "476": 2.654883861541748, + "477": 2.690098524093628, + "478": 2.662038564682007, + "479": 2.6642403602600098, + "480": 2.680803060531616, + "481": 2.669417142868042, + "482": 2.657824754714966, + "483": 2.6759777069091797, + "484": 2.6603567600250244, + "485": 2.6720380783081055 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "train_epoch_time": 4.788131475448608, + "train_loss": 2.6499642800395424, + "train_score": 0.2541775018447099, + "val_loss": 2.6721480404605273, + "val_score": 0.24951115837395532 + }, + { + "epoch": 9, + "grad_norm": 1.1421939134597778, + "learning_rate": 0.215, + "model_norm": 90.0042724609375, + "step_logs": { + "grad_norm": { + "486": 0.774936854839325, + "487": 0.789344310760498, + "488": 0.7899917364120483, + "489": 0.839102029800415, + "490": 0.8981372714042664, + "491": 0.8626739978790283, + "492": 0.8392364978790283, + "493": 0.8226275444030762, + "494": 0.7573186159133911, + "495": 0.7592282295227051, + "496": 0.8314062356948853, + "497": 0.8425015807151794, + "498": 0.8403638601303101, + "499": 0.8894930481910706, + "500": 0.8961754441261292, + "501": 0.8464363813400269, + "502": 0.8113922476768494, + "503": 0.8303462266921997, + "504": 0.8036644458770752, + "505": 0.8080917596817017, + "506": 0.838226318359375, + "507": 0.8522719144821167, + "508": 0.8235961198806763, + "509": 0.8319439888000488, + "510": 0.8286922574043274, + "511": 0.8342664241790771, + "512": 0.8681519031524658, + "513": 0.8117028474807739, + "514": 0.7368595600128174, + "515": 0.7533247470855713, + "516": 0.7707027792930603, + "517": 0.7643579244613647, + "518": 0.7609565854072571, + "519": 0.7944704294204712, + "520": 0.83812016248703, + "521": 0.8342036604881287, + "522": 0.8647300601005554, + "523": 0.8340088129043579, + "524": 0.8161774277687073, + "525": 0.8764876127243042, + "526": 0.8609760999679565, + "527": 0.7913983464241028, + "528": 0.7784112691879272, + "529": 0.787605345249176, + "530": 0.7245925664901733, + "531": 0.7029888033866882, + "532": 0.7153397798538208, + "533": 0.7838276028633118, + "534": 0.8455857634544373, + "535": 0.9038657546043396, + "536": 0.9485446810722351, + "537": 0.9726511836051941, + "538": 1.2298110723495483, + "539": 1.1421939134597778 + }, + "loss": { + "486": 2.6547884941101074, + "487": 2.6654915809631348, + "488": 2.651487350463867, + "489": 2.669064998626709, + "490": 2.6671881675720215, + "491": 2.6770849227905273, + "492": 2.6650609970092773, + "493": 2.6781563758850098, + "494": 2.6454010009765625, + "495": 2.651963472366333, + "496": 2.643042802810669, + "497": 2.6682839393615723, + "498": 2.636284351348877, + "499": 2.6732192039489746, + "500": 2.6559107303619385, + "501": 2.661837577819824, + "502": 2.6275672912597656, + "503": 2.6706929206848145, + "504": 2.636566638946533, + "505": 2.6518187522888184, + "506": 2.6540379524230957, + "507": 2.644232749938965, + "508": 2.646148920059204, + "509": 2.6418371200561523, + "510": 2.632054090499878, + "511": 2.6597626209259033, + "512": 2.644534111022949, + "513": 2.657193660736084, + "514": 2.649960994720459, + "515": 2.6479477882385254, + "516": 2.634392261505127, + "517": 2.6228082180023193, + "518": 2.641427516937256, + "519": 2.640104293823242, + "520": 2.6271142959594727, + "521": 2.636256456375122, + "522": 2.6182384490966797, + "523": 2.6398000717163086, + "524": 2.6404244899749756, + "525": 2.6555655002593994, + "526": 2.6381704807281494, + "527": 2.6360859870910645, + "528": 2.634208917617798, + "529": 2.6462559700012207, + "530": 2.6401264667510986, + "531": 2.6096601486206055, + "532": 2.604496479034424, + "533": 2.6160969734191895, + "534": 2.6260948181152344, + "535": 2.639922618865967, + "536": 2.6462202072143555, + "537": 2.658841609954834, + "538": 2.6615941524505615, + "539": 2.685661792755127 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "train_epoch_time": 4.78758978843689, + "train_loss": 2.642064675915361, + "train_score": 0.2591261656417737, + "val_loss": 2.673168515775836, + "val_score": 0.25316177539518864 + }, + { + "epoch": 10, + "grad_norm": 0.8105792999267578, + "learning_rate": 0.215, + "model_norm": 90.02066802978516, + "step_logs": { + "grad_norm": { + "540": 0.9202944040298462, + "541": 0.6527591943740845, + "542": 0.47710418701171875, + "543": 0.4099416136741638, + "544": 0.3978358209133148, + "545": 0.4463668465614319, + "546": 0.5331204533576965, + "547": 0.6135859489440918, + "548": 0.7367595434188843, + "549": 0.8629884719848633, + "550": 0.942812979221344, + "551": 0.8674833178520203, + "552": 0.8434673547744751, + "553": 0.8539221286773682, + "554": 0.8579002618789673, + "555": 0.8397010564804077, + "556": 0.8100090026855469, + "557": 0.7524319887161255, + "558": 0.6755796074867249, + "559": 0.6974266171455383, + "560": 0.7552376985549927, + "561": 0.8239412307739258, + "562": 0.8400392532348633, + "563": 0.7862880229949951, + "564": 0.7592045664787292, + "565": 0.7833024859428406, + "566": 0.8161395192146301, + "567": 0.8042528629302979, + "568": 0.8144784569740295, + "569": 0.854443371295929, + "570": 0.8920965790748596, + "571": 0.86241614818573, + "572": 0.8849621415138245, + "573": 0.8547306656837463, + "574": 0.7955521941184998, + "575": 0.8058813810348511, + "576": 0.7878369688987732, + "577": 0.7980623841285706, + "578": 0.806784451007843, + "579": 0.8099424242973328, + "580": 0.800332248210907, + "581": 0.8077536225318909, + "582": 0.82627934217453, + "583": 0.8378875255584717, + "584": 0.8036258220672607, + "585": 0.7895596027374268, + "586": 0.8051947951316833, + "587": 0.7983424067497253, + "588": 0.7736822962760925, + "589": 0.8170568346977234, + "590": 0.798283576965332, + "591": 0.7759832739830017, + "592": 0.8374855518341064, + "593": 0.8105792999267578 + }, + "loss": { + "540": 2.645414352416992, + "541": 2.601228713989258, + "542": 2.607617139816284, + "543": 2.6148698329925537, + "544": 2.577263116836548, + "545": 2.618373155593872, + "546": 2.604825019836426, + "547": 2.5996720790863037, + "548": 2.6001129150390625, + "549": 2.6322121620178223, + "550": 2.6262269020080566, + "551": 2.649817943572998, + "552": 2.603966474533081, + "553": 2.626373291015625, + "554": 2.602977991104126, + "555": 2.6050095558166504, + "556": 2.6211352348327637, + "557": 2.617664098739624, + "558": 2.582747220993042, + "559": 2.6031644344329834, + "560": 2.599234104156494, + "561": 2.621400833129883, + "562": 2.604050397872925, + "563": 2.6263318061828613, + "564": 2.6078262329101562, + "565": 2.6009013652801514, + "566": 2.611826181411743, + "567": 2.6159708499908447, + "568": 2.6037800312042236, + "569": 2.6126961708068848, + "570": 2.606269598007202, + "571": 2.6246185302734375, + "572": 2.6028943061828613, + "573": 2.607581377029419, + "574": 2.586949110031128, + "575": 2.6003947257995605, + "576": 2.6079659461975098, + "577": 2.6256375312805176, + "578": 2.587466239929199, + "579": 2.6293957233428955, + "580": 2.59682035446167, + "581": 2.6400249004364014, + "582": 2.595548152923584, + "583": 2.598919630050659, + "584": 2.6017112731933594, + "585": 2.6250505447387695, + "586": 2.603376865386963, + "587": 2.5864858627319336, + "588": 2.558115243911743, + "589": 2.610131025314331, + "590": 2.5858097076416016, + "591": 2.5904438495635986, + "592": 2.5815930366516113, + "593": 2.5887889862060547 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "train_epoch_time": 4.787614583969116, + "train_loss": 2.575866939347649, + "train_score": 0.277403156256231, + "val_loss": 2.5959476907272427, + "val_score": 0.27063002368065303 + }, + { + "epoch": 11, + "grad_norm": 0.875197172164917, + "learning_rate": 0.215, + "model_norm": 90.03768920898438, + "step_logs": { + "grad_norm": { + "594": 0.7318904399871826, + "595": 0.7436156272888184, + "596": 0.8266604542732239, + "597": 0.8615882992744446, + "598": 0.9162756204605103, + "599": 0.9221148490905762, + "600": 0.8109691739082336, + "601": 0.7648108601570129, + "602": 0.8120312690734863, + "603": 0.8307549357414246, + "604": 0.8280071020126343, + "605": 0.8337923288345337, + "606": 0.8870125412940979, + "607": 0.9061279296875, + "608": 0.8689539432525635, + "609": 0.8609936833381653, + "610": 0.8323072195053101, + "611": 0.8224025368690491, + "612": 0.7654007077217102, + "613": 0.7466140985488892, + "614": 0.7352055907249451, + "615": 0.6889374256134033, + "616": 0.6608432531356812, + "617": 0.681706428527832, + "618": 0.7127463221549988, + "619": 0.7802812457084656, + "620": 0.7767740488052368, + "621": 0.7628665566444397, + "622": 0.8329023122787476, + "623": 0.8388475179672241, + "624": 0.8315045237541199, + "625": 0.8319355249404907, + "626": 0.8590131402015686, + "627": 0.9036568403244019, + "628": 0.9744861125946045, + "629": 0.9878485202789307, + "630": 0.9991350769996643, + "631": 0.9696122407913208, + "632": 0.8828770518302917, + "633": 0.8702228665351868, + "634": 0.8099728226661682, + "635": 0.7859927415847778, + "636": 0.8151223063468933, + "637": 0.7930382490158081, + "638": 0.7298247218132019, + "639": 0.7442294359207153, + "640": 0.780197024345398, + "641": 0.8113732933998108, + "642": 0.8424757122993469, + "643": 0.8369328379631042, + "644": 0.9091742634773254, + "645": 0.8744722604751587, + "646": 0.8243760466575623, + "647": 0.875197172164917 + }, + "loss": { + "594": 2.5986640453338623, + "595": 2.5922470092773438, + "596": 2.559065341949463, + "597": 2.6092607975006104, + "598": 2.5705878734588623, + "599": 2.611008644104004, + "600": 2.5883731842041016, + "601": 2.584822177886963, + "602": 2.5769855976104736, + "603": 2.6051034927368164, + "604": 2.558088541030884, + "605": 2.5871388912200928, + "606": 2.5882492065429688, + "607": 2.5978493690490723, + "608": 2.5725479125976562, + "609": 2.5780062675476074, + "610": 2.572591543197632, + "611": 2.5934290885925293, + "612": 2.554013729095459, + "613": 2.574382781982422, + "614": 2.566502332687378, + "615": 2.560737133026123, + "616": 2.5686984062194824, + "617": 2.558824300765991, + "618": 2.5420079231262207, + "619": 2.588202953338623, + "620": 2.547837495803833, + "621": 2.565319776535034, + "622": 2.576639175415039, + "623": 2.59043288230896, + "624": 2.5701470375061035, + "625": 2.563173294067383, + "626": 2.548001289367676, + "627": 2.594208240509033, + "628": 2.5640463829040527, + "629": 2.588613510131836, + "630": 2.5525963306427, + "631": 2.5972185134887695, + "632": 2.557283401489258, + "633": 2.572479248046875, + "634": 2.5519425868988037, + "635": 2.575077533721924, + "636": 2.5363049507141113, + "637": 2.56215238571167, + "638": 2.533780574798584, + "639": 2.554736852645874, + "640": 2.5582571029663086, + "641": 2.580899715423584, + "642": 2.553868293762207, + "643": 2.5631532669067383, + "644": 2.552365779876709, + "645": 2.572369337081909, + "646": 2.542363166809082, + "647": 2.5645699501037598 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "train_epoch_time": 4.789721488952637, + "train_loss": 2.5492118271727815, + "train_score": 0.27290956780763403, + "val_loss": 2.5740746077659074, + "val_score": 0.2664860799378287 + }, + { + "epoch": 12, + "grad_norm": 0.333244651556015, + "learning_rate": 0.215, + "model_norm": 90.05404663085938, + "step_logs": { + "grad_norm": { + "648": 0.8464791178703308, + "649": 0.792945384979248, + "650": 0.7463672161102295, + "651": 0.7440354824066162, + "652": 0.6671098470687866, + "653": 0.6137361526489258, + "654": 0.6120710372924805, + "655": 0.6230190992355347, + "656": 0.6977333426475525, + "657": 0.7608842849731445, + "658": 0.7870320081710815, + "659": 0.7720927000045776, + "660": 0.7534703612327576, + "661": 0.7125488519668579, + "662": 0.6258469223976135, + "663": 0.5617893934249878, + "664": 0.5211414694786072, + "665": 0.5212739109992981, + "666": 0.5903427600860596, + "667": 0.614560067653656, + "668": 0.513554573059082, + "669": 0.48795396089553833, + "670": 0.4889700412750244, + "671": 0.4966708719730377, + "672": 0.5107088088989258, + "673": 0.5752347111701965, + "674": 0.6768213510513306, + "675": 0.6287813186645508, + "676": 0.5406306982040405, + "677": 0.5165386199951172, + "678": 0.5147174000740051, + "679": 0.5726690292358398, + "680": 0.5681961178779602, + "681": 0.5488607287406921, + "682": 0.5158001184463501, + "683": 0.49433648586273193, + "684": 0.5104917287826538, + "685": 0.5056292414665222, + "686": 0.452014684677124, + "687": 0.4608464539051056, + "688": 0.3932307958602905, + "689": 0.34603723883628845, + "690": 0.350154846906662, + "691": 0.32048991322517395, + "692": 0.32272493839263916, + "693": 0.3508996069431305, + "694": 0.34015968441963196, + "695": 0.3036709129810333, + "696": 0.21490435302257538, + "697": 0.26322346925735474, + "698": 0.28995272517204285, + "699": 0.30863478779792786, + "700": 0.3334333300590515, + "701": 0.333244651556015 + }, + "loss": { + "648": 2.5739521980285645, + "649": 2.5625720024108887, + "650": 2.528660774230957, + "651": 2.5539798736572266, + "652": 2.539076805114746, + "653": 2.5287158489227295, + "654": 2.5322656631469727, + "655": 2.5453648567199707, + "656": 2.515843391418457, + "657": 2.5385658740997314, + "658": 2.540787935256958, + "659": 2.5231783390045166, + "660": 2.5068016052246094, + "661": 2.531193733215332, + "662": 2.537111520767212, + "663": 2.515406608581543, + "664": 2.5116560459136963, + "665": 2.5265791416168213, + "666": 2.5038695335388184, + "667": 2.5283470153808594, + "668": 2.4902427196502686, + "669": 2.511512517929077, + "670": 2.5232579708099365, + "671": 2.5061254501342773, + "672": 2.4852712154388428, + "673": 2.5059726238250732, + "674": 2.4989097118377686, + "675": 2.5028374195098877, + "676": 2.4783995151519775, + "677": 2.5110583305358887, + "678": 2.505390167236328, + "679": 2.479796886444092, + "680": 2.4870598316192627, + "681": 2.50077748298645, + "682": 2.518289804458618, + "683": 2.4920525550842285, + "684": 2.497824192047119, + "685": 2.5083065032958984, + "686": 2.4897947311401367, + "687": 2.505855083465576, + "688": 2.4958443641662598, + "689": 2.481900453567505, + "690": 2.4656410217285156, + "691": 2.494762897491455, + "692": 2.4867098331451416, + "693": 2.487244129180908, + "694": 2.487565040588379, + "695": 2.4959309101104736, + "696": 2.486945629119873, + "697": 2.4767005443573, + "698": 2.4904942512512207, + "699": 2.4781301021575928, + "700": 2.491523265838623, + "701": 2.475743532180786 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "train_epoch_time": 4.789546966552734, + "train_loss": 2.4798699770288453, + "train_score": 0.2917907102463748, + "val_loss": 2.5084203365339066, + "val_score": 0.2836717860912209 + }, + { + "epoch": 13, + "grad_norm": 0.22255468368530273, + "learning_rate": 0.14333333333333334, + "model_norm": 90.06339263916016, + "step_logs": { + "grad_norm": { + "702": 0.3254527747631073, + "703": 0.32196152210235596, + "704": 0.27880069613456726, + "705": 0.2983052730560303, + "706": 0.2600209712982178, + "707": 0.24989959597587585, + "708": 0.2855205237865448, + "709": 0.2425023913383484, + "710": 0.19059450924396515, + "711": 0.2019641250371933, + "712": 0.23242764174938202, + "713": 0.2383408099412918, + "714": 0.3065362870693207, + "715": 0.28599047660827637, + "716": 0.24889793992042542, + "717": 0.23252075910568237, + "718": 0.2174641191959381, + "719": 0.23333929479122162, + "720": 0.2253527045249939, + "721": 0.2340705394744873, + "722": 0.21060937643051147, + "723": 0.19206549227237701, + "724": 0.2401646226644516, + "725": 0.22520342469215393, + "726": 0.2765664756298065, + "727": 0.2403375655412674, + "728": 0.23163574934005737, + "729": 0.2577357292175293, + "730": 0.28781864047050476, + "731": 0.28929200768470764, + "732": 0.2649271488189697, + "733": 0.19876720011234283, + "734": 0.23014399409294128, + "735": 0.22949522733688354, + "736": 0.25297853350639343, + "737": 0.2293279469013214, + "738": 0.244782954454422, + "739": 0.26518508791923523, + "740": 0.2911612391471863, + "741": 0.2876875102519989, + "742": 0.2208746075630188, + "743": 0.2016288936138153, + "744": 0.18806782364845276, + "745": 0.18521235883235931, + "746": 0.22986558079719543, + "747": 0.24523615837097168, + "748": 0.22743189334869385, + "749": 0.24057932198047638, + "750": 0.2770741879940033, + "751": 0.22961671650409698, + "752": 0.2245391607284546, + "753": 0.2719363868236542, + "754": 0.2566705644130707, + "755": 0.22255468368530273 + }, + "loss": { + "702": 2.470973014831543, + "703": 2.4694724082946777, + "704": 2.4753260612487793, + "705": 2.4650182723999023, + "706": 2.4860806465148926, + "707": 2.4775500297546387, + "708": 2.471518039703369, + "709": 2.446056365966797, + "710": 2.4666614532470703, + "711": 2.4795753955841064, + "712": 2.463106632232666, + "713": 2.4601449966430664, + "714": 2.4911739826202393, + "715": 2.475454330444336, + "716": 2.4524683952331543, + "717": 2.479682445526123, + "718": 2.475834369659424, + "719": 2.4757492542266846, + "720": 2.4753825664520264, + "721": 2.457169771194458, + "722": 2.4689249992370605, + "723": 2.4699013233184814, + "724": 2.483585834503174, + "725": 2.4806008338928223, + "726": 2.4481563568115234, + "727": 2.4571986198425293, + "728": 2.4966092109680176, + "729": 2.4555840492248535, + "730": 2.464932918548584, + "731": 2.4793448448181152, + "732": 2.4816951751708984, + "733": 2.462080717086792, + "734": 2.4785118103027344, + "735": 2.477837085723877, + "736": 2.442711114883423, + "737": 2.467097282409668, + "738": 2.464322566986084, + "739": 2.4767346382141113, + "740": 2.455671787261963, + "741": 2.4706153869628906, + "742": 2.4600114822387695, + "743": 2.462299346923828, + "744": 2.461491823196411, + "745": 2.4541378021240234, + "746": 2.469048023223877, + "747": 2.4606869220733643, + "748": 2.4661974906921387, + "749": 2.463500499725342, + "750": 2.463406562805176, + "751": 2.45944881439209, + "752": 2.4614057540893555, + "753": 2.4608306884765625, + "754": 2.4598333835601807, + "755": 2.4613046646118164 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "train_epoch_time": 4.788499593734741, + "train_loss": 2.458518195118076, + "train_score": 0.29523515953566115, + "val_loss": 2.4877452877714767, + "val_score": 0.28768567082255087 + }, + { + "epoch": 14, + "grad_norm": 0.22655870020389557, + "learning_rate": 0.07166666666666667, + "model_norm": 90.0665054321289, + "step_logs": { + "grad_norm": { + "756": 0.20486435294151306, + "757": 0.20474794507026672, + "758": 0.2312590479850769, + "759": 0.21431821584701538, + "760": 0.27988582849502563, + "761": 0.23682889342308044, + "762": 0.2059798687696457, + "763": 0.21797338128089905, + "764": 0.24932338297367096, + "765": 0.21827971935272217, + "766": 0.22083665430545807, + "767": 0.2079319953918457, + "768": 0.21095088124275208, + "769": 0.25421908497810364, + "770": 0.22950629889965057, + "771": 0.25423556566238403, + "772": 0.2013881653547287, + "773": 0.21997787058353424, + "774": 0.23208023607730865, + "775": 0.21095694601535797, + "776": 0.21547789871692657, + "777": 0.2107398808002472, + "778": 0.22477035224437714, + "779": 0.2206610143184662, + "780": 0.2200593799352646, + "781": 0.2470172494649887, + "782": 0.19685353338718414, + "783": 0.22865822911262512, + "784": 0.21985742449760437, + "785": 0.21865254640579224, + "786": 0.22141164541244507, + "787": 0.24514709413051605, + "788": 0.21211248636245728, + "789": 0.21584874391555786, + "790": 0.23548944294452667, + "791": 0.22890590131282806, + "792": 0.22032247483730316, + "793": 0.19007331132888794, + "794": 0.2175266444683075, + "795": 0.20073430240154266, + "796": 0.22327978909015656, + "797": 0.20514175295829773, + "798": 0.2098139226436615, + "799": 0.23038193583488464, + "800": 0.1975613236427307, + "801": 0.19325576722621918, + "802": 0.2310025691986084, + "803": 0.17658105492591858, + "804": 0.23319245874881744, + "805": 0.231744185090065, + "806": 0.21699704229831696, + "807": 0.2077859342098236, + "808": 0.20882068574428558, + "809": 0.22655870020389557 + }, + "loss": { + "756": 2.4470129013061523, + "757": 2.4508137702941895, + "758": 2.481356143951416, + "759": 2.4673657417297363, + "760": 2.426910638809204, + "761": 2.4617691040039062, + "762": 2.462177038192749, + "763": 2.451254367828369, + "764": 2.4547553062438965, + "765": 2.4492552280426025, + "766": 2.455984592437744, + "767": 2.457019090652466, + "768": 2.4542086124420166, + "769": 2.451037645339966, + "770": 2.4701530933380127, + "771": 2.475334644317627, + "772": 2.452157974243164, + "773": 2.4461469650268555, + "774": 2.465477466583252, + "775": 2.4510555267333984, + "776": 2.4737071990966797, + "777": 2.466977119445801, + "778": 2.432931900024414, + "779": 2.4575607776641846, + "780": 2.4424209594726562, + "781": 2.4508864879608154, + "782": 2.4603219032287598, + "783": 2.461171865463257, + "784": 2.464177131652832, + "785": 2.4428091049194336, + "786": 2.441136360168457, + "787": 2.4483089447021484, + "788": 2.4741387367248535, + "789": 2.44865083694458, + "790": 2.4641923904418945, + "791": 2.447673797607422, + "792": 2.467134475708008, + "793": 2.446427822113037, + "794": 2.445901870727539, + "795": 2.4542627334594727, + "796": 2.4518728256225586, + "797": 2.447606086730957, + "798": 2.449295997619629, + "799": 2.440582752227783, + "800": 2.471240997314453, + "801": 2.4618782997131348, + "802": 2.440150737762451, + "803": 2.450505018234253, + "804": 2.4535796642303467, + "805": 2.453066825866699, + "806": 2.4409408569335938, + "807": 2.4557628631591797, + "808": 2.443204879760742, + "809": 2.4749069213867188 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "train_epoch_time": 4.788120269775391, + "train_loss": 2.452035622822503, + "train_score": 0.29824807216310434, + "val_loss": 2.4824917954226997, + "val_score": 0.2904707235118415 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:29:51.343361", + "final_model_norm": 90.0665054321289, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:28:10.527306", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 6.077920913696289, + "learning_rate": 2.15e-11, + "model_norm": 87.36822509765625, + "step_logs": { + "grad_norm": { + "0": 22.7664794921875, + "1": 23.4499454498291, + "2": 6.6349334716796875, + "3": 7.6453537940979, + "4": 21.053424835205078, + "5": 5.934286594390869, + "6": 5.640627861022949, + "7": 4.451308250427246, + "8": 4.475229263305664, + "9": 4.164397716522217, + "10": 3.7405574321746826, + "11": 4.641786575317383, + "12": 49.5823860168457, + "13": 9.11623764038086, + "14": 9.79990291595459, + "15": 4.567177772521973, + "16": 4.766762733459473, + "17": 9.39215087890625, + "18": 4.285684108734131, + "19": 4.2459893226623535, + "20": 2.2008137702941895, + "21": 2.864260673522949, + "22": 3.6269586086273193, + "23": 7.04875373840332, + "24": 4.5517730712890625, + "25": 5.148194313049316, + "26": 6.476187705993652, + "27": 6.546270370483398, + "28": 4.287119388580322, + "29": 7.839882850646973, + "30": 4.186831951141357, + "31": 6.000692844390869, + "32": 4.175516605377197, + "33": 15.996603965759277, + "34": 9.080464363098145, + "35": 7.6884284019470215, + "36": 38.54820251464844, + "37": 14.596907615661621, + "38": 6.987175464630127, + "39": 19.94704818725586, + "40": 7.987712383270264, + "41": 5.601441383361816, + "42": 8.12180233001709, + "43": 17.737707138061523, + "44": 9.772014617919922, + "45": 5.679656982421875, + "46": 4.703907012939453, + "47": 7.149433135986328, + "48": 12.810194969177246, + "49": 5.0451154708862305, + "50": 4.432951927185059, + "51": 4.247447490692139, + "52": 3.1106362342834473, + "53": 6.077920913696289 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.8218994140625, + "3": 3.692150592803955, + "4": 4.159581184387207, + "5": 4.390280723571777, + "6": 3.900965690612793, + "7": 3.615814208984375, + "8": 3.503880500793457, + "9": 3.5965518951416016, + "10": 3.383478879928589, + "11": 3.4530043601989746, + "12": 3.501619815826416, + "13": 3.548842430114746, + "14": 6.311364650726318, + "15": 4.368919849395752, + "16": 3.7320752143859863, + "17": 4.103618144989014, + "18": 5.041207313537598, + "19": 4.1853837966918945, + "20": 3.632913112640381, + "21": 3.43510103225708, + "22": 3.862799882888794, + "23": 3.7428183555603027, + "24": 5.015302658081055, + "25": 4.486359119415283, + "26": 4.012516975402832, + "27": 5.328915596008301, + "28": 4.898715019226074, + "29": 4.456262588500977, + "30": 5.987215042114258, + "31": 4.812840461730957, + "32": 4.575559616088867, + "33": 6.3855366706848145, + "34": 9.660634994506836, + "35": 7.86287260055542, + "36": 15.256529808044434, + "37": 8.436349868774414, + "38": 6.197444438934326, + "39": 21.0192813873291, + "40": 11.476614952087402, + "41": 9.14688777923584, + "42": 6.709882736206055, + "43": 9.87257194519043, + "44": 13.530729293823242, + "45": 11.448413848876953, + "46": 8.412544250488281, + "47": 6.6663665771484375, + "48": 5.991023063659668, + "49": 11.85226058959961, + "50": 9.765589714050293, + "51": 8.009904861450195, + "52": 5.777992248535156, + "53": 4.64279842376709 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "train_epoch_time": 4.789698362350464, + "train_loss": 6.529137916783863, + "train_score": 0.15261163918068968, + "val_loss": 6.497304850139246, + "val_score": 0.15114631179621793 + }, + { + "epoch": 1, + "grad_norm": 1.4856921434402466, + "learning_rate": 0.215, + "model_norm": 87.27825927734375, + "step_logs": { + "grad_norm": { + "54": 11.869610786437988, + "55": 4.589770793914795, + "56": 3.871187448501587, + "57": 3.0290331840515137, + "58": 2.436267852783203, + "59": 2.2911205291748047, + "60": 1.804243803024292, + "61": 2.240053415298462, + "62": 7.117811679840088, + "63": 2.5063371658325195, + "64": 2.2628278732299805, + "65": 1.9130676984786987, + "66": 1.442577838897705, + "67": 4.810521125793457, + "68": 1.9549905061721802, + "69": 1.8040640354156494, + "70": 1.366626262664795, + "71": 4.335608005523682, + "72": 1.8876115083694458, + "73": 1.7165403366088867, + "74": 0.7243217825889587, + "75": 2.042759656906128, + "76": 1.7447822093963623, + "77": 0.6170192360877991, + "78": 0.7040872573852539, + "79": 1.5890698432922363, + "80": 1.6113613843917847, + "81": 1.5932040214538574, + "82": 1.6757556200027466, + "83": 1.8084766864776611, + "84": 1.5848227739334106, + "85": 0.8979315161705017, + "86": 1.2454639673233032, + "87": 2.7386019229888916, + "88": 1.6894149780273438, + "89": 1.1755759716033936, + "90": 2.214646339416504, + "91": 1.549574613571167, + "92": 0.48825210332870483, + "93": 0.8731581568717957, + "94": 1.1210391521453857, + "95": 2.0358450412750244, + "96": 1.5302635431289673, + "97": 0.4016644060611725, + "98": 0.30150073766708374, + "99": 0.37550848722457886, + "100": 0.6430637240409851, + "101": 0.8840423226356506, + "102": 1.7102267742156982, + "103": 1.507495403289795, + "104": 0.7491609454154968, + "105": 0.9745044112205505, + "106": 1.8706775903701782, + "107": 1.4856921434402466 + }, + "loss": { + "54": 6.523248672485352, + "55": 9.864269256591797, + "56": 8.262983322143555, + "57": 6.586701393127441, + "58": 5.057945251464844, + "59": 4.078909873962402, + "60": 3.469339609146118, + "61": 3.7730026245117188, + "62": 4.014268398284912, + "63": 5.833746910095215, + "64": 5.0863566398620605, + "65": 4.142653465270996, + "66": 3.459670066833496, + "67": 3.709359645843506, + "68": 4.762309551239014, + "69": 4.044641494750977, + "70": 3.460516929626465, + "71": 3.6896283626556396, + "72": 4.4954023361206055, + "73": 3.896549701690674, + "74": 3.3746585845947266, + "75": 3.424943447113037, + "76": 3.7681851387023926, + "77": 3.3631649017333984, + "78": 3.356832504272461, + "79": 3.42862868309021, + "80": 3.6133546829223633, + "81": 3.4071061611175537, + "82": 3.5914642810821533, + "83": 3.437199115753174, + "84": 3.650853395462036, + "85": 3.3495874404907227, + "86": 3.4265880584716797, + "87": 3.49777889251709, + "88": 3.8932418823242188, + "89": 3.388164758682251, + "90": 3.4002089500427246, + "91": 3.7453536987304688, + "92": 3.344038724899292, + "93": 3.35149884223938, + "94": 3.4281797409057617, + "95": 3.447147846221924, + "96": 3.7067110538482666, + "97": 3.359945058822632, + "98": 3.2945384979248047, + "99": 3.339402198791504, + "100": 3.316775321960449, + "101": 3.402829170227051, + "102": 3.37821626663208, + "103": 3.588418960571289, + "104": 3.3727869987487793, + "105": 3.369229793548584, + "106": 3.4336323738098145, + "107": 3.6263248920440674 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "train_epoch_time": 4.786825180053711, + "train_loss": 3.3327819862529915, + "train_score": 0.15260939743033097, + "val_loss": 3.3497142756163183, + "val_score": 0.151137342221017 + }, + { + "epoch": 2, + "grad_norm": 0.8111432790756226, + "learning_rate": 0.215, + "model_norm": 87.2788314819336, + "step_logs": { + "grad_norm": { + "108": 0.35535019636154175, + "109": 0.41004061698913574, + "110": 0.7887808084487915, + "111": 1.0005075931549072, + "112": 1.6472957134246826, + "113": 1.4260789155960083, + "114": 0.6731420755386353, + "115": 0.8426632285118103, + "116": 1.4507036209106445, + "117": 1.3046455383300781, + "118": 0.8133808374404907, + "119": 0.9756836891174316, + "120": 1.4995774030685425, + "121": 1.268728494644165, + "122": 0.5787860751152039, + "123": 0.7328672409057617, + "124": 1.1708590984344482, + "125": 1.1593525409698486, + "126": 1.08345365524292, + "127": 1.107581615447998, + "128": 1.1313707828521729, + "129": 1.1563996076583862, + "130": 1.1990396976470947, + "131": 1.165459394454956, + "132": 1.0536150932312012, + "133": 1.0777044296264648, + "134": 1.1467459201812744, + "135": 1.1163371801376343, + "136": 1.0541691780090332, + "137": 1.0202151536941528, + "138": 0.8965868353843689, + "139": 0.9138199090957642, + "140": 0.9571571946144104, + "141": 0.9860230088233948, + "142": 1.0593401193618774, + "143": 0.9975009560585022, + "144": 0.8001709580421448, + "145": 0.8473645448684692, + "146": 0.9612659811973572, + "147": 0.9572203159332275, + "148": 0.948639452457428, + "149": 0.9353222250938416, + "150": 0.8946100473403931, + "151": 0.866615891456604, + "152": 0.8099010586738586, + "153": 0.8457209467887878, + "154": 0.9313750267028809, + "155": 0.9235068559646606, + "156": 0.8846747279167175, + "157": 0.8261868953704834, + "158": 0.6901465058326721, + "159": 0.7314639091491699, + "160": 0.8012805581092834, + "161": 0.8111432790756226 + }, + "loss": { + "108": 3.322842836380005, + "109": 3.31815242767334, + "110": 3.356515407562256, + "111": 3.3720946311950684, + "112": 3.3815221786499023, + "113": 3.5712978839874268, + "114": 3.3611392974853516, + "115": 3.365431785583496, + "116": 3.387702703475952, + "117": 3.51013445854187, + "118": 3.331129550933838, + "119": 3.3873703479766846, + "120": 3.3873519897460938, + "121": 3.5078282356262207, + "122": 3.356292247772217, + "123": 3.340384006500244, + "124": 3.3751261234283447, + "125": 3.46814227104187, + "126": 3.3612375259399414, + "127": 3.417555570602417, + "128": 3.380253314971924, + "129": 3.452554225921631, + "130": 3.3590869903564453, + "131": 3.4244987964630127, + "132": 3.348438262939453, + "133": 3.4079668521881104, + "134": 3.361523151397705, + "135": 3.4007935523986816, + "136": 3.3805172443389893, + "137": 3.425121784210205, + "138": 3.351166248321533, + "139": 3.390328884124756, + "140": 3.319578170776367, + "141": 3.360882043838501, + "142": 3.3449981212615967, + "143": 3.430695056915283, + "144": 3.33786940574646, + "145": 3.371558666229248, + "146": 3.346402406692505, + "147": 3.3841986656188965, + "148": 3.3241076469421387, + "149": 3.3841922283172607, + "150": 3.3525712490081787, + "151": 3.3927950859069824, + "152": 3.3303329944610596, + "153": 3.368870973587036, + "154": 3.353760242462158, + "155": 3.4009079933166504, + "156": 3.3530325889587402, + "157": 3.388096570968628, + "158": 3.3189611434936523, + "159": 3.330489158630371, + "160": 3.362576961517334, + "161": 3.3486876487731934 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "train_epoch_time": 4.787140846252441, + "train_loss": 3.3408212109651934, + "train_score": 0.1526071556799723, + "val_loss": 3.3590463866596245, + "val_score": 0.15113285743341653 + }, + { + "epoch": 3, + "grad_norm": 0.6169420480728149, + "learning_rate": 0.215, + "model_norm": 87.28771209716797, + "step_logs": { + "grad_norm": { + "162": 0.8387348651885986, + "163": 0.8527382016181946, + "164": 0.858231246471405, + "165": 0.8544841408729553, + "166": 0.8558735847473145, + "167": 0.8337991833686829, + "168": 0.7569968700408936, + "169": 0.7420404553413391, + "170": 0.7194749712944031, + "171": 0.709821343421936, + "172": 0.6832669973373413, + "173": 0.7073488235473633, + "174": 0.7284060120582581, + "175": 0.7330428957939148, + "176": 0.7531108260154724, + "177": 0.7508716583251953, + "178": 0.729932963848114, + "179": 0.7399073839187622, + "180": 0.7604870200157166, + "181": 0.7344769835472107, + "182": 0.6657344698905945, + "183": 0.6793492436408997, + "184": 1.359244465827942, + "185": 0.7296868562698364, + "186": 0.7522320747375488, + "187": 0.7540530562400818, + "188": 0.7392151951789856, + "189": 0.7214963436126709, + "190": 0.7019885778427124, + "191": 0.6850538849830627, + "192": 0.6184571385383606, + "193": 0.6071142554283142, + "194": 0.644235372543335, + "195": 0.5772275924682617, + "196": 0.5074628591537476, + "197": 0.50543612241745, + "198": 0.55255526304245, + "199": 0.559185802936554, + "200": 0.5517669320106506, + "201": 0.5493614077568054, + "202": 0.560684323310852, + "203": 0.58799147605896, + "204": 0.5828995108604431, + "205": 0.5954332947731018, + "206": 0.6386681795120239, + "207": 0.6341294050216675, + "208": 2.3786919116973877, + "209": 0.5914960503578186, + "210": 0.6234338283538818, + "211": 0.6651337146759033, + "212": 0.7441654205322266, + "213": 0.6836565732955933, + "214": 0.5970624089241028, + "215": 0.6169420480728149 + }, + "loss": { + "162": 3.3511502742767334, + "163": 3.3699984550476074, + "164": 3.310316562652588, + "165": 3.377857208251953, + "166": 3.3292713165283203, + "167": 3.403754711151123, + "168": 3.349243640899658, + "169": 3.3550338745117188, + "170": 3.331726312637329, + "171": 3.3729991912841797, + "172": 3.3522112369537354, + "173": 3.3064165115356445, + "174": 3.3293087482452393, + "175": 3.3332948684692383, + "176": 3.3127431869506836, + "177": 3.356052875518799, + "178": 3.3308143615722656, + "179": 3.335836887359619, + "180": 3.331758499145508, + "181": 3.370661973953247, + "182": 3.3110573291778564, + "183": 3.3385353088378906, + "184": 3.3235414028167725, + "185": 3.3248844146728516, + "186": 3.3646793365478516, + "187": 3.3493258953094482, + "188": 3.339038848876953, + "189": 3.359717607498169, + "190": 3.3311684131622314, + "191": 3.3507838249206543, + "192": 3.3271584510803223, + "193": 3.356938362121582, + "194": 3.3362069129943848, + "195": 3.3590471744537354, + "196": 3.3082900047302246, + "197": 3.328854560852051, + "198": 3.3504397869110107, + "199": 3.314671516418457, + "200": 3.3251729011535645, + "201": 3.328035831451416, + "202": 3.3087635040283203, + "203": 3.354644298553467, + "204": 3.3108410835266113, + "205": 3.290799379348755, + "206": 3.3224592208862305, + "207": 3.345951557159424, + "208": 3.299952507019043, + "209": 3.3060054779052734, + "210": 3.3326518535614014, + "211": 3.305673122406006, + "212": 3.3199119567871094, + "213": 3.316722869873047, + "214": 3.3202450275421143, + "215": 3.31882643699646 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "train_epoch_time": 4.787456750869751, + "train_loss": 3.313899508234076, + "train_score": 0.15282348453399955, + "val_loss": 3.3315236439797964, + "val_score": 0.15133018813916108 + }, + { + "epoch": 4, + "grad_norm": 0.670421838760376, + "learning_rate": 0.215, + "model_norm": 87.3119125366211, + "step_logs": { + "grad_norm": { + "216": 0.6451330780982971, + "217": 0.6348541378974915, + "218": 0.6159845590591431, + "219": 0.6299844980239868, + "220": 0.6784602403640747, + "221": 0.6624956130981445, + "222": 0.5949603915214539, + "223": 0.574101984500885, + "224": 0.5231589674949646, + "225": 0.541008472442627, + "226": 0.5974878668785095, + "227": 0.586403489112854, + "228": 0.5651665329933167, + "229": 0.5600197911262512, + "230": 0.6042607426643372, + "231": 0.7281671762466431, + "232": 0.5431760549545288, + "233": 0.539922297000885, + "234": 0.5826413631439209, + "235": 0.7212216258049011, + "236": 0.6261200904846191, + "237": 0.6035612225532532, + "238": 0.6246703267097473, + "239": 0.6139011979103088, + "240": 0.6408724188804626, + "241": 0.6072050929069519, + "242": 0.537537157535553, + "243": 0.5610190033912659, + "244": 0.5322518348693848, + "245": 0.5885132551193237, + "246": 0.4615285098552704, + "247": 0.43787333369255066, + "248": 0.40506094694137573, + "249": 0.3916375935077667, + "250": 0.41418108344078064, + "251": 0.405314564704895, + "252": 0.4265989065170288, + "253": 0.44390663504600525, + "254": 0.43615999817848206, + "255": 0.46891316771507263, + "256": 0.42196711897850037, + "257": 0.5520937442779541, + "258": 0.4568729102611542, + "259": 0.4163910448551178, + "260": 0.4757370948791504, + "261": 0.532852828502655, + "262": 0.5347477793693542, + "263": 0.5436654686927795, + "264": 0.5505630373954773, + "265": 0.6072302460670471, + "266": 0.6586366295814514, + "267": 0.7452479004859924, + "268": 0.7439396977424622, + "269": 0.670421838760376 + }, + "loss": { + "216": 3.317852020263672, + "217": 3.3241753578186035, + "218": 3.280233860015869, + "219": 3.3255691528320312, + "220": 3.330249547958374, + "221": 3.3232483863830566, + "222": 3.3002681732177734, + "223": 3.3094522953033447, + "224": 3.2819571495056152, + "225": 3.2664742469787598, + "226": 3.294806480407715, + "227": 3.289361000061035, + "228": 3.2745213508605957, + "229": 3.252540349960327, + "230": 3.2487754821777344, + "231": 3.2643327713012695, + "232": 3.265249490737915, + "233": 3.2404260635375977, + "234": 3.244673490524292, + "235": 3.247864246368408, + "236": 3.2547900676727295, + "237": 3.2230396270751953, + "238": 3.237241744995117, + "239": 3.2245707511901855, + "240": 3.2629575729370117, + "241": 3.2289626598358154, + "242": 3.2242543697357178, + "243": 3.218991279602051, + "244": 3.202751874923706, + "245": 3.196383476257324, + "246": 3.228240489959717, + "247": 3.188025712966919, + "248": 3.221277952194214, + "249": 3.206367254257202, + "250": 3.1918442249298096, + "251": 3.1852059364318848, + "252": 3.2247707843780518, + "253": 3.1700127124786377, + "254": 3.1840429306030273, + "255": 3.1898441314697266, + "256": 3.1908950805664062, + "257": 3.1921744346618652, + "258": 3.18982195854187, + "259": 3.17565655708313, + "260": 3.1605029106140137, + "261": 3.164581775665283, + "262": 3.1729331016540527, + "263": 3.1780571937561035, + "264": 3.1998534202575684, + "265": 3.162940740585327, + "266": 3.182992696762085, + "267": 3.195838212966919, + "268": 3.1946492195129395, + "269": 3.1778159141540527 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "train_epoch_time": 4.786958932876587, + "train_loss": 3.161458277668125, + "train_score": 0.16830501256662941, + "val_loss": 3.1793044029503, + "val_score": 0.16916170339907352 + }, + { + "epoch": 5, + "grad_norm": 0.37339940667152405, + "learning_rate": 0.215, + "model_norm": 87.32972717285156, + "step_logs": { + "grad_norm": { + "270": 0.6625267863273621, + "271": 0.6968127489089966, + "272": 0.6935476660728455, + "273": 0.71907639503479, + "274": 0.6943307518959045, + "275": 0.7740764617919922, + "276": 0.7148994207382202, + "277": 0.6267349720001221, + "278": 0.5778032541275024, + "279": 0.6179059147834778, + "280": 0.6499062180519104, + "281": 0.6469667553901672, + "282": 0.6240212917327881, + "283": 0.6231783032417297, + "284": 0.6064325571060181, + "285": 0.569333016872406, + "286": 0.5686502456665039, + "287": 0.597974419593811, + "288": 0.675082266330719, + "289": 0.6956878900527954, + "290": 0.723746657371521, + "291": 2.3303587436676025, + "292": 0.7077440619468689, + "293": 0.6254588961601257, + "294": 0.6035510301589966, + "295": 0.606689989566803, + "296": 0.5411074757575989, + "297": 0.5205525159835815, + "298": 0.5259056687355042, + "299": 0.5359814763069153, + "300": 0.5090657472610474, + "301": 0.4810810089111328, + "302": 0.47699999809265137, + "303": 0.4793867766857147, + "304": 0.46788397431373596, + "305": 0.49155518412590027, + "306": 0.4893914759159088, + "307": 0.4411744773387909, + "308": 0.3958076536655426, + "309": 0.38335585594177246, + "310": 0.3874962329864502, + "311": 0.39907410740852356, + "312": 0.36685293912887573, + "313": 0.36812588572502136, + "314": 0.37550675868988037, + "315": 0.3682321012020111, + "316": 0.29608356952667236, + "317": 0.3181905150413513, + "318": 0.3577767014503479, + "319": 0.34146150946617126, + "320": 0.31448543071746826, + "321": 0.3274773955345154, + "322": 0.32901275157928467, + "323": 0.37339940667152405 + }, + "loss": { + "270": 3.1653189659118652, + "271": 3.1713690757751465, + "272": 3.168196678161621, + "273": 3.178093194961548, + "274": 3.163637638092041, + "275": 3.1799283027648926, + "276": 3.146660804748535, + "277": 3.1673648357391357, + "278": 3.1741108894348145, + "279": 3.1767468452453613, + "280": 3.129758358001709, + "281": 3.155074119567871, + "282": 3.169614315032959, + "283": 3.1428141593933105, + "284": 3.1488687992095947, + "285": 3.1413421630859375, + "286": 3.1328420639038086, + "287": 3.135307788848877, + "288": 3.145845413208008, + "289": 3.144890785217285, + "290": 3.13344407081604, + "291": 3.157616138458252, + "292": 3.139730453491211, + "293": 3.1229124069213867, + "294": 3.156095266342163, + "295": 3.1524593830108643, + "296": 3.1093201637268066, + "297": 3.1507599353790283, + "298": 3.1365773677825928, + "299": 3.1154351234436035, + "300": 3.119210958480835, + "301": 3.1349916458129883, + "302": 3.118986129760742, + "303": 3.114738702774048, + "304": 3.1073477268218994, + "305": 3.1248555183410645, + "306": 3.148867130279541, + "307": 3.1083621978759766, + "308": 3.0962090492248535, + "309": 3.102027416229248, + "310": 3.0883021354675293, + "311": 3.1197428703308105, + "312": 3.0965030193328857, + "313": 3.0827300548553467, + "314": 3.1131014823913574, + "315": 3.1007065773010254, + "316": 3.0781543254852295, + "317": 3.0970852375030518, + "318": 3.0916857719421387, + "319": 3.0961599349975586, + "320": 3.0929479598999023, + "321": 3.0957860946655273, + "322": 3.0800256729125977, + "323": 3.0998568534851074 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "train_epoch_time": 4.78779411315918, + "train_loss": 3.0909181351299093, + "train_score": 0.16923197631856463, + "val_loss": 3.1075858760509645, + "val_score": 0.1697133325305516 + }, + { + "epoch": 6, + "grad_norm": 0.6102023124694824, + "learning_rate": 0.215, + "model_norm": 87.34899139404297, + "step_logs": { + "grad_norm": { + "324": 0.4444461464881897, + "325": 0.4435453414916992, + "326": 0.41333329677581787, + "327": 0.4126593768596649, + "328": 0.41375643014907837, + "329": 0.4180811941623688, + "330": 0.40402382612228394, + "331": 0.3396933078765869, + "332": 0.3120470345020294, + "333": 0.289199560880661, + "334": 0.30204156041145325, + "335": 0.37522488832473755, + "336": 0.36297503113746643, + "337": 0.34802213311195374, + "338": 0.3163204789161682, + "339": 0.39584481716156006, + "340": 0.4930034279823303, + "341": 0.5016665458679199, + "342": 0.5021680593490601, + "343": 0.5064347982406616, + "344": 0.5202027559280396, + "345": 0.5447956323623657, + "346": 0.5895446538925171, + "347": 0.5841565728187561, + "348": 0.5752105116844177, + "349": 0.5648969411849976, + "350": 0.5351980924606323, + "351": 0.4993586242198944, + "352": 0.44063395261764526, + "353": 0.42633867263793945, + "354": 0.421481192111969, + "355": 0.44679000973701477, + "356": 0.48287343978881836, + "357": 0.5100218653678894, + "358": 0.5513139963150024, + "359": 0.5653921365737915, + "360": 0.5791242122650146, + "361": 0.5630341172218323, + "362": 0.559499979019165, + "363": 0.5815654993057251, + "364": 0.6149219274520874, + "365": 0.5965008735656738, + "366": 0.5366994738578796, + "367": 0.5196714997291565, + "368": 0.543583333492279, + "369": 0.5505325198173523, + "370": 0.5447538495063782, + "371": 0.5405067801475525, + "372": 0.5433398485183716, + "373": 0.5742666721343994, + "374": 0.591201663017273, + "375": 0.566830039024353, + "376": 0.5739427804946899, + "377": 0.6102023124694824 + }, + "loss": { + "324": 3.1055169105529785, + "325": 3.1025147438049316, + "326": 3.094877004623413, + "327": 3.0828020572662354, + "328": 3.089111804962158, + "329": 3.076788902282715, + "330": 3.093977451324463, + "331": 3.090378761291504, + "332": 3.0599541664123535, + "333": 3.074714183807373, + "334": 3.0523428916931152, + "335": 3.0880205631256104, + "336": 3.055826425552368, + "337": 3.0632164478302, + "338": 3.0672097206115723, + "339": 3.062894105911255, + "340": 3.0573248863220215, + "341": 3.0769617557525635, + "342": 3.0491747856140137, + "343": 3.07951021194458, + "344": 3.061558485031128, + "345": 3.0667731761932373, + "346": 3.0706470012664795, + "347": 3.071831226348877, + "348": 3.061471462249756, + "349": 3.0732688903808594, + "350": 3.054048538208008, + "351": 3.064836025238037, + "352": 3.049079656600952, + "353": 3.054973602294922, + "354": 3.0346391201019287, + "355": 3.032949686050415, + "356": 3.04341721534729, + "357": 3.034526824951172, + "358": 3.0418925285339355, + "359": 3.0504884719848633, + "360": 3.0313611030578613, + "361": 3.0420703887939453, + "362": 3.0334463119506836, + "363": 3.057742118835449, + "364": 3.040750503540039, + "365": 3.0545032024383545, + "366": 3.0207877159118652, + "367": 3.045848846435547, + "368": 3.0533385276794434, + "369": 3.048614501953125, + "370": 3.0379717350006104, + "371": 3.0152246952056885, + "372": 3.030602216720581, + "373": 3.0384011268615723, + "374": 3.0256872177124023, + "375": 3.020007610321045, + "376": 3.0139355659484863, + "377": 3.010154962539673 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "train_epoch_time": 4.787391901016235, + "train_loss": 3.0113053153542912, + "train_score": 0.20032841648313202, + "val_loss": 3.025356713173447, + "val_score": 0.19772083085590608 + }, + { + "epoch": 7, + "grad_norm": 0.6647846102714539, + "learning_rate": 0.215, + "model_norm": 87.36872100830078, + "step_logs": { + "grad_norm": { + "378": 0.6336457133293152, + "379": 0.6626040935516357, + "380": 0.7448855638504028, + "381": 0.7292468547821045, + "382": 0.6713274717330933, + "383": 0.6519176363945007, + "384": 0.6270025372505188, + "385": 0.6080090403556824, + "386": 0.5886038541793823, + "387": 0.6071902513504028, + "388": 0.5993844866752625, + "389": 0.586733341217041, + "390": 0.6157594919204712, + "391": 0.6306374669075012, + "392": 0.647163450717926, + "393": 0.613325297832489, + "394": 0.5771270990371704, + "395": 0.5612251162528992, + "396": 0.5745760798454285, + "397": 0.5865729451179504, + "398": 0.6132022738456726, + "399": 0.6045475602149963, + "400": 0.6059160232543945, + "401": 0.5982393622398376, + "402": 0.6250435709953308, + "403": 0.6244946122169495, + "404": 0.5974777340888977, + "405": 0.6061265468597412, + "406": 0.6592593789100647, + "407": 0.6593686938285828, + "408": 0.6627476811408997, + "409": 0.7173672318458557, + "410": 0.7597169280052185, + "411": 0.7351342439651489, + "412": 0.7842375040054321, + "413": 0.8148161172866821, + "414": 0.7393429279327393, + "415": 0.6865562200546265, + "416": 0.7091619372367859, + "417": 0.7017120122909546, + "418": 0.7307785749435425, + "419": 0.6986539959907532, + "420": 0.7098849415779114, + "421": 0.7611108422279358, + "422": 0.7074168920516968, + "423": 0.6444043517112732, + "424": 0.6126922965049744, + "425": 0.6457280516624451, + "426": 0.6926257014274597, + "427": 0.7298979163169861, + "428": 0.7551694512367249, + "429": 0.7180377840995789, + "430": 0.6376101970672607, + "431": 0.6647846102714539 + }, + "loss": { + "378": 3.012711524963379, + "379": 3.0218725204467773, + "380": 3.0113372802734375, + "381": 3.0132715702056885, + "382": 2.9926042556762695, + "383": 3.015521764755249, + "384": 2.9965782165527344, + "385": 2.985593318939209, + "386": 2.9920315742492676, + "387": 2.9951610565185547, + "388": 2.9654879570007324, + "389": 2.964205265045166, + "390": 2.97655987739563, + "391": 2.9747135639190674, + "392": 2.9611973762512207, + "393": 2.963017463684082, + "394": 2.9406800270080566, + "395": 2.9470438957214355, + "396": 2.9419054985046387, + "397": 2.952143669128418, + "398": 2.9493112564086914, + "399": 2.942966938018799, + "400": 2.9240810871124268, + "401": 2.9421606063842773, + "402": 2.938958168029785, + "403": 2.942342758178711, + "404": 2.924873113632202, + "405": 2.9249110221862793, + "406": 2.9123377799987793, + "407": 2.928295850753784, + "408": 2.9285576343536377, + "409": 2.948155403137207, + "410": 2.9168591499328613, + "411": 2.93835186958313, + "412": 2.934396743774414, + "413": 2.952394485473633, + "414": 2.9147510528564453, + "415": 2.9214444160461426, + "416": 2.915121555328369, + "417": 2.9249324798583984, + "418": 2.9088242053985596, + "419": 2.9207215309143066, + "420": 2.908618450164795, + "421": 2.9163992404937744, + "422": 2.902296781539917, + "423": 2.9183013439178467, + "424": 2.8893632888793945, + "425": 2.884275436401367, + "426": 2.89687442779541, + "427": 2.9012069702148438, + "428": 2.883091449737549, + "429": 2.9020495414733887, + "430": 2.883294105529785, + "431": 2.9085745811462402 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "train_epoch_time": 4.788618803024292, + "train_loss": 2.8893617737412964, + "train_score": 0.19945077111936538, + "val_loss": 2.895953709441403, + "val_score": 0.19649648409760231 + }, + { + "epoch": 8, + "grad_norm": 0.8645181655883789, + "learning_rate": 0.215, + "model_norm": 87.3841552734375, + "step_logs": { + "grad_norm": { + "432": 0.7320230603218079, + "433": 0.7574577331542969, + "434": 0.757548987865448, + "435": 0.7315129637718201, + "436": 0.6912124156951904, + "437": 0.6866511702537537, + "438": 0.6837176084518433, + "439": 0.658955454826355, + "440": 0.6572275757789612, + "441": 0.6956648230552673, + "442": 0.761847198009491, + "443": 0.8084657192230225, + "444": 0.7601466774940491, + "445": 0.7445797324180603, + "446": 0.7134843468666077, + "447": 0.6999015212059021, + "448": 0.6812036037445068, + "449": 0.7135583162307739, + "450": 0.8219456076622009, + "451": 0.8512318134307861, + "452": 0.8112087845802307, + "453": 0.7589200139045715, + "454": 0.6968483328819275, + "455": 0.71698397397995, + "456": 0.7640736699104309, + "457": 0.7552897334098816, + "458": 0.7140202522277832, + "459": 0.7089266180992126, + "460": 0.7449092268943787, + "461": 0.8189528584480286, + "462": 0.7386939525604248, + "463": 0.6635098457336426, + "464": 0.7072518467903137, + "465": 0.7312489151954651, + "466": 0.7184119820594788, + "467": 0.7435293197631836, + "468": 0.7844306230545044, + "469": 0.7601557970046997, + "470": 0.6535776853561401, + "471": 0.6401973962783813, + "472": 0.7259147763252258, + "473": 0.7636087536811829, + "474": 0.8167770504951477, + "475": 0.8350787162780762, + "476": 0.9137019515037537, + "477": 0.8558284044265747, + "478": 0.7120906114578247, + "479": 0.6557267308235168, + "480": 0.6116313338279724, + "481": 0.6060966849327087, + "482": 0.6451256275177002, + "483": 0.7006394863128662, + "484": 0.8169694542884827, + "485": 0.8645181655883789 + }, + "loss": { + "432": 2.895479679107666, + "433": 2.8825843334198, + "434": 2.895631790161133, + "435": 2.913215160369873, + "436": 2.8721237182617188, + "437": 2.9042539596557617, + "438": 2.8725738525390625, + "439": 2.895151138305664, + "440": 2.8678359985351562, + "441": 2.8971800804138184, + "442": 2.88449764251709, + "443": 2.8970119953155518, + "444": 2.9025702476501465, + "445": 2.892518997192383, + "446": 2.8612895011901855, + "447": 2.884847402572632, + "448": 2.870945453643799, + "449": 2.8805625438690186, + "450": 2.8783106803894043, + "451": 2.8899383544921875, + "452": 2.8653080463409424, + "453": 2.8676810264587402, + "454": 2.857863187789917, + "455": 2.874101400375366, + "456": 2.863027811050415, + "457": 2.876707077026367, + "458": 2.862279176712036, + "459": 2.854330062866211, + "460": 2.8601717948913574, + "461": 2.892487049102783, + "462": 2.8549087047576904, + "463": 2.857929229736328, + "464": 2.8356876373291016, + "465": 2.8624653816223145, + "466": 2.846660852432251, + "467": 2.8655991554260254, + "468": 2.877556800842285, + "469": 2.8712148666381836, + "470": 2.844513416290283, + "471": 2.837743043899536, + "472": 2.8294878005981445, + "473": 2.8706116676330566, + "474": 2.847562789916992, + "475": 2.862053394317627, + "476": 2.851762533187866, + "477": 2.846886157989502, + "478": 2.8304171562194824, + "479": 2.844226360321045, + "480": 2.8177504539489746, + "481": 2.815992593765259, + "482": 2.8315796852111816, + "483": 2.8190722465515137, + "484": 2.810741901397705, + "485": 2.8421401977539062 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "train_epoch_time": 4.788419485092163, + "train_loss": 2.8321788505981096, + "train_score": 0.2014952474935154, + "val_loss": 2.844468199974086, + "val_score": 0.19819173386190023 + }, + { + "epoch": 9, + "grad_norm": 0.7967838644981384, + "learning_rate": 0.215, + "model_norm": 87.40331268310547, + "step_logs": { + "grad_norm": { + "486": 0.863577663898468, + "487": 0.910273015499115, + "488": 1.0217578411102295, + "489": 1.0543659925460815, + "490": 0.9987280368804932, + "491": 0.8875438570976257, + "492": 0.763034999370575, + "493": 0.7586895823478699, + "494": 0.8020071387290955, + "495": 0.7677304148674011, + "496": 0.6512123942375183, + "497": 0.6617212891578674, + "498": 0.7435228228569031, + "499": 0.7903182506561279, + "500": 0.9293562769889832, + "501": 0.923592209815979, + "502": 0.8962411880493164, + "503": 0.9309093952178955, + "504": 1.055930256843567, + "505": 1.0557267665863037, + "506": 0.9419136643409729, + "507": 0.8568878769874573, + "508": 0.7049791216850281, + "509": 0.671477735042572, + "510": 0.6625528335571289, + "511": 0.749578595161438, + "512": 0.8767014145851135, + "513": 0.8458848595619202, + "514": 0.8194774985313416, + "515": 0.8010562062263489, + "516": 0.7702640891075134, + "517": 0.7862281203269958, + "518": 0.7974769473075867, + "519": 0.8601404428482056, + "520": 0.891409158706665, + "521": 0.9281553030014038, + "522": 0.954971194267273, + "523": 0.9194952249526978, + "524": 0.9138132333755493, + "525": 0.9203317165374756, + "526": 0.8772714138031006, + "527": 0.7488911151885986, + "528": 0.6080586314201355, + "529": 0.5554305911064148, + "530": 0.4985148012638092, + "531": 0.5306365489959717, + "532": 0.5902933478355408, + "533": 0.6542091369628906, + "534": 0.8878393769264221, + "535": 0.9390087127685547, + "536": 0.8490703701972961, + "537": 0.7996656894683838, + "538": 0.7763608694076538, + "539": 0.7967838644981384 + }, + "loss": { + "486": 2.8398008346557617, + "487": 2.846127986907959, + "488": 2.8413970470428467, + "489": 2.855076551437378, + "490": 2.822486400604248, + "491": 2.8407158851623535, + "492": 2.81491756439209, + "493": 2.825246810913086, + "494": 2.803802013397217, + "495": 2.813525438308716, + "496": 2.8082432746887207, + "497": 2.804034471511841, + "498": 2.803358554840088, + "499": 2.811717987060547, + "500": 2.8228657245635986, + "501": 2.836787462234497, + "502": 2.824258327484131, + "503": 2.825194835662842, + "504": 2.8211379051208496, + "505": 2.8480353355407715, + "506": 2.819782257080078, + "507": 2.8179264068603516, + "508": 2.7950034141540527, + "509": 2.804069995880127, + "510": 2.771759033203125, + "511": 2.792710781097412, + "512": 2.8026788234710693, + "513": 2.805750846862793, + "514": 2.7909491062164307, + "515": 2.7906928062438965, + "516": 2.7877557277679443, + "517": 2.7926082611083984, + "518": 2.776503562927246, + "519": 2.7887425422668457, + "520": 2.765568733215332, + "521": 2.8041157722473145, + "522": 2.7904062271118164, + "523": 2.8032431602478027, + "524": 2.8205058574676514, + "525": 2.802842617034912, + "526": 2.7756612300872803, + "527": 2.7845804691314697, + "528": 2.759321689605713, + "529": 2.74727201461792, + "530": 2.7412500381469727, + "531": 2.7346391677856445, + "532": 2.7556943893432617, + "533": 2.7593154907226562, + "534": 2.766733169555664, + "535": 2.7949352264404297, + "536": 2.7737178802490234, + "537": 2.7629737854003906, + "538": 2.752772092819214, + "539": 2.7690720558166504 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "train_epoch_time": 4.788261651992798, + "train_loss": 2.75392352334054, + "train_score": 0.21601394363164558, + "val_loss": 2.7689981208739955, + "val_score": 0.21132319180207193 + }, + { + "epoch": 10, + "grad_norm": 0.6906720399856567, + "learning_rate": 0.215, + "model_norm": 87.4227523803711, + "step_logs": { + "grad_norm": { + "540": 0.7674277424812317, + "541": 0.8204943537712097, + "542": 0.8144788146018982, + "543": 0.7926110029220581, + "544": 0.7511706352233887, + "545": 0.8522761464118958, + "546": 0.8249024152755737, + "547": 0.7127631306648254, + "548": 0.6826196908950806, + "549": 0.632779598236084, + "550": 0.6577969789505005, + "551": 0.7654299736022949, + "552": 0.8071280121803284, + "553": 0.843177855014801, + "554": 0.8526422381401062, + "555": 0.8966861963272095, + "556": 0.7629060745239258, + "557": 0.5559282898902893, + "558": 0.5548087954521179, + "559": 0.5962457060813904, + "560": 0.6299737691879272, + "561": 0.6545103192329407, + "562": 0.7233629822731018, + "563": 0.794723629951477, + "564": 0.9696587920188904, + "565": 1.0049703121185303, + "566": 0.8969777226448059, + "567": 0.7934767007827759, + "568": 0.6555570363998413, + "569": 0.6655522584915161, + "570": 0.7455049157142639, + "571": 0.7585878372192383, + "572": 0.711405336856842, + "573": 0.7046390771865845, + "574": 0.7649972438812256, + "575": 0.7420599460601807, + "576": 0.6338107585906982, + "577": 0.5890849232673645, + "578": 0.6227665543556213, + "579": 0.626175045967102, + "580": 0.6935070753097534, + "581": 0.7580548524856567, + "582": 0.869880735874176, + "583": 0.883039653301239, + "584": 0.8479851484298706, + "585": 0.8208180665969849, + "586": 0.822525680065155, + "587": 0.8165813088417053, + "588": 0.8054198622703552, + "589": 0.79904705286026, + "590": 0.7519515156745911, + "591": 0.7321115136146545, + "592": 0.7165383696556091, + "593": 0.6906720399856567 + }, + "loss": { + "540": 2.7537412643432617, + "541": 2.7477316856384277, + "542": 2.7607250213623047, + "543": 2.744685411453247, + "544": 2.7443807125091553, + "545": 2.771444797515869, + "546": 2.768183708190918, + "547": 2.7401552200317383, + "548": 2.7484447956085205, + "549": 2.750321865081787, + "550": 2.7463276386260986, + "551": 2.7468857765197754, + "552": 2.7507147789001465, + "553": 2.7330169677734375, + "554": 2.744373083114624, + "555": 2.7623205184936523, + "556": 2.750678539276123, + "557": 2.715502977371216, + "558": 2.698390007019043, + "559": 2.7112960815429688, + "560": 2.7187483310699463, + "561": 2.7352845668792725, + "562": 2.7246944904327393, + "563": 2.7442188262939453, + "564": 2.714545726776123, + "565": 2.746157169342041, + "566": 2.750765323638916, + "567": 2.7406365871429443, + "568": 2.7229270935058594, + "569": 2.7272002696990967, + "570": 2.7348685264587402, + "571": 2.717965841293335, + "572": 2.722810745239258, + "573": 2.681318759918213, + "574": 2.7119460105895996, + "575": 2.7244019508361816, + "576": 2.69827938079834, + "577": 2.6981313228607178, + "578": 2.7124061584472656, + "579": 2.708868980407715, + "580": 2.71474289894104, + "581": 2.7171144485473633, + "582": 2.705875873565674, + "583": 2.7219858169555664, + "584": 2.7225890159606934, + "585": 2.7190616130828857, + "586": 2.727053165435791, + "587": 2.735574960708618, + "588": 2.714120864868164, + "589": 2.7138586044311523, + "590": 2.7019567489624023, + "591": 2.7239129543304443, + "592": 2.6868321895599365, + "593": 2.704176902770996 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "train_epoch_time": 4.788338899612427, + "train_loss": 2.6928715892638504, + "train_score": 0.23654053084648494, + "val_loss": 2.7124274439433957, + "val_score": 0.2324241178043948 + }, + { + "epoch": 11, + "grad_norm": 0.8027198910713196, + "learning_rate": 0.215, + "model_norm": 87.44013977050781, + "step_logs": { + "grad_norm": { + "594": 0.6872090101242065, + "595": 0.7225258350372314, + "596": 0.8049277663230896, + "597": 0.8187502026557922, + "598": 0.7759734392166138, + "599": 0.7756792902946472, + "600": 0.8381021618843079, + "601": 0.8250980973243713, + "602": 0.7214074730873108, + "603": 0.6632557511329651, + "604": 0.6125983595848083, + "605": 0.6200022101402283, + "606": 0.6515315175056458, + "607": 0.6949056386947632, + "608": 0.7261806726455688, + "609": 0.7122187614440918, + "610": 0.6823997497558594, + "611": 0.7193889021873474, + "612": 0.8079313635826111, + "613": 0.9209291338920593, + "614": 1.0537763833999634, + "615": 1.1957826614379883, + "616": 0.9810210466384888, + "617": 0.7510979771614075, + "618": 0.6561508178710938, + "619": 0.6144815683364868, + "620": 0.6295700073242188, + "621": 0.6692794561386108, + "622": 0.6943836212158203, + "623": 0.6960968375205994, + "624": 0.7200412154197693, + "625": 0.7527978420257568, + "626": 0.7561212778091431, + "627": 0.7079261541366577, + "628": 0.6200481653213501, + "629": 0.5895781517028809, + "630": 0.623550295829773, + "631": 0.6208155155181885, + "632": 0.6302371025085449, + "633": 0.7330285310745239, + "634": 0.9005295634269714, + "635": 0.9371844530105591, + "636": 0.839468777179718, + "637": 0.7934845089912415, + "638": 0.7483586668968201, + "639": 0.6826823949813843, + "640": 0.6983129382133484, + "641": 0.7323094606399536, + "642": 0.762589156627655, + "643": 0.8058868050575256, + "644": 0.8357001543045044, + "645": 0.839078962802887, + "646": 0.8175479173660278, + "647": 0.8027198910713196 + }, + "loss": { + "594": 2.6956052780151367, + "595": 2.692734718322754, + "596": 2.703125, + "597": 2.6973822116851807, + "598": 2.7055504322052, + "599": 2.6921238899230957, + "600": 2.693716526031494, + "601": 2.7087883949279785, + "602": 2.6817100048065186, + "603": 2.7040693759918213, + "604": 2.674466609954834, + "605": 2.675713539123535, + "606": 2.698209762573242, + "607": 2.6889586448669434, + "608": 2.6803107261657715, + "609": 2.6847589015960693, + "610": 2.6696243286132812, + "611": 2.693406820297241, + "612": 2.672724485397339, + "613": 2.711531162261963, + "614": 2.7127370834350586, + "615": 2.7407872676849365, + "616": 2.722634792327881, + "617": 2.689072608947754, + "618": 2.661468982696533, + "619": 2.6796717643737793, + "620": 2.6591899394989014, + "621": 2.6647849082946777, + "622": 2.6759605407714844, + "623": 2.6636757850646973, + "624": 2.6850664615631104, + "625": 2.6810436248779297, + "626": 2.675769090652466, + "627": 2.649951696395874, + "628": 2.6543140411376953, + "629": 2.6440024375915527, + "630": 2.6621458530426025, + "631": 2.649712085723877, + "632": 2.6524407863616943, + "633": 2.672440528869629, + "634": 2.6835622787475586, + "635": 2.693869113922119, + "636": 2.6719155311584473, + "637": 2.679391860961914, + "638": 2.6672911643981934, + "639": 2.655522346496582, + "640": 2.65548038482666, + "641": 2.6493778228759766, + "642": 2.656399726867676, + "643": 2.6746835708618164, + "644": 2.662984609603882, + "645": 2.693105697631836, + "646": 2.6630825996398926, + "647": 2.689990282058716 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "train_epoch_time": 4.788145542144775, + "train_loss": 2.6561296804392525, + "train_score": 0.24334872664731408, + "val_loss": 2.672862677021224, + "val_score": 0.2390571186483659 + }, + { + "epoch": 12, + "grad_norm": 0.2774239182472229, + "learning_rate": 0.215, + "model_norm": 87.45559692382812, + "step_logs": { + "grad_norm": { + "648": 0.792896568775177, + "649": 0.8353447914123535, + "650": 0.7810977101325989, + "651": 0.6959315538406372, + "652": 0.6664336323738098, + "653": 0.6596155762672424, + "654": 0.6479871869087219, + "655": 0.6403710246086121, + "656": 0.5676006078720093, + "657": 0.5141499042510986, + "658": 0.49931102991104126, + "659": 0.4865494966506958, + "660": 0.4632185399532318, + "661": 0.4731646478176117, + "662": 0.47062548995018005, + "663": 0.4806637763977051, + "664": 0.5314803123474121, + "665": 0.6604889631271362, + "666": 0.7280536890029907, + "667": 0.763297438621521, + "668": 0.7442384958267212, + "669": 0.641446590423584, + "670": 0.5107265710830688, + "671": 0.4029940366744995, + "672": 0.36072438955307007, + "673": 0.30386051535606384, + "674": 0.24650147557258606, + "675": 0.2413417249917984, + "676": 0.24129937589168549, + "677": 0.19265297055244446, + "678": 0.1772371083498001, + "679": 0.19798628985881805, + "680": 0.28178197145462036, + "681": 0.2590637505054474, + "682": 0.1860135942697525, + "683": 0.2082049697637558, + "684": 0.20826393365859985, + "685": 0.1825958788394928, + "686": 0.19578436017036438, + "687": 0.1916915327310562, + "688": 0.1876634657382965, + "689": 0.23662607371807098, + "690": 0.2560403645038605, + "691": 0.2715053856372833, + "692": 0.29553651809692383, + "693": 0.32233282923698425, + "694": 0.33564308285713196, + "695": 0.3652053773403168, + "696": 0.2813124358654022, + "697": 0.2436148226261139, + "698": 0.27020028233528137, + "699": 0.31399574875831604, + "700": 0.2893792986869812, + "701": 0.2774239182472229 + }, + "loss": { + "648": 2.6537842750549316, + "649": 2.6676998138427734, + "650": 2.6535425186157227, + "651": 2.6724400520324707, + "652": 2.645719528198242, + "653": 2.6452810764312744, + "654": 2.6345176696777344, + "655": 2.6461164951324463, + "656": 2.640970230102539, + "657": 2.6327147483825684, + "658": 2.6243534088134766, + "659": 2.6190435886383057, + "660": 2.604731559753418, + "661": 2.6417384147644043, + "662": 2.6303091049194336, + "663": 2.6267900466918945, + "664": 2.619058609008789, + "665": 2.635082721710205, + "666": 2.6518149375915527, + "667": 2.616220474243164, + "668": 2.634359836578369, + "669": 2.648690700531006, + "670": 2.636586904525757, + "671": 2.617892026901245, + "672": 2.610780715942383, + "673": 2.6191556453704834, + "674": 2.6143531799316406, + "675": 2.604020118713379, + "676": 2.628568410873413, + "677": 2.61314058303833, + "678": 2.6149210929870605, + "679": 2.6063547134399414, + "680": 2.604421615600586, + "681": 2.6073555946350098, + "682": 2.617856979370117, + "683": 2.6121232509613037, + "684": 2.6046979427337646, + "685": 2.6048662662506104, + "686": 2.6013407707214355, + "687": 2.6054189205169678, + "688": 2.5942869186401367, + "689": 2.6175761222839355, + "690": 2.5908968448638916, + "691": 2.615455389022827, + "692": 2.5798377990722656, + "693": 2.596287488937378, + "694": 2.6051957607269287, + "695": 2.6023013591766357, + "696": 2.6002156734466553, + "697": 2.595587730407715, + "698": 2.585690498352051, + "699": 2.6131420135498047, + "700": 2.5900957584381104, + "701": 2.5907397270202637 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "train_epoch_time": 4.788262128829956, + "train_loss": 2.594737240984245, + "train_score": 0.2596384056500417, + "val_loss": 2.6130455312170473, + "val_score": 0.25239936085300246 + }, + { + "epoch": 13, + "grad_norm": 0.19743777811527252, + "learning_rate": 0.14333333333333334, + "model_norm": 87.4638671875, + "step_logs": { + "grad_norm": { + "702": 0.288577675819397, + "703": 0.206546813249588, + "704": 0.18744145333766937, + "705": 0.23894193768501282, + "706": 0.2635490894317627, + "707": 0.1706453412771225, + "708": 0.19992195069789886, + "709": 0.19615544378757477, + "710": 0.19003179669380188, + "711": 0.20042403042316437, + "712": 0.22111228108406067, + "713": 0.20645873248577118, + "714": 0.19862057268619537, + "715": 0.21230727434158325, + "716": 0.20010048151016235, + "717": 0.208111971616745, + "718": 0.2040364295244217, + "719": 0.20175381004810333, + "720": 0.20806914567947388, + "721": 0.2026674598455429, + "722": 0.19035674631595612, + "723": 0.18032985925674438, + "724": 0.1910620927810669, + "725": 0.17998524010181427, + "726": 0.17583727836608887, + "727": 0.17799758911132812, + "728": 0.20407570898532867, + "729": 0.18521158397197723, + "730": 0.201987624168396, + "731": 0.2337084859609604, + "732": 0.2262936383485794, + "733": 0.2119879424571991, + "734": 0.21688628196716309, + "735": 0.2242150902748108, + "736": 0.2115195095539093, + "737": 0.2281603366136551, + "738": 0.2723083794116974, + "739": 0.204091876745224, + "740": 0.19272255897521973, + "741": 0.19437508285045624, + "742": 0.20794957876205444, + "743": 0.20709531009197235, + "744": 0.19182224571704865, + "745": 0.20327045023441315, + "746": 0.18197189271450043, + "747": 0.16160893440246582, + "748": 0.1971614509820938, + "749": 0.19469018280506134, + "750": 0.1888684332370758, + "751": 0.1756402999162674, + "752": 0.17552946507930756, + "753": 0.16428831219673157, + "754": 0.1855579912662506, + "755": 0.19743777811527252 + }, + "loss": { + "702": 2.586320161819458, + "703": 2.5773162841796875, + "704": 2.5759706497192383, + "705": 2.5901947021484375, + "706": 2.598660945892334, + "707": 2.5948023796081543, + "708": 2.5947155952453613, + "709": 2.58949613571167, + "710": 2.6068930625915527, + "711": 2.598865509033203, + "712": 2.5606284141540527, + "713": 2.5946717262268066, + "714": 2.5841267108917236, + "715": 2.60235595703125, + "716": 2.5846023559570312, + "717": 2.576630115509033, + "718": 2.5817763805389404, + "719": 2.5823283195495605, + "720": 2.590935230255127, + "721": 2.5704972743988037, + "722": 2.593195915222168, + "723": 2.5623183250427246, + "724": 2.576021909713745, + "725": 2.60878849029541, + "726": 2.5802509784698486, + "727": 2.5956945419311523, + "728": 2.5693695545196533, + "729": 2.5748777389526367, + "730": 2.5698094367980957, + "731": 2.6033058166503906, + "732": 2.5717451572418213, + "733": 2.589519500732422, + "734": 2.5744495391845703, + "735": 2.5993423461914062, + "736": 2.5652925968170166, + "737": 2.573065757751465, + "738": 2.590214252471924, + "739": 2.5813217163085938, + "740": 2.5754079818725586, + "741": 2.5944643020629883, + "742": 2.575998067855835, + "743": 2.5622103214263916, + "744": 2.573355197906494, + "745": 2.567068099975586, + "746": 2.5831942558288574, + "747": 2.585444927215576, + "748": 2.5840260982513428, + "749": 2.5696730613708496, + "750": 2.5842604637145996, + "751": 2.579193115234375, + "752": 2.580641269683838, + "753": 2.5773744583129883, + "754": 2.5695557594299316, + "755": 2.584674119949341 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "train_epoch_time": 4.789228916168213, + "train_loss": 2.5746010681820053, + "train_score": 0.2640905218452771, + "val_loss": 2.591059433743272, + "val_score": 0.2571577215577924 + }, + { + "epoch": 14, + "grad_norm": 0.17980313301086426, + "learning_rate": 0.07166666666666667, + "model_norm": 87.4664306640625, + "step_logs": { + "grad_norm": { + "756": 0.19651539623737335, + "757": 0.22051754593849182, + "758": 0.18168343603610992, + "759": 0.19114398956298828, + "760": 0.20848040282726288, + "761": 0.19059225916862488, + "762": 0.17042526602745056, + "763": 0.19235539436340332, + "764": 0.22388316690921783, + "765": 0.18128003180027008, + "766": 0.21614381670951843, + "767": 0.1869804412126541, + "768": 0.17486269772052765, + "769": 0.20444579422473907, + "770": 0.20636223256587982, + "771": 0.15299025177955627, + "772": 0.1582631766796112, + "773": 0.17599335312843323, + "774": 0.1699964851140976, + "775": 0.17469920217990875, + "776": 0.17793191969394684, + "777": 0.17099779844284058, + "778": 0.18882369995117188, + "779": 0.2096959948539734, + "780": 0.17221367359161377, + "781": 0.19251389801502228, + "782": 0.1936895102262497, + "783": 0.16879186034202576, + "784": 0.17370820045471191, + "785": 0.17811767756938934, + "786": 0.193150594830513, + "787": 0.18296052515506744, + "788": 0.21088339388370514, + "789": 0.1723606288433075, + "790": 0.20569881796836853, + "791": 0.16170746088027954, + "792": 0.1840723603963852, + "793": 0.19184309244155884, + "794": 0.19387155771255493, + "795": 0.16084368526935577, + "796": 0.17537648975849152, + "797": 0.18694652616977692, + "798": 0.1754896193742752, + "799": 0.2051302045583725, + "800": 0.18033966422080994, + "801": 0.18328621983528137, + "802": 0.18514332175254822, + "803": 0.19138571619987488, + "804": 0.17341695725917816, + "805": 0.17652909457683563, + "806": 0.17719390988349915, + "807": 0.17969363927841187, + "808": 0.17272068560123444, + "809": 0.17980313301086426 + }, + "loss": { + "756": 2.5785164833068848, + "757": 2.574859619140625, + "758": 2.5759048461914062, + "759": 2.5679521560668945, + "760": 2.5693013668060303, + "761": 2.585184097290039, + "762": 2.581094264984131, + "763": 2.564300298690796, + "764": 2.5730648040771484, + "765": 2.566831350326538, + "766": 2.581516981124878, + "767": 2.5767414569854736, + "768": 2.5858826637268066, + "769": 2.5594544410705566, + "770": 2.60379695892334, + "771": 2.560962677001953, + "772": 2.5692503452301025, + "773": 2.5719165802001953, + "774": 2.5704259872436523, + "775": 2.555741548538208, + "776": 2.579489231109619, + "777": 2.5771191120147705, + "778": 2.5696277618408203, + "779": 2.577171802520752, + "780": 2.5648751258850098, + "781": 2.5556445121765137, + "782": 2.574836254119873, + "783": 2.550807476043701, + "784": 2.579025983810425, + "785": 2.5787100791931152, + "786": 2.5677437782287598, + "787": 2.57653546333313, + "788": 2.599581718444824, + "789": 2.561877965927124, + "790": 2.552694797515869, + "791": 2.579023838043213, + "792": 2.5785627365112305, + "793": 2.5573389530181885, + "794": 2.5599212646484375, + "795": 2.567988634109497, + "796": 2.5702061653137207, + "797": 2.5582666397094727, + "798": 2.569242238998413, + "799": 2.5669472217559814, + "800": 2.5579676628112793, + "801": 2.5632500648498535, + "802": 2.5691051483154297, + "803": 2.575640916824341, + "804": 2.578709125518799, + "805": 2.578014850616455, + "806": 2.5717475414276123, + "807": 2.572329044342041, + "808": 2.578866958618164, + "809": 2.557891845703125 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "train_epoch_time": 4.78950047492981, + "train_loss": 2.5690175032513043, + "train_score": 0.26566423052865773, + "val_loss": 2.5859530095254786, + "val_score": 0.2592341778404266 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:31:32.282473", + "final_model_norm": 87.4664306640625, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:29:51.480189", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.652818202972412, + "learning_rate": 4.64e-11, + "model_norm": 95.14454650878906, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.276208400726318, + "3": 8.374259948730469, + "4": 16.702686309814453, + "5": 4.318990230560303, + "6": 4.053266525268555, + "7": 4.086020469665527, + "8": 5.3812150955200195, + "9": 3.9882380962371826, + "10": 3.1579270362854004, + "11": 4.5157694816589355, + "12": 7.758707523345947, + "13": 6.416595458984375, + "14": 16.67332649230957, + "15": 282.22894287109375, + "16": 22.914852142333984, + "17": 18.954862594604492, + "18": 16.624263763427734, + "19": 9.764891624450684, + "20": 6.315908432006836, + "21": 11.82269287109375, + "22": 5.424689769744873, + "23": 5.268277645111084, + "24": 15.263899803161621, + "25": 14.404182434082031, + "26": 6.300836086273193, + "27": 4.741504669189453, + "28": 4.246946811676025, + "29": 3.7954163551330566, + "30": 10.74240493774414, + "31": 5.669170379638672, + "32": 4.174526691436768, + "33": 4.03250789642334, + "34": 2.977074146270752, + "35": 2.629021406173706, + "36": 6.231155872344971, + "37": 5.815568447113037, + "38": 4.085026264190674, + "39": 3.1897835731506348, + "40": 2.947141170501709, + "41": 11.358014106750488, + "42": 8.03885555267334, + "43": 3.9542248249053955, + "44": 3.493335008621216, + "45": 2.769835948944092, + "46": 2.4430980682373047, + "47": 1.7583775520324707, + "48": 7.817409038543701, + "49": 2.657479763031006, + "50": 3.692596435546875, + "51": 2.1273584365844727, + "52": 1.7742284536361694, + "53": 1.652818202972412 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.8033523559570312, + "3": 3.8736085891723633, + "4": 4.4257612228393555, + "5": 4.802114009857178, + "6": 4.167995929718018, + "7": 3.772918939590454, + "8": 3.651449203491211, + "9": 4.178749084472656, + "10": 3.4780311584472656, + "11": 3.7390782833099365, + "12": 4.454328536987305, + "13": 4.386186599731445, + "14": 8.97293758392334, + "15": 14.497613906860352, + "16": 5.278868675231934, + "17": 9.501153945922852, + "18": 11.088691711425781, + "19": 14.500003814697266, + "20": 14.40684700012207, + "21": 12.262041091918945, + "22": 10.736939430236816, + "23": 8.117122650146484, + "24": 11.160126686096191, + "25": 15.988414764404297, + "26": 16.101322174072266, + "27": 13.540946960449219, + "28": 10.738876342773438, + "29": 7.659339904785156, + "30": 6.603672504425049, + "31": 11.902063369750977, + "32": 10.373956680297852, + "33": 8.095108032226562, + "34": 5.627439498901367, + "35": 3.818453550338745, + "36": 5.04629373550415, + "37": 6.023892879486084, + "38": 8.797224044799805, + "39": 7.081573486328125, + "40": 4.819413661956787, + "41": 7.542447090148926, + "42": 12.643533706665039, + "43": 12.60511589050293, + "44": 10.406379699707031, + "45": 7.89165735244751, + "46": 5.85908317565918, + "47": 4.3023200035095215, + "48": 5.10648250579834, + "49": 8.244983673095703, + "50": 7.436631202697754, + "51": 6.794487953186035, + "52": 5.365841865539551, + "53": 4.076620578765869 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "train_epoch_time": 4.7896528244018555, + "train_loss": 3.811383214947825, + "train_score": 0.1526105183055103, + "val_loss": 3.8258205180053184, + "val_score": 0.15114182700861745 + }, + { + "epoch": 1, + "grad_norm": 0.48905959725379944, + "learning_rate": 0.464, + "model_norm": 95.07994842529297, + "step_logs": { + "grad_norm": { + "54": 3.6941728591918945, + "55": 1.5769537687301636, + "56": 1.4904303550720215, + "57": 1.2141751050949097, + "58": 3.826354742050171, + "59": 1.4384191036224365, + "60": 1.3476049900054932, + "61": 1.2866255044937134, + "62": 2.2173924446105957, + "63": 1.2530168294906616, + "64": 1.0791776180267334, + "65": 2.5892274379730225, + "66": 1.2960045337677002, + "67": 1.150550365447998, + "68": 0.636892557144165, + "69": 0.8866409659385681, + "70": 2.255319356918335, + "71": 1.1988329887390137, + "72": 1.037598729133606, + "73": 1.3717186450958252, + "74": 1.1444860696792603, + "75": 0.3106951117515564, + "76": 0.4716980755329132, + "77": 1.2585800886154175, + "78": 1.1399120092391968, + "79": 0.5681597590446472, + "80": 0.8048447966575623, + "81": 2.011240005493164, + "82": 1.1774786710739136, + "83": 0.8743529915809631, + "84": 1.4935977458953857, + "85": 1.0602136850357056, + "86": 0.2866179645061493, + "87": 0.5061366558074951, + "88": 0.7225818037986755, + "89": 1.5581732988357544, + "90": 1.103549838066101, + "91": 0.4402516484260559, + "92": 0.6982254385948181, + "93": 0.8109797835350037, + "94": 1.2745931148529053, + "95": 1.061758279800415, + "96": 0.35099613666534424, + "97": 0.4390299320220947, + "98": 0.8580119609832764, + "99": 0.9358235597610474, + "100": 1.1460063457489014, + "101": 0.9937201738357544, + "102": 0.47928911447525024, + "103": 0.6241452693939209, + "104": 1.1688700914382935, + "105": 0.970736563205719, + "106": 0.34720972180366516, + "107": 0.48905959725379944 + }, + "loss": { + "54": 3.819190740585327, + "55": 5.175209045410156, + "56": 4.418959617614746, + "57": 3.5471701622009277, + "58": 3.8454673290252686, + "59": 5.155857086181641, + "60": 4.353496551513672, + "61": 3.6526291370391846, + "62": 3.556497573852539, + "63": 4.180559158325195, + "64": 3.539870262145996, + "65": 3.563734292984009, + "66": 4.3748016357421875, + "67": 3.724374771118164, + "68": 3.342287302017212, + "69": 3.476107597351074, + "70": 3.532440185546875, + "71": 4.159850120544434, + "72": 3.531541347503662, + "73": 3.4115400314331055, + "74": 3.745779037475586, + "75": 3.3326189517974854, + "76": 3.366605281829834, + "77": 3.401813507080078, + "78": 3.697483539581299, + "79": 3.3507590293884277, + "80": 3.369561195373535, + "81": 3.525599479675293, + "82": 3.9687652587890625, + "83": 3.4684581756591797, + "84": 3.424407958984375, + "85": 3.729970932006836, + "86": 3.3281264305114746, + "87": 3.36848783493042, + "88": 3.370513677597046, + "89": 3.4576525688171387, + "90": 3.7721939086914062, + "91": 3.3521311283111572, + "92": 3.3533034324645996, + "93": 3.4419989585876465, + "94": 3.3973400592803955, + "95": 3.6539418697357178, + "96": 3.333446979522705, + "97": 3.348630428314209, + "98": 3.3506622314453125, + "99": 3.510232925415039, + "100": 3.4140822887420654, + "101": 3.562901496887207, + "102": 3.3373665809631348, + "103": 3.3839244842529297, + "104": 3.4235408306121826, + "105": 3.5752899646759033, + "106": 3.3537020683288574, + "107": 3.339092254638672 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "train_epoch_time": 4.787497520446777, + "train_loss": 3.376562429055935, + "train_score": 0.15260939743033097, + "val_loss": 3.395580074954115, + "val_score": 0.15114182700861745 + }, + { + "epoch": 2, + "grad_norm": 0.5694767832756042, + "learning_rate": 0.464, + "model_norm": 95.0799331665039, + "step_logs": { + "grad_norm": { + "108": 0.937101423740387, + "109": 0.9100974202156067, + "110": 0.7832270860671997, + "111": 0.8097555041313171, + "112": 0.9023659825325012, + "113": 0.8763775825500488, + "114": 0.7913365364074707, + "115": 0.8260985016822815, + "116": 0.9353631138801575, + "117": 0.8662790656089783, + "118": 0.6298329830169678, + "119": 0.701732337474823, + "120": 0.9115036725997925, + "121": 0.8654568195343018, + "122": 0.7037350535392761, + "123": 0.7147424221038818, + "124": 0.7594584822654724, + "125": 0.7744802832603455, + "126": 0.8010094165802002, + "127": 0.771429181098938, + "128": 0.6653699278831482, + "129": 0.7111042141914368, + "130": 0.8257189989089966, + "131": 0.7920432686805725, + "132": 0.671977162361145, + "133": 0.682690441608429, + "134": 0.6913823485374451, + "135": 0.6903976798057556, + "136": 0.7035506963729858, + "137": 0.7023475766181946, + "138": 0.6732025146484375, + "139": 0.6678005456924438, + "140": 0.6431317925453186, + "141": 0.6587648391723633, + "142": 0.7042838931083679, + "143": 0.673664391040802, + "144": 0.5878357291221619, + "145": 0.6040487289428711, + "146": 0.644805908203125, + "147": 0.6217125058174133, + "148": 0.5670838356018066, + "149": 0.6007515788078308, + "150": 0.6736865043640137, + "151": 0.6285593509674072, + "152": 0.512269914150238, + "153": 0.5301933288574219, + "154": 0.5890335440635681, + "155": 0.6092319488525391, + "156": 0.6309515237808228, + "157": 0.6185550689697266, + "158": 0.5833370089530945, + "159": 0.5794802308082581, + "160": 0.5471209287643433, + "161": 0.5694767832756042 + }, + "loss": { + "108": 3.3574681282043457, + "109": 3.4849300384521484, + "110": 3.3535819053649902, + "111": 3.461355447769165, + "112": 3.3725531101226807, + "113": 3.492842197418213, + "114": 3.394954204559326, + "115": 3.4387760162353516, + "116": 3.3632302284240723, + "117": 3.4699866771698, + "118": 3.361332654953003, + "119": 3.412446975708008, + "120": 3.335014581680298, + "121": 3.44108247756958, + "122": 3.3754525184631348, + "123": 3.432736396789551, + "124": 3.398573637008667, + "125": 3.4387564659118652, + "126": 3.3856282234191895, + "127": 3.4441332817077637, + "128": 3.3441851139068604, + "129": 3.3686683177948, + "130": 3.3438167572021484, + "131": 3.4587936401367188, + "132": 3.353421688079834, + "133": 3.3798303604125977, + "134": 3.33695650100708, + "135": 3.414294719696045, + "136": 3.3588247299194336, + "137": 3.391749382019043, + "138": 3.342038154602051, + "139": 3.40187406539917, + "140": 3.344695568084717, + "141": 3.3651533126831055, + "142": 3.36657452583313, + "143": 3.450082302093506, + "144": 3.338261604309082, + "145": 3.3799362182617188, + "146": 3.3470382690429688, + "147": 3.403172731399536, + "148": 3.358397960662842, + "149": 3.3757081031799316, + "150": 3.373363494873047, + "151": 3.388943672180176, + "152": 3.331705093383789, + "153": 3.3673605918884277, + "154": 3.3178887367248535, + "155": 3.4022326469421387, + "156": 3.3367085456848145, + "157": 3.3797800540924072, + "158": 3.3392200469970703, + "159": 3.3776514530181885, + "160": 3.309654712677002, + "161": 3.3489630222320557 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "train_epoch_time": 4.787580490112305, + "train_loss": 3.3418038329229804, + "train_score": 0.15261276005586902, + "val_loss": 3.3609827295647148, + "val_score": 0.15114182700861745 + }, + { + "epoch": 3, + "grad_norm": 0.592322587966919, + "learning_rate": 0.464, + "model_norm": 95.12150573730469, + "step_logs": { + "grad_norm": { + "162": 0.615007758140564, + "163": 0.5873151421546936, + "164": 0.5264214277267456, + "165": 0.5163225531578064, + "166": 0.4879056513309479, + "167": 0.5220987796783447, + "168": 0.5873386263847351, + "169": 0.5762194395065308, + "170": 0.5494808554649353, + "171": 0.5450929403305054, + "172": 0.550300657749176, + "173": 0.5463757514953613, + "174": 0.5122048854827881, + "175": 0.5181676745414734, + "176": 0.5633895993232727, + "177": 0.5566295981407166, + "178": 0.5165855288505554, + "179": 0.5185393691062927, + "180": 0.5492952466011047, + "181": 0.5626727342605591, + "182": 0.5611037611961365, + "183": 0.5426319241523743, + "184": 0.5038169026374817, + "185": 0.5322648882865906, + "186": 0.5438753962516785, + "187": 0.8277273774147034, + "188": 0.7329356074333191, + "189": 0.6525020003318787, + "190": 0.508832573890686, + "191": 0.5306726098060608, + "192": 0.5974617600440979, + "193": 0.6298349499702454, + "194": 0.6329159736633301, + "195": 0.6210013031959534, + "196": 0.5854630470275879, + "197": 1.0215471982955933, + "198": 0.7013662457466125, + "199": 0.6193742752075195, + "200": 0.49590811133384705, + "201": 0.5079411864280701, + "202": 0.5181183218955994, + "203": 0.538161039352417, + "204": 0.5654072761535645, + "205": 0.5818029046058655, + "206": 0.6068726778030396, + "207": 0.6282179951667786, + "208": 0.6167364120483398, + "209": 0.6415621042251587, + "210": 0.6173148155212402, + "211": 0.5772902965545654, + "212": 0.5579806566238403, + "213": 0.5356670618057251, + "214": 0.5393517017364502, + "215": 0.592322587966919 + }, + "loss": { + "162": 3.327364206314087, + "163": 3.3843235969543457, + "164": 3.3192696571350098, + "165": 3.3537888526916504, + "166": 3.330967903137207, + "167": 3.350696086883545, + "168": 3.3641114234924316, + "169": 3.359283447265625, + "170": 3.3143627643585205, + "171": 3.3739585876464844, + "172": 3.3140740394592285, + "173": 3.355048179626465, + "174": 3.3630337715148926, + "175": 3.342874526977539, + "176": 3.3190088272094727, + "177": 3.3339576721191406, + "178": 3.3038487434387207, + "179": 3.3242692947387695, + "180": 3.3445029258728027, + "181": 3.338287353515625, + "182": 3.297006845474243, + "183": 3.3233845233917236, + "184": 3.302051067352295, + "185": 3.330042839050293, + "186": 3.2854273319244385, + "187": 3.3331639766693115, + "188": 3.355712413787842, + "189": 3.3565173149108887, + "190": 3.3086819648742676, + "191": 3.3157200813293457, + "192": 3.2771875858306885, + "193": 3.31754207611084, + "194": 3.2726426124572754, + "195": 3.2991514205932617, + "196": 3.2811403274536133, + "197": 3.3124465942382812, + "198": 3.299910306930542, + "199": 3.3078887462615967, + "200": 3.2597906589508057, + "201": 3.284745216369629, + "202": 3.225611448287964, + "203": 3.2391726970672607, + "204": 3.237396240234375, + "205": 3.248345375061035, + "206": 3.235475540161133, + "207": 3.240050792694092, + "208": 3.207780599594116, + "209": 3.2279539108276367, + "210": 3.205918788909912, + "211": 3.22377872467041, + "212": 3.217679023742676, + "213": 3.2277183532714844, + "214": 3.1825857162475586, + "215": 3.1997873783111572 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "train_epoch_time": 4.787698745727539, + "train_loss": 3.195310854125057, + "train_score": 0.16691400641775883, + "val_loss": 3.205283027291982, + "val_score": 0.16692827949909886 + }, + { + "epoch": 4, + "grad_norm": 0.5832625031471252, + "learning_rate": 0.464, + "model_norm": 95.16639709472656, + "step_logs": { + "grad_norm": { + "216": 0.6614340543746948, + "217": 0.6891858577728271, + "218": 0.6676728129386902, + "219": 0.5995890498161316, + "220": 0.5358002185821533, + "221": 0.536672830581665, + "222": 0.594521701335907, + "223": 0.6287701725959778, + "224": 0.6931347846984863, + "225": 0.8357718586921692, + "226": 0.6993051171302795, + "227": 0.6106818318367004, + "228": 0.5817131996154785, + "229": 0.637666642665863, + "230": 0.6744968295097351, + "231": 0.647172749042511, + "232": 0.6165562272071838, + "233": 0.632185697555542, + "234": 0.6141465306282043, + "235": 0.5918753743171692, + "236": 0.5487201809883118, + "237": 0.5572559237480164, + "238": 0.5455392599105835, + "239": 0.5610880255699158, + "240": 0.6105849146842957, + "241": 0.5911791324615479, + "242": 0.5344687104225159, + "243": 0.515195369720459, + "244": 0.4881223142147064, + "245": 0.5291801691055298, + "246": 0.5750199556350708, + "247": 0.5984045267105103, + "248": 0.5724953413009644, + "249": 0.6173244714736938, + "250": 0.5688831210136414, + "251": 0.5364689230918884, + "252": 0.5486172437667847, + "253": 0.5572051405906677, + "254": 0.5692540407180786, + "255": 0.5646526217460632, + "256": 0.5788152813911438, + "257": 0.5705143809318542, + "258": 0.5741739273071289, + "259": 0.5943071842193604, + "260": 0.5959339141845703, + "261": 0.5554969906806946, + "262": 0.5691060423851013, + "263": 0.6079320311546326, + "264": 0.5677531957626343, + "265": 0.5119683742523193, + "266": 0.4963459074497223, + "267": 0.5055669546127319, + "268": 0.5560286045074463, + "269": 0.5832625031471252 + }, + "loss": { + "216": 3.1965291500091553, + "217": 3.2185592651367188, + "218": 3.2174973487854004, + "219": 3.1988306045532227, + "220": 3.1923441886901855, + "221": 3.1768062114715576, + "222": 3.155893564224243, + "223": 3.197890281677246, + "224": 3.1688971519470215, + "225": 3.241753101348877, + "226": 3.208406686782837, + "227": 3.191225528717041, + "228": 3.1688451766967773, + "229": 3.165341854095459, + "230": 3.1427555084228516, + "231": 3.205348014831543, + "232": 3.146683692932129, + "233": 3.168276786804199, + "234": 3.171910285949707, + "235": 3.1608381271362305, + "236": 3.126901626586914, + "237": 3.1705613136291504, + "238": 3.151902198791504, + "239": 3.150212287902832, + "240": 3.126461982727051, + "241": 3.149775981903076, + "242": 3.1044583320617676, + "243": 3.1385936737060547, + "244": 3.091449737548828, + "245": 3.118412971496582, + "246": 3.119467258453369, + "247": 3.147890567779541, + "248": 3.1146764755249023, + "249": 3.1356582641601562, + "250": 3.1246023178100586, + "251": 3.119462251663208, + "252": 3.0996222496032715, + "253": 3.123623847961426, + "254": 3.1057260036468506, + "255": 3.117973804473877, + "256": 3.088528633117676, + "257": 3.1137094497680664, + "258": 3.0836539268493652, + "259": 3.108476161956787, + "260": 3.094310760498047, + "261": 3.105084180831909, + "262": 3.098038673400879, + "263": 3.105175495147705, + "264": 3.076687812805176, + "265": 3.083289623260498, + "266": 3.077672004699707, + "267": 3.0758309364318848, + "268": 3.0616202354431152, + "269": 3.098269462585449 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "train_epoch_time": 4.787846326828003, + "train_loss": 3.0851088610064865, + "train_score": 0.17949807206903673, + "val_loss": 3.1028489352093773, + "val_score": 0.1790910234886524 + }, + { + "epoch": 5, + "grad_norm": 0.721752405166626, + "learning_rate": 0.464, + "model_norm": 95.19732666015625, + "step_logs": { + "grad_norm": { + "270": 0.7902553081512451, + "271": 0.6040201187133789, + "272": 0.44922083616256714, + "273": 0.44981932640075684, + "274": 0.44080644845962524, + "275": 0.45439624786376953, + "276": 0.5262670516967773, + "277": 0.8339715600013733, + "278": 0.6985093355178833, + "279": 0.5644071102142334, + "280": 0.5074340105056763, + "281": 0.49407148361206055, + "282": 0.5077321529388428, + "283": 0.5668754577636719, + "284": 0.5768797993659973, + "285": 0.5454105138778687, + "286": 0.5155891180038452, + "287": 0.5330159664154053, + "288": 0.5265529751777649, + "289": 0.6538668274879456, + "290": 0.6643021106719971, + "291": 0.8174726963043213, + "292": 0.6895253658294678, + "293": 0.6218353509902954, + "294": 0.5209974050521851, + "295": 0.5527642965316772, + "296": 0.5057004690170288, + "297": 0.5349284410476685, + "298": 0.4257163405418396, + "299": 0.37334802746772766, + "300": 0.41469070315361023, + "301": 0.5852785110473633, + "302": 0.587361752986908, + "303": 0.7149254083633423, + "304": 0.6898226737976074, + "305": 0.7398832440376282, + "306": 0.670011043548584, + "307": 0.6290679574012756, + "308": 0.5894433856010437, + "309": 0.5336933135986328, + "310": 0.5109879970550537, + "311": 0.5104011297225952, + "312": 0.5443515181541443, + "313": 0.5967521667480469, + "314": 0.680535614490509, + "315": 0.7892389297485352, + "316": 1.1440263986587524, + "317": 0.7391549944877625, + "318": 0.5006719827651978, + "319": 0.47819140553474426, + "320": 0.4830920100212097, + "321": 0.573211669921875, + "322": 0.6276507377624512, + "323": 0.721752405166626 + }, + "loss": { + "270": 3.0802083015441895, + "271": 3.1248939037323, + "272": 3.086836814880371, + "273": 3.0596683025360107, + "274": 3.0586657524108887, + "275": 3.0552639961242676, + "276": 3.0669636726379395, + "277": 3.0884647369384766, + "278": 3.140105724334717, + "279": 3.0839905738830566, + "280": 3.0772690773010254, + "281": 3.0480806827545166, + "282": 3.04884672164917, + "283": 3.0641770362854004, + "284": 3.060135841369629, + "285": 3.0600745677948, + "286": 3.0486268997192383, + "287": 3.0439209938049316, + "288": 3.0395689010620117, + "289": 3.05881404876709, + "290": 3.057131767272949, + "291": 3.0702338218688965, + "292": 3.0868217945098877, + "293": 3.055919885635376, + "294": 3.036402940750122, + "295": 3.042041778564453, + "296": 3.042891025543213, + "297": 3.0303261280059814, + "298": 3.0434207916259766, + "299": 2.99887752532959, + "300": 3.007185935974121, + "301": 3.0253050327301025, + "302": 3.0507731437683105, + "303": 3.0386862754821777, + "304": 3.0646677017211914, + "305": 3.0318522453308105, + "306": 3.030485153198242, + "307": 3.0205886363983154, + "308": 3.0392098426818848, + "309": 3.008427143096924, + "310": 3.015249729156494, + "311": 3.0068020820617676, + "312": 3.008622169494629, + "313": 2.9995484352111816, + "314": 3.018354654312134, + "315": 3.0260226726531982, + "316": 3.0838632583618164, + "317": 3.096536636352539, + "318": 3.0217180252075195, + "319": 2.9939684867858887, + "320": 2.99055552482605, + "321": 2.9938814640045166, + "322": 3.0175423622131348, + "323": 3.006381034851074 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "train_epoch_time": 4.788123607635498, + "train_loss": 3.0367380023857784, + "train_score": 0.1396251793229255, + "val_loss": 3.049307380833938, + "val_score": 0.13896562884445168 + }, + { + "epoch": 6, + "grad_norm": 0.4812910854816437, + "learning_rate": 0.464, + "model_norm": 95.22705841064453, + "step_logs": { + "grad_norm": { + "324": 0.7046613097190857, + "325": 0.684482216835022, + "326": 0.6434972882270813, + "327": 0.6167725324630737, + "328": 0.5184486508369446, + "329": 0.42307910323143005, + "330": 0.4216797649860382, + "331": 0.43924927711486816, + "332": 0.44578421115875244, + "333": 0.4958129823207855, + "334": 0.5587369203567505, + "335": 0.6142587661743164, + "336": 0.6828575134277344, + "337": 0.7260221242904663, + "338": 0.7251009941101074, + "339": 0.6530268788337708, + "340": 0.5546654462814331, + "341": 0.45238181948661804, + "342": 0.5362229943275452, + "343": 0.6245988607406616, + "344": 0.648998498916626, + "345": 0.6236292123794556, + "346": 0.5856001973152161, + "347": 0.5445979237556458, + "348": 0.5174869894981384, + "349": 0.531870424747467, + "350": 0.6193946003913879, + "351": 0.7220349311828613, + "352": 0.8519195318222046, + "353": 0.8194084763526917, + "354": 0.6350876688957214, + "355": 0.5142744183540344, + "356": 0.4926293194293976, + "357": 0.540449321269989, + "358": 0.6703569889068604, + "359": 0.7354685068130493, + "360": 0.731411337852478, + "361": 0.5776702761650085, + "362": 0.46868500113487244, + "363": 0.390006422996521, + "364": 0.4021241366863251, + "365": 0.466878741979599, + "366": 0.5145150423049927, + "367": 0.5767860412597656, + "368": 0.6581503748893738, + "369": 0.662085771560669, + "370": 0.6042476892471313, + "371": 0.5649664402008057, + "372": 0.49768534302711487, + "373": 0.4435543417930603, + "374": 0.45051810145378113, + "375": 0.458556592464447, + "376": 0.45275017619132996, + "377": 0.4812910854816437 + }, + "loss": { + "324": 3.0322296619415283, + "325": 3.01348614692688, + "326": 3.016357898712158, + "327": 2.993896007537842, + "328": 2.990234613418579, + "329": 2.967379331588745, + "330": 2.9529285430908203, + "331": 2.9590184688568115, + "332": 2.9784467220306396, + "333": 2.9573917388916016, + "334": 2.984755039215088, + "335": 2.977762222290039, + "336": 3.0143656730651855, + "337": 2.9890060424804688, + "338": 3.015803575515747, + "339": 2.980998992919922, + "340": 2.985811710357666, + "341": 2.9627182483673096, + "342": 2.970905303955078, + "343": 2.9666411876678467, + "344": 2.97985577583313, + "345": 2.9735774993896484, + "346": 2.978839874267578, + "347": 2.983078956604004, + "348": 2.974592685699463, + "349": 2.950813055038452, + "350": 2.9643874168395996, + "351": 2.987494707107544, + "352": 3.0066113471984863, + "353": 3.026047706604004, + "354": 2.9934444427490234, + "355": 2.9399914741516113, + "356": 2.9650113582611084, + "357": 2.9558963775634766, + "358": 2.982348918914795, + "359": 2.9659438133239746, + "360": 2.9718358516693115, + "361": 2.9699904918670654, + "362": 2.967891216278076, + "363": 2.933591842651367, + "364": 2.921778440475464, + "365": 2.939035415649414, + "366": 2.969357490539551, + "367": 2.946373224258423, + "368": 2.9747369289398193, + "369": 2.956366777420044, + "370": 2.9639105796813965, + "371": 2.9503540992736816, + "372": 2.938356876373291, + "373": 2.927687644958496, + "374": 2.947812080383301, + "375": 2.9191367626190186, + "376": 2.932507276535034, + "377": 2.9295830726623535 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "train_epoch_time": 4.788327217102051, + "train_loss": 2.937072514461479, + "train_score": 0.17194561509781622, + "val_loss": 2.9482095980616854, + "val_score": 0.16894643400484605 + }, + { + "epoch": 7, + "grad_norm": 0.5930202603340149, + "learning_rate": 0.464, + "model_norm": 95.25932312011719, + "step_logs": { + "grad_norm": { + "378": 0.5123597383499146, + "379": 0.5268654823303223, + "380": 0.5088984370231628, + "381": 0.4990869462490082, + "382": 0.5021896362304688, + "383": 0.47970518469810486, + "384": 0.46689966320991516, + "385": 0.49518582224845886, + "386": 0.5549895167350769, + "387": 0.5953436493873596, + "388": 0.5516526103019714, + "389": 0.48941537737846375, + "390": 0.41924116015434265, + "391": 0.4126428961753845, + "392": 0.42909085750579834, + "393": 0.4847986698150635, + "394": 0.5366307497024536, + "395": 0.5880151987075806, + "396": 0.5039798021316528, + "397": 0.4468422532081604, + "398": 0.4354686737060547, + "399": 0.4592638313770294, + "400": 0.4618544280529022, + "401": 0.5446250438690186, + "402": 0.7834435105323792, + "403": 0.87884920835495, + "404": 0.8685675859451294, + "405": 0.7562757134437561, + "406": 0.6640731692314148, + "407": 0.5349342823028564, + "408": 0.47209784388542175, + "409": 0.426312118768692, + "410": 0.4473778307437897, + "411": 0.47478505969047546, + "412": 0.524868369102478, + "413": 0.568409264087677, + "414": 0.6179620027542114, + "415": 0.6348097324371338, + "416": 0.6079012155532837, + "417": 0.6017040014266968, + "418": 0.6105506420135498, + "419": 0.6828820705413818, + "420": 0.8697967529296875, + "421": 0.8649794459342957, + "422": 0.7182846069335938, + "423": 0.5542111396789551, + "424": 0.4734981954097748, + "425": 0.43867775797843933, + "426": 0.4955150783061981, + "427": 0.5763095021247864, + "428": 0.6123806834220886, + "429": 0.7042171955108643, + "430": 0.6293308138847351, + "431": 0.5930202603340149 + }, + "loss": { + "378": 2.9278626441955566, + "379": 2.9308393001556396, + "380": 2.9338629245758057, + "381": 2.9160118103027344, + "382": 2.9421284198760986, + "383": 2.9190926551818848, + "384": 2.91929292678833, + "385": 2.919095277786255, + "386": 2.924923896789551, + "387": 2.940929651260376, + "388": 2.938507556915283, + "389": 2.9132022857666016, + "390": 2.914498805999756, + "391": 2.8911900520324707, + "392": 2.9084277153015137, + "393": 2.9068715572357178, + "394": 2.9306201934814453, + "395": 2.9106624126434326, + "396": 2.9211835861206055, + "397": 2.901524543762207, + "398": 2.9183766841888428, + "399": 2.8904476165771484, + "400": 2.9231927394866943, + "401": 2.9081082344055176, + "402": 2.9449961185455322, + "403": 2.9645495414733887, + "404": 2.989840030670166, + "405": 2.933091640472412, + "406": 2.9595439434051514, + "407": 2.9268269538879395, + "408": 2.9075257778167725, + "409": 2.900203227996826, + "410": 2.9099552631378174, + "411": 2.909003257751465, + "412": 2.9015204906463623, + "413": 2.9033491611480713, + "414": 2.9276843070983887, + "415": 2.920766830444336, + "416": 2.9083471298217773, + "417": 2.8954715728759766, + "418": 2.912308692932129, + "419": 2.907003164291382, + "420": 2.936640977859497, + "421": 2.936184883117676, + "422": 2.9318740367889404, + "423": 2.9092180728912354, + "424": 2.8859424591064453, + "425": 2.865473985671997, + "426": 2.9003617763519287, + "427": 2.8803653717041016, + "428": 2.8993115425109863, + "429": 2.8887863159179688, + "430": 2.91388201713562, + "431": 2.8706154823303223 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "train_epoch_time": 4.787798166275024, + "train_loss": 2.8939731666313184, + "train_score": 0.17380626796820928, + "val_loss": 2.901402291693178, + "val_score": 0.17138615839106344 + }, + { + "epoch": 8, + "grad_norm": 0.8885157108306885, + "learning_rate": 0.464, + "model_norm": 95.30177307128906, + "step_logs": { + "grad_norm": { + "432": 0.6617476940155029, + "433": 0.6248102188110352, + "434": 0.5668144822120667, + "435": 0.5151016116142273, + "436": 0.58073890209198, + "437": 0.7222148776054382, + "438": 1.1242494583129883, + "439": 1.1053086519241333, + "440": 0.90779048204422, + "441": 0.6062111258506775, + "442": 0.4770806133747101, + "443": 0.4220946729183197, + "444": 0.4529079794883728, + "445": 0.5120531916618347, + "446": 0.6154143810272217, + "447": 0.7112812399864197, + "448": 0.7018038630485535, + "449": 0.6837787628173828, + "450": 0.6769550442695618, + "451": 0.6928111910820007, + "452": 0.9153744578361511, + "453": 0.9256760478019714, + "454": 0.767328679561615, + "455": 0.6256396174430847, + "456": 0.6242458820343018, + "457": 0.6252599954605103, + "458": 0.6766478419303894, + "459": 0.6967686414718628, + "460": 0.7694593071937561, + "461": 0.8051020503044128, + "462": 0.7642512321472168, + "463": 0.7067453861236572, + "464": 0.6544169187545776, + "465": 0.7174378633499146, + "466": 0.7884799838066101, + "467": 0.812394380569458, + "468": 0.7971006631851196, + "469": 0.6915827393531799, + "470": 0.6738126277923584, + "471": 0.7205873131752014, + "472": 0.8300636410713196, + "473": 0.7758504748344421, + "474": 0.6803692579269409, + "475": 0.6284384727478027, + "476": 0.6289702653884888, + "477": 0.7247979044914246, + "478": 0.8562721610069275, + "479": 0.9490899443626404, + "480": 0.8300268650054932, + "481": 0.6356168389320374, + "482": 0.6143249273300171, + "483": 0.7047991156578064, + "484": 0.832797646522522, + "485": 0.8885157108306885 + }, + "loss": { + "432": 2.8969850540161133, + "433": 2.8764634132385254, + "434": 2.8807573318481445, + "435": 2.850984573364258, + "436": 2.868626356124878, + "437": 2.8706536293029785, + "438": 2.9357008934020996, + "439": 2.9886069297790527, + "440": 2.9636659622192383, + "441": 2.8769421577453613, + "442": 2.8727664947509766, + "443": 2.8487119674682617, + "444": 2.838052272796631, + "445": 2.826735019683838, + "446": 2.8392481803894043, + "447": 2.843799114227295, + "448": 2.8656325340270996, + "449": 2.848928451538086, + "450": 2.8602981567382812, + "451": 2.8328380584716797, + "452": 2.8663294315338135, + "453": 2.8875441551208496, + "454": 2.8956637382507324, + "455": 2.823615550994873, + "456": 2.827363967895508, + "457": 2.8147706985473633, + "458": 2.8390417098999023, + "459": 2.8198049068450928, + "460": 2.8387603759765625, + "461": 2.839061737060547, + "462": 2.8402538299560547, + "463": 2.821221113204956, + "464": 2.810823917388916, + "465": 2.8279409408569336, + "466": 2.839804172515869, + "467": 2.835284948348999, + "468": 2.8370203971862793, + "469": 2.7924611568450928, + "470": 2.8063201904296875, + "471": 2.7872331142425537, + "472": 2.836988687515259, + "473": 2.823427677154541, + "474": 2.812631607055664, + "475": 2.7815423011779785, + "476": 2.7804808616638184, + "477": 2.7773475646972656, + "478": 2.8141560554504395, + "479": 2.8172104358673096, + "480": 2.8529977798461914, + "481": 2.780829429626465, + "482": 2.7638890743255615, + "483": 2.74462890625, + "484": 2.804307460784912, + "485": 2.8235862255096436 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "train_epoch_time": 4.787794589996338, + "train_loss": 2.8234112734774093, + "train_score": 0.17195234042158086, + "val_loss": 2.831862328931193, + "val_score": 0.16844413748564868 + }, + { + "epoch": 9, + "grad_norm": 0.7335805892944336, + "learning_rate": 0.464, + "model_norm": 95.34683990478516, + "step_logs": { + "grad_norm": { + "486": 0.7950608134269714, + "487": 0.619175136089325, + "488": 0.69770348072052, + "489": 0.8962931632995605, + "490": 0.8749693632125854, + "491": 0.6809191703796387, + "492": 0.5763744711875916, + "493": 0.6200893521308899, + "494": 0.7120007276535034, + "495": 0.8273748159408569, + "496": 0.8230582475662231, + "497": 0.7934614419937134, + "498": 0.7708424925804138, + "499": 0.78073650598526, + "500": 0.8738507628440857, + "501": 0.9576129913330078, + "502": 1.0112968683242798, + "503": 0.8379231691360474, + "504": 0.7411358952522278, + "505": 0.6115418076515198, + "506": 0.5339595079421997, + "507": 0.6205567717552185, + "508": 0.6820998787879944, + "509": 0.7188384532928467, + "510": 0.7029887437820435, + "511": 0.6189846992492676, + "512": 0.6333284378051758, + "513": 0.6914864778518677, + "514": 0.681087076663971, + "515": 0.5931110382080078, + "516": 0.6183187961578369, + "517": 0.7948628067970276, + "518": 0.7336117625236511, + "519": 0.5850400924682617, + "520": 0.5836624503135681, + "521": 0.6116194725036621, + "522": 0.667742908000946, + "523": 0.7010170221328735, + "524": 0.7001011371612549, + "525": 0.6831109523773193, + "526": 0.6719421148300171, + "527": 0.711574375629425, + "528": 0.8238739371299744, + "529": 0.8185665011405945, + "530": 0.7372053861618042, + "531": 0.6520942449569702, + "532": 0.6019648909568787, + "533": 0.6640612483024597, + "534": 0.638088047504425, + "535": 0.5757718682289124, + "536": 0.6625477075576782, + "537": 0.682201623916626, + "538": 0.6337475180625916, + "539": 0.7335805892944336 + }, + "loss": { + "486": 2.8294472694396973, + "487": 2.7400765419006348, + "488": 2.7507190704345703, + "489": 2.793379306793213, + "490": 2.818857192993164, + "491": 2.7769064903259277, + "492": 2.7376935482025146, + "493": 2.740218162536621, + "494": 2.7673373222351074, + "495": 2.7299458980560303, + "496": 2.7706587314605713, + "497": 2.751803159713745, + "498": 2.772120952606201, + "499": 2.7298178672790527, + "500": 2.76345157623291, + "501": 2.7723190784454346, + "502": 2.8080880641937256, + "503": 2.77487850189209, + "504": 2.76108455657959, + "505": 2.7070536613464355, + "506": 2.6962361335754395, + "507": 2.7109580039978027, + "508": 2.715606689453125, + "509": 2.7147459983825684, + "510": 2.7383029460906982, + "511": 2.6887598037719727, + "512": 2.701934814453125, + "513": 2.7053263187408447, + "514": 2.743013858795166, + "515": 2.681004762649536, + "516": 2.691380739212036, + "517": 2.681699275970459, + "518": 2.7380852699279785, + "519": 2.6838488578796387, + "520": 2.686288833618164, + "521": 2.6749088764190674, + "522": 2.701096534729004, + "523": 2.697679042816162, + "524": 2.707214593887329, + "525": 2.687723159790039, + "526": 2.70268177986145, + "527": 2.6728389263153076, + "528": 2.696200370788574, + "529": 2.688584327697754, + "530": 2.7217512130737305, + "531": 2.6711459159851074, + "532": 2.681565523147583, + "533": 2.6599719524383545, + "534": 2.6768884658813477, + "535": 2.6378355026245117, + "536": 2.655148983001709, + "537": 2.64034366607666, + "538": 2.669140100479126, + "539": 2.6393368244171143 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "train_epoch_time": 4.787980079650879, + "train_loss": 2.685435557673275, + "train_score": 0.23642059713815855, + "val_loss": 2.699703412816938, + "val_score": 0.23265284185226148 + }, + { + "epoch": 10, + "grad_norm": 0.4752683639526367, + "learning_rate": 0.464, + "model_norm": 95.39339447021484, + "step_logs": { + "grad_norm": { + "540": 0.6320156455039978, + "541": 0.4787788987159729, + "542": 0.4651777744293213, + "543": 0.46387532353401184, + "544": 0.531488299369812, + "545": 0.6712128520011902, + "546": 0.7085956335067749, + "547": 0.7731642723083496, + "548": 0.7668701410293579, + "549": 0.8050109148025513, + "550": 0.9345594048500061, + "551": 0.8671684861183167, + "552": 0.6863387227058411, + "553": 0.5661612749099731, + "554": 0.517009973526001, + "555": 0.45309242606163025, + "556": 0.4611679017543793, + "557": 0.4993745982646942, + "558": 0.559796929359436, + "559": 0.732742428779602, + "560": 0.6820537447929382, + "561": 0.6208508610725403, + "562": 0.607408344745636, + "563": 0.7566462159156799, + "564": 0.7901949286460876, + "565": 0.7086068987846375, + "566": 0.6591873168945312, + "567": 0.6047205924987793, + "568": 0.6425679922103882, + "569": 0.6278573274612427, + "570": 0.575109601020813, + "571": 0.5234408378601074, + "572": 0.4931306540966034, + "573": 0.5067612528800964, + "574": 0.5322205424308777, + "575": 0.5439607501029968, + "576": 0.5016165971755981, + "577": 0.5407265424728394, + "578": 0.7372181415557861, + "579": 0.7476486563682556, + "580": 0.748710572719574, + "581": 0.9689611792564392, + "582": 1.2562280893325806, + "583": 1.292590856552124, + "584": 0.973041296005249, + "585": 0.6824032068252563, + "586": 0.5244526267051697, + "587": 0.554132342338562, + "588": 0.5782356858253479, + "589": 0.6434018015861511, + "590": 0.6371096968650818, + "591": 0.590674102306366, + "592": 0.5333691835403442, + "593": 0.4752683639526367 + }, + "loss": { + "540": 2.686978340148926, + "541": 2.6249241828918457, + "542": 2.6504530906677246, + "543": 2.6456995010375977, + "544": 2.622481346130371, + "545": 2.6247398853302, + "546": 2.6595399379730225, + "547": 2.6740851402282715, + "548": 2.675379991531372, + "549": 2.6552562713623047, + "550": 2.681993007659912, + "551": 2.685206890106201, + "552": 2.6768953800201416, + "553": 2.612220287322998, + "554": 2.61747670173645, + "555": 2.6182384490966797, + "556": 2.6112167835235596, + "557": 2.5930275917053223, + "558": 2.5975537300109863, + "559": 2.6249661445617676, + "560": 2.624742031097412, + "561": 2.638888359069824, + "562": 2.6378297805786133, + "563": 2.624378204345703, + "564": 2.669511318206787, + "565": 2.6083102226257324, + "566": 2.6195950508117676, + "567": 2.582709789276123, + "568": 2.6142172813415527, + "569": 2.627603530883789, + "570": 2.5986456871032715, + "571": 2.594231128692627, + "572": 2.571992874145508, + "573": 2.590059280395508, + "574": 2.575410842895508, + "575": 2.5851104259490967, + "576": 2.5679733753204346, + "577": 2.574068546295166, + "578": 2.592306613922119, + "579": 2.6290454864501953, + "580": 2.5941970348358154, + "581": 2.625878095626831, + "582": 2.6908907890319824, + "583": 2.7271928787231445, + "584": 2.694558620452881, + "585": 2.6065611839294434, + "586": 2.56463885307312, + "587": 2.5697219371795654, + "588": 2.5664005279541016, + "589": 2.5572540760040283, + "590": 2.590574264526367, + "591": 2.5723307132720947, + "592": 2.554614543914795, + "593": 2.539903163909912 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "train_epoch_time": 4.789448976516724, + "train_loss": 2.5473097831310123, + "train_score": 0.29266835552034964, + "val_loss": 2.572094604150573, + "val_score": 0.28362245219583215 + }, + { + "epoch": 11, + "grad_norm": 0.7030220627784729, + "learning_rate": 0.464, + "model_norm": 95.4269790649414, + "step_logs": { + "grad_norm": { + "594": 0.4165057837963104, + "595": 0.38980862498283386, + "596": 0.4164181649684906, + "597": 0.4445800483226776, + "598": 0.5343098044395447, + "599": 0.5596135258674622, + "600": 0.6174715161323547, + "601": 0.5824212431907654, + "602": 0.5237164497375488, + "603": 0.454979807138443, + "604": 0.4843762218952179, + "605": 0.41267240047454834, + "606": 0.4199749827384949, + "607": 0.45983171463012695, + "608": 0.5696313977241516, + "609": 0.8060118556022644, + "610": 0.9275069832801819, + "611": 0.9513506889343262, + "612": 0.8771731853485107, + "613": 0.7656822204589844, + "614": 0.7516440749168396, + "615": 0.6336190700531006, + "616": 0.4952774941921234, + "617": 0.41883477568626404, + "618": 0.3993341326713562, + "619": 0.41083458065986633, + "620": 0.5018238425254822, + "621": 0.6153827905654907, + "622": 0.7203396558761597, + "623": 0.7669448256492615, + "624": 0.7399802207946777, + "625": 0.7138455510139465, + "626": 0.6816047430038452, + "627": 0.651770830154419, + "628": 0.6485459804534912, + "629": 0.6119530200958252, + "630": 0.5728703141212463, + "631": 0.6326759457588196, + "632": 0.6876031160354614, + "633": 0.7211130261421204, + "634": 0.7122551798820496, + "635": 0.632091760635376, + "636": 0.5928220748901367, + "637": 0.6026967763900757, + "638": 0.5505890846252441, + "639": 0.4651810824871063, + "640": 0.4886285066604614, + "641": 0.5196266174316406, + "642": 0.605863630771637, + "643": 0.7037306427955627, + "644": 0.9081894159317017, + "645": 0.9372041821479797, + "646": 0.8744444847106934, + "647": 0.7030220627784729 + }, + "loss": { + "594": 2.5411648750305176, + "595": 2.55806827545166, + "596": 2.5419533252716064, + "597": 2.5229320526123047, + "598": 2.5441854000091553, + "599": 2.5458807945251465, + "600": 2.538055658340454, + "601": 2.5621376037597656, + "602": 2.550751209259033, + "603": 2.5387682914733887, + "604": 2.513301134109497, + "605": 2.534552574157715, + "606": 2.5238735675811768, + "607": 2.5166447162628174, + "608": 2.516538143157959, + "609": 2.577965259552002, + "610": 2.6280593872070312, + "611": 2.589290142059326, + "612": 2.632606029510498, + "613": 2.5650429725646973, + "614": 2.595564126968384, + "615": 2.563746452331543, + "616": 2.530768871307373, + "617": 2.538038730621338, + "618": 2.542367458343506, + "619": 2.5250368118286133, + "620": 2.516429901123047, + "621": 2.55241060256958, + "622": 2.541691541671753, + "623": 2.5550291538238525, + "624": 2.5688633918762207, + "625": 2.5351779460906982, + "626": 2.5427422523498535, + "627": 2.5638809204101562, + "628": 2.5520267486572266, + "629": 2.50813889503479, + "630": 2.5204803943634033, + "631": 2.534245491027832, + "632": 2.540843963623047, + "633": 2.5320701599121094, + "634": 2.539372444152832, + "635": 2.5022246837615967, + "636": 2.5396337509155273, + "637": 2.535313606262207, + "638": 2.5398499965667725, + "639": 2.4777631759643555, + "640": 2.4896185398101807, + "641": 2.511262893676758, + "642": 2.501821756362915, + "643": 2.525163173675537, + "644": 2.5627331733703613, + "645": 2.60943603515625, + "646": 2.563251256942749, + "647": 2.5517003536224365 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "train_epoch_time": 4.788496732711792, + "train_loss": 2.5154153128094445, + "train_score": 0.29197341278088484, + "val_loss": 2.5424890282780925, + "val_score": 0.28494098081106983 + }, + { + "epoch": 12, + "grad_norm": 0.3136054575443268, + "learning_rate": 0.464, + "model_norm": 95.45179748535156, + "step_logs": { + "grad_norm": { + "648": 0.5140668153762817, + "649": 0.5310370922088623, + "650": 0.5972155332565308, + "651": 0.6897873282432556, + "652": 0.6560055613517761, + "653": 0.5812339782714844, + "654": 0.571418285369873, + "655": 0.5198261737823486, + "656": 0.4848625361919403, + "657": 0.43155333399772644, + "658": 0.39806169271469116, + "659": 0.3970857262611389, + "660": 0.41489464044570923, + "661": 0.4632144868373871, + "662": 0.5339930057525635, + "663": 0.5290558338165283, + "664": 0.41746291518211365, + "665": 0.31858471035957336, + "666": 0.2990455627441406, + "667": 0.2865983247756958, + "668": 0.2888813614845276, + "669": 0.295500785112381, + "670": 0.3475888669490814, + "671": 0.37309378385543823, + "672": 0.3875153660774231, + "673": 0.42623019218444824, + "674": 0.46908703446388245, + "675": 0.48923808336257935, + "676": 0.48527809977531433, + "677": 0.44631049036979675, + "678": 0.42582863569259644, + "679": 0.49393194913864136, + "680": 0.5919609665870667, + "681": 0.6775600910186768, + "682": 0.6533501744270325, + "683": 0.6141091585159302, + "684": 0.5120543241500854, + "685": 0.4202282428741455, + "686": 0.38074174523353577, + "687": 0.34943413734436035, + "688": 0.3027404844760895, + "689": 0.24933558702468872, + "690": 0.2531324028968811, + "691": 0.23845671117305756, + "692": 0.24858500063419342, + "693": 0.2719728648662567, + "694": 0.2981884777545929, + "695": 0.32925498485565186, + "696": 0.3263830840587616, + "697": 0.32748541235923767, + "698": 0.3687443733215332, + "699": 0.38323426246643066, + "700": 0.3244877755641937, + "701": 0.3136054575443268 + }, + "loss": { + "648": 2.5151448249816895, + "649": 2.506683826446533, + "650": 2.4910826683044434, + "651": 2.5092737674713135, + "652": 2.5533409118652344, + "653": 2.503725051879883, + "654": 2.5076305866241455, + "655": 2.498969078063965, + "656": 2.4992101192474365, + "657": 2.480461835861206, + "658": 2.487180709838867, + "659": 2.4801599979400635, + "660": 2.4802134037017822, + "661": 2.46563720703125, + "662": 2.494999885559082, + "663": 2.5054945945739746, + "664": 2.4839906692504883, + "665": 2.4889817237854004, + "666": 2.4599452018737793, + "667": 2.4579591751098633, + "668": 2.468801736831665, + "669": 2.446441888809204, + "670": 2.460641384124756, + "671": 2.462390899658203, + "672": 2.4728314876556396, + "673": 2.4499056339263916, + "674": 2.473860025405884, + "675": 2.467082977294922, + "676": 2.460484027862549, + "677": 2.4631881713867188, + "678": 2.4538638591766357, + "679": 2.4610934257507324, + "680": 2.4810004234313965, + "681": 2.484571933746338, + "682": 2.4824466705322266, + "683": 2.479504346847534, + "684": 2.471201181411743, + "685": 2.4623706340789795, + "686": 2.4673476219177246, + "687": 2.4720041751861572, + "688": 2.444331407546997, + "689": 2.4316089153289795, + "690": 2.4574413299560547, + "691": 2.4243526458740234, + "692": 2.423112630844116, + "693": 2.4112918376922607, + "694": 2.434623956680298, + "695": 2.462160587310791, + "696": 2.438055992126465, + "697": 2.455031633377075, + "698": 2.4295156002044678, + "699": 2.4481005668640137, + "700": 2.449397325515747, + "701": 2.429503917694092 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "train_epoch_time": 4.7901530265808105, + "train_loss": 2.438967116217702, + "train_score": 0.3005682837415802, + "val_loss": 2.465615165767385, + "val_score": 0.29338135097248547 + }, + { + "epoch": 13, + "grad_norm": 0.16016384959220886, + "learning_rate": 0.3093333333333334, + "model_norm": 95.464599609375, + "step_logs": { + "grad_norm": { + "702": 0.3453993797302246, + "703": 0.3100011646747589, + "704": 0.26915502548217773, + "705": 0.24128884077072144, + "706": 0.22970464825630188, + "707": 0.2259349524974823, + "708": 0.2878847122192383, + "709": 0.2669242024421692, + "710": 0.2583456039428711, + "711": 0.27287745475769043, + "712": 0.2391832172870636, + "713": 0.27245578169822693, + "714": 0.3058371841907501, + "715": 0.3215775191783905, + "716": 0.3002978563308716, + "717": 0.2868741452693939, + "718": 0.27294203639030457, + "719": 0.28981003165245056, + "720": 0.30295103788375854, + "721": 0.2705759108066559, + "722": 0.2438855767250061, + "723": 0.265132874250412, + "724": 0.3027491867542267, + "725": 0.3553493916988373, + "726": 0.384289413690567, + "727": 0.3158110976219177, + "728": 0.26802176237106323, + "729": 0.19784598052501678, + "730": 0.17569728195667267, + "731": 0.22731731832027435, + "732": 0.23690709471702576, + "733": 0.22688141465187073, + "734": 0.21649880707263947, + "735": 0.2058725506067276, + "736": 0.2107367068529129, + "737": 0.2478368878364563, + "738": 0.20716924965381622, + "739": 0.18654270470142365, + "740": 0.20285052061080933, + "741": 0.21022991836071014, + "742": 0.17788375914096832, + "743": 0.1758386343717575, + "744": 0.19142788648605347, + "745": 0.15469466149806976, + "746": 0.1731296330690384, + "747": 0.16940422356128693, + "748": 0.16589288413524628, + "749": 0.17014259099960327, + "750": 0.19174227118492126, + "751": 0.20674866437911987, + "752": 0.19658075273036957, + "753": 0.16921284794807434, + "754": 0.1723858118057251, + "755": 0.16016384959220886 + }, + "loss": { + "702": 2.4194765090942383, + "703": 2.4304871559143066, + "704": 2.438307046890259, + "705": 2.423537254333496, + "706": 2.4442191123962402, + "707": 2.4261655807495117, + "708": 2.443248987197876, + "709": 2.4373044967651367, + "710": 2.4286811351776123, + "711": 2.4274113178253174, + "712": 2.4244940280914307, + "713": 2.4421350955963135, + "714": 2.4272165298461914, + "715": 2.4516987800598145, + "716": 2.426527976989746, + "717": 2.457326650619507, + "718": 2.445324420928955, + "719": 2.424609899520874, + "720": 2.425920248031616, + "721": 2.4272212982177734, + "722": 2.445246458053589, + "723": 2.427469491958618, + "724": 2.409996747970581, + "725": 2.4332985877990723, + "726": 2.432913064956665, + "727": 2.4027180671691895, + "728": 2.4143729209899902, + "729": 2.432358741760254, + "730": 2.429215669631958, + "731": 2.412034034729004, + "732": 2.412898540496826, + "733": 2.4283459186553955, + "734": 2.405430316925049, + "735": 2.4155373573303223, + "736": 2.4246621131896973, + "737": 2.409104108810425, + "738": 2.4167308807373047, + "739": 2.4168617725372314, + "740": 2.4213085174560547, + "741": 2.4189791679382324, + "742": 2.441124439239502, + "743": 2.418696403503418, + "744": 2.4251351356506348, + "745": 2.4194159507751465, + "746": 2.405216693878174, + "747": 2.4216737747192383, + "748": 2.4059338569641113, + "749": 2.3975815773010254, + "750": 2.411850690841675, + "751": 2.40053129196167, + "752": 2.4194886684417725, + "753": 2.413079261779785, + "754": 2.409118890762329, + "755": 2.4035377502441406 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "train_epoch_time": 4.7884681224823, + "train_loss": 2.41268866606046, + "train_score": 0.3084951128644957, + "val_loss": 2.4423450591506697, + "val_score": 0.30263795210080635 + }, + { + "epoch": 14, + "grad_norm": 0.16784948110580444, + "learning_rate": 0.1546666666666667, + "model_norm": 95.46870422363281, + "step_logs": { + "grad_norm": { + "756": 0.19645656645298004, + "757": 0.18018025159835815, + "758": 0.17488263547420502, + "759": 0.2123248130083084, + "760": 0.20626197755336761, + "761": 0.16895000636577606, + "762": 0.18747922778129578, + "763": 0.1822664439678192, + "764": 0.21675723791122437, + "765": 0.2158898562192917, + "766": 0.18203391134738922, + "767": 0.17938371002674103, + "768": 0.18146370351314545, + "769": 0.18185514211654663, + "770": 0.16450749337673187, + "771": 0.16111457347869873, + "772": 0.1627318561077118, + "773": 0.2093825489282608, + "774": 0.17623504996299744, + "775": 0.16924811899662018, + "776": 0.16619442403316498, + "777": 0.15962553024291992, + "778": 0.17313867807388306, + "779": 0.1702631264925003, + "780": 0.1643819659948349, + "781": 0.17640475928783417, + "782": 0.16850781440734863, + "783": 0.20662371814250946, + "784": 0.17208142578601837, + "785": 0.16308902204036713, + "786": 0.1885741949081421, + "787": 0.18951012194156647, + "788": 0.19879771769046783, + "789": 0.1690634936094284, + "790": 0.18761664628982544, + "791": 0.14956939220428467, + "792": 0.16341783106327057, + "793": 0.15777888894081116, + "794": 0.16458484530448914, + "795": 0.16985629498958588, + "796": 0.1632569581270218, + "797": 0.15049952268600464, + "798": 0.15547393262386322, + "799": 0.17240214347839355, + "800": 0.14596056938171387, + "801": 0.1486940234899521, + "802": 0.19244736433029175, + "803": 0.16164113581180573, + "804": 0.16381162405014038, + "805": 0.2064862698316574, + "806": 0.1532782018184662, + "807": 0.17337344586849213, + "808": 0.1766686886548996, + "809": 0.16784948110580444 + }, + "loss": { + "756": 2.4169013500213623, + "757": 2.422706365585327, + "758": 2.4225950241088867, + "759": 2.4113571643829346, + "760": 2.3900463581085205, + "761": 2.4160799980163574, + "762": 2.4278297424316406, + "763": 2.405268669128418, + "764": 2.39888858795166, + "765": 2.4069385528564453, + "766": 2.420778512954712, + "767": 2.415346145629883, + "768": 2.4281342029571533, + "769": 2.410311222076416, + "770": 2.4197232723236084, + "771": 2.402217388153076, + "772": 2.4118905067443848, + "773": 2.418978691101074, + "774": 2.433053970336914, + "775": 2.398118019104004, + "776": 2.3858675956726074, + "777": 2.3922033309936523, + "778": 2.4053726196289062, + "779": 2.4241819381713867, + "780": 2.4094724655151367, + "781": 2.401900291442871, + "782": 2.40262508392334, + "783": 2.429072856903076, + "784": 2.3976826667785645, + "785": 2.41078519821167, + "786": 2.4068260192871094, + "787": 2.4138550758361816, + "788": 2.4098353385925293, + "789": 2.4246461391448975, + "790": 2.3871753215789795, + "791": 2.40929913520813, + "792": 2.408252239227295, + "793": 2.3965396881103516, + "794": 2.4261937141418457, + "795": 2.399303436279297, + "796": 2.3932385444641113, + "797": 2.4012861251831055, + "798": 2.4015824794769287, + "799": 2.410233497619629, + "800": 2.3908591270446777, + "801": 2.392645835876465, + "802": 2.398653745651245, + "803": 2.3908660411834717, + "804": 2.402310848236084, + "805": 2.427328109741211, + "806": 2.410106658935547, + "807": 2.3900437355041504, + "808": 2.421699285507202, + "809": 2.4033288955688477 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "train_epoch_time": 4.789684534072876, + "train_loss": 2.4054617320105884, + "train_score": 0.31077048951276237, + "val_loss": 2.4350329337793433, + "val_score": 0.30439150391572106 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:33:13.281734", + "final_model_norm": 95.46870422363281, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:31:32.423702", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.3940119743347168, + "learning_rate": 4.64e-11, + "model_norm": 87.59406280517578, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.522193908691406, + "3": 8.844858169555664, + "4": 14.65994644165039, + "5": 4.685466289520264, + "6": 3.948777437210083, + "7": 3.7569050788879395, + "8": 6.890072345733643, + "9": 4.172502040863037, + "10": 4.804870128631592, + "11": 8.966069221496582, + "12": 6.553073883056641, + "13": 11.516484260559082, + "14": 33.73067855834961, + "15": 11.882366180419922, + "16": 4.092479705810547, + "17": 9.66065502166748, + "18": 52.35045623779297, + "19": 19.61064338684082, + "20": 36.703125, + "21": 16.85268211364746, + "22": 8.551225662231445, + "23": 6.886287689208984, + "24": 21.28398323059082, + "25": 9.168475151062012, + "26": 5.5196638107299805, + "27": 4.271469593048096, + "28": 6.910177230834961, + "29": 3.036736488342285, + "30": 12.238639831542969, + "31": 9.314115524291992, + "32": 4.548000335693359, + "33": 3.6965489387512207, + "34": 3.354375123977661, + "35": 3.745252847671509, + "36": 10.948062896728516, + "37": 13.788848876953125, + "38": 7.157576560974121, + "39": 3.2581188678741455, + "40": 2.4912819862365723, + "41": 8.811918258666992, + "42": 2.675812244415283, + "43": 1.9945507049560547, + "44": 1.610080599784851, + "45": 1.4955765008926392, + "46": 1.4362869262695312, + "47": 2.4751739501953125, + "48": 1.579730749130249, + "49": 2.897298574447632, + "50": 1.3345760107040405, + "51": 1.125062346458435, + "52": 2.776702404022217, + "53": 1.3940119743347168 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.798859119415283, + "3": 3.903656005859375, + "4": 4.252171516418457, + "5": 4.753815650939941, + "6": 4.088006019592285, + "7": 3.6133923530578613, + "8": 3.781853675842285, + "9": 4.502691745758057, + "10": 3.767045497894287, + "11": 4.191555023193359, + "12": 5.594882011413574, + "13": 5.5175395011901855, + "14": 5.409316539764404, + "15": 5.792328834533691, + "16": 4.137396812438965, + "17": 4.846114158630371, + "18": 14.33014965057373, + "19": 13.757341384887695, + "20": 12.278861999511719, + "21": 14.776731491088867, + "22": 18.466028213500977, + "23": 15.10760498046875, + "24": 14.427495956420898, + "25": 16.616600036621094, + "26": 13.928690910339355, + "27": 10.716838836669922, + "28": 10.541747093200684, + "29": 8.431756973266602, + "30": 7.79141902923584, + "31": 13.087114334106445, + "32": 12.545670509338379, + "33": 9.743616104125977, + "34": 7.695677280426025, + "35": 5.937138557434082, + "36": 7.195171356201172, + "37": 13.259464263916016, + "38": 13.314964294433594, + "39": 10.051591873168945, + "40": 7.038140296936035, + "41": 7.468445777893066, + "42": 9.320440292358398, + "43": 7.310545444488525, + "44": 6.042028427124023, + "45": 5.097186088562012, + "46": 4.168429374694824, + "47": 3.7739603519439697, + "48": 4.4266486167907715, + "49": 3.9670939445495605, + "50": 4.661612510681152, + "51": 3.9164187908172607, + "52": 3.7672781944274902, + "53": 4.303747653961182 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "train_epoch_time": 4.789660692214966, + "train_loss": 3.8168039754952385, + "train_score": 0.15260827655515163, + "val_loss": 3.825397946941319, + "val_score": 0.15114182700861745 + }, + { + "epoch": 1, + "grad_norm": 0.514480471611023, + "learning_rate": 0.464, + "model_norm": 87.60951232910156, + "step_logs": { + "grad_norm": { + "54": 1.2567001581192017, + "55": 1.182987093925476, + "56": 0.7642905116081238, + "57": 0.809528648853302, + "58": 1.6764920949935913, + "59": 1.1489003896713257, + "60": 0.41451770067214966, + "61": 0.6309722661972046, + "62": 0.8026121258735657, + "63": 1.4508488178253174, + "64": 1.0871254205703735, + "65": 0.2284901887178421, + "66": 0.14119938015937805, + "67": 0.1006789430975914, + "68": 0.1068229004740715, + "69": 0.13464471697807312, + "70": 0.23608392477035522, + "71": 0.4231679439544678, + "72": 0.9119272232055664, + "73": 0.9914507269859314, + "74": 1.168287754058838, + "75": 0.9777126908302307, + "76": 0.44396352767944336, + "77": 0.526111364364624, + "78": 0.7891499400138855, + "79": 0.8528002500534058, + "80": 0.9843778014183044, + "81": 0.905493438243866, + "82": 0.642676591873169, + "83": 0.6600900292396545, + "84": 0.7537866830825806, + "85": 0.7418822050094604, + "86": 0.6859331727027893, + "87": 0.677162230014801, + "88": 0.6110721230506897, + "89": 0.6294000148773193, + "90": 0.6997990012168884, + "91": 0.6818952560424805, + "92": 0.6383385062217712, + "93": 0.646510124206543, + "94": 0.6488845944404602, + "95": 0.6384627223014832, + "96": 0.5880641937255859, + "97": 0.5724895596504211, + "98": 0.5346968173980713, + "99": 0.47721046209335327, + "100": 0.3965725898742676, + "101": 0.4267416000366211, + "102": 0.4850084185600281, + "103": 0.5026825070381165, + "104": 0.5574753880500793, + "105": 0.5630974769592285, + "106": 0.5525953769683838, + "107": 0.514480471611023 + }, + "loss": { + "54": 3.829538345336914, + "55": 3.900240421295166, + "56": 3.4838783740997314, + "57": 3.4705233573913574, + "58": 3.5344412326812744, + "59": 3.915027618408203, + "60": 3.4037322998046875, + "61": 3.3808951377868652, + "62": 3.448050022125244, + "63": 3.5262856483459473, + "64": 3.7423739433288574, + "65": 3.374730110168457, + "66": 3.3496668338775635, + "67": 3.3476765155792236, + "68": 3.3487114906311035, + "69": 3.3233907222747803, + "70": 3.354370594024658, + "71": 3.3133368492126465, + "72": 3.3949131965637207, + "73": 3.532787799835205, + "74": 3.4515738487243652, + "75": 3.6085033416748047, + "76": 3.355710983276367, + "77": 3.3929359912872314, + "78": 3.385227680206299, + "79": 3.442448139190674, + "80": 3.383800745010376, + "81": 3.5130300521850586, + "82": 3.344487190246582, + "83": 3.4356167316436768, + "84": 3.4065611362457275, + "85": 3.4435362815856934, + "86": 3.3797593116760254, + "87": 3.4136552810668945, + "88": 3.3321127891540527, + "89": 3.4122955799102783, + "90": 3.3863933086395264, + "91": 3.3977856636047363, + "92": 3.367650270462036, + "93": 3.388272285461426, + "94": 3.3971071243286133, + "95": 3.394378185272217, + "96": 3.3742644786834717, + "97": 3.3986124992370605, + "98": 3.3466382026672363, + "99": 3.391096591949463, + "100": 3.3336567878723145, + "101": 3.362348794937134, + "102": 3.341663360595703, + "103": 3.3433074951171875, + "104": 3.3885936737060547, + "105": 3.3710618019104004, + "106": 3.3358426094055176, + "107": 3.35544490814209 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "train_epoch_time": 4.787289142608643, + "train_loss": 3.3426029892870823, + "train_score": 0.15260827655515163, + "val_loss": 3.3629409692043826, + "val_score": 0.15114182700861745 + }, + { + "epoch": 2, + "grad_norm": 0.443915456533432, + "learning_rate": 0.464, + "model_norm": 87.62141418457031, + "step_logs": { + "grad_norm": { + "108": 0.42400360107421875, + "109": 0.4330315887928009, + "110": 0.48329660296440125, + "111": 0.5137515068054199, + "112": 0.5598971247673035, + "113": 0.531995952129364, + "114": 0.48908567428588867, + "115": 0.48043346405029297, + "116": 0.45190975069999695, + "117": 0.4398237466812134, + "118": 0.4266398549079895, + "119": 0.4553142189979553, + "120": 0.5500909090042114, + "121": 0.5831237435340881, + "122": 0.5732588171958923, + "123": 0.6220180988311768, + "124": 0.6419893503189087, + "125": 0.6264800429344177, + "126": 0.5333755016326904, + "127": 0.4559817612171173, + "128": 0.42108386754989624, + "129": 0.3868430256843567, + "130": 0.3417876958847046, + "131": 0.3228040933609009, + "132": 0.30709001421928406, + "133": 0.3044731616973877, + "134": 0.3061954975128174, + "135": 0.3246074616909027, + "136": 0.3661918640136719, + "137": 0.39639613032341003, + "138": 0.46898069977760315, + "139": 0.47347989678382874, + "140": 0.4593355059623718, + "141": 0.4557546079158783, + "142": 0.45358777046203613, + "143": 0.47689199447631836, + "144": 0.4844415485858917, + "145": 0.49156394600868225, + "146": 0.45105409622192383, + "147": 0.3989025950431824, + "148": 0.3486165702342987, + "149": 0.3875811994075775, + "150": 0.3904741704463959, + "151": 0.3574006259441376, + "152": 0.37521421909332275, + "153": 0.4080508053302765, + "154": 0.3980672061443329, + "155": 0.3725684881210327, + "156": 0.3599435091018677, + "157": 0.38388964533805847, + "158": 0.42521315813064575, + "159": 0.4154224991798401, + "160": 0.4152551293373108, + "161": 0.443915456533432 + }, + "loss": { + "108": 3.312521457672119, + "109": 3.34661865234375, + "110": 3.3654656410217285, + "111": 3.321208953857422, + "112": 3.337642192840576, + "113": 3.395747184753418, + "114": 3.378725290298462, + "115": 3.363778591156006, + "116": 3.3873653411865234, + "117": 3.3523335456848145, + "118": 3.3114991188049316, + "119": 3.335526943206787, + "120": 3.366734504699707, + "121": 3.3786239624023438, + "122": 3.337869167327881, + "123": 3.3490445613861084, + "124": 3.370058536529541, + "125": 3.3664517402648926, + "126": 3.372699022293091, + "127": 3.3372371196746826, + "128": 3.3657479286193848, + "129": 3.3951103687286377, + "130": 3.3745837211608887, + "131": 3.336942672729492, + "132": 3.3377182483673096, + "133": 3.339109420776367, + "134": 3.324085235595703, + "135": 3.3301825523376465, + "136": 3.365748405456543, + "137": 3.3036351203918457, + "138": 3.3139443397521973, + "139": 3.380897045135498, + "140": 3.328117609024048, + "141": 3.3355884552001953, + "142": 3.3178296089172363, + "143": 3.346303939819336, + "144": 3.3225009441375732, + "145": 3.3662266731262207, + "146": 3.3394882678985596, + "147": 3.335334300994873, + "148": 3.3007912635803223, + "149": 3.331977367401123, + "150": 3.3437604904174805, + "151": 3.3364009857177734, + "152": 3.329472064971924, + "153": 3.3540613651275635, + "154": 3.3724470138549805, + "155": 3.344165802001953, + "156": 3.34220027923584, + "157": 3.3188018798828125, + "158": 3.321608543395996, + "159": 3.3312065601348877, + "160": 3.315763235092163, + "161": 3.3427977561950684 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "train_epoch_time": 4.787276983261108, + "train_loss": 3.3392115976068175, + "train_score": 0.15260827655515163, + "val_loss": 3.35697967176733, + "val_score": 0.15114182700861745 + }, + { + "epoch": 3, + "grad_norm": 0.4752013683319092, + "learning_rate": 0.464, + "model_norm": 87.64305877685547, + "step_logs": { + "grad_norm": { + "162": 0.453897088766098, + "163": 0.4476003646850586, + "164": 0.42216330766677856, + "165": 0.3942452669143677, + "166": 0.36772090196609497, + "167": 0.3478882908821106, + "168": 0.33396175503730774, + "169": 0.32168522477149963, + "170": 0.333763062953949, + "171": 0.32779353857040405, + "172": 0.3211841583251953, + "173": 0.30887407064437866, + "174": 0.3044631779193878, + "175": 0.3327837586402893, + "176": 0.37369370460510254, + "177": 0.3706324100494385, + "178": 0.37023842334747314, + "179": 0.3882354497909546, + "180": 0.42095234990119934, + "181": 0.41753268241882324, + "182": 0.37341630458831787, + "183": 0.39300069212913513, + "184": 0.42618292570114136, + "185": 0.4241190552711487, + "186": 0.43219563364982605, + "187": 0.43676042556762695, + "188": 0.43012484908103943, + "189": 0.42461398243904114, + "190": 0.40710654854774475, + "191": 0.39479538798332214, + "192": 0.37511155009269714, + "193": 0.3561560809612274, + "194": 0.3406989276409149, + "195": 0.33156925439834595, + "196": 0.3386594355106354, + "197": 0.35771849751472473, + "198": 0.35407596826553345, + "199": 0.3526514768600464, + "200": 0.36908161640167236, + "201": 0.39491814374923706, + "202": 0.3781105875968933, + "203": 0.3868880867958069, + "204": 0.3794122636318207, + "205": 0.3846893012523651, + "206": 0.3612799048423767, + "207": 0.38112694025039673, + "208": 0.4422636926174164, + "209": 0.48456239700317383, + "210": 0.5463977456092834, + "211": 0.5163854956626892, + "212": 0.46022534370422363, + "213": 0.4628199636936188, + "214": 0.4827488958835602, + "215": 0.4752013683319092 + }, + "loss": { + "162": 3.3317155838012695, + "163": 3.3516845703125, + "164": 3.3407416343688965, + "165": 3.359863042831421, + "166": 3.3250513076782227, + "167": 3.3077006340026855, + "168": 3.3049709796905518, + "169": 3.328124523162842, + "170": 3.3555619716644287, + "171": 3.3292555809020996, + "172": 3.3281049728393555, + "173": 3.328152656555176, + "174": 3.324321746826172, + "175": 3.3027453422546387, + "176": 3.325338840484619, + "177": 3.360889196395874, + "178": 3.3480138778686523, + "179": 3.32133150100708, + "180": 3.3484320640563965, + "181": 3.360474109649658, + "182": 3.3174314498901367, + "183": 3.320765733718872, + "184": 3.3069543838500977, + "185": 3.336766004562378, + "186": 3.3459224700927734, + "187": 3.3552560806274414, + "188": 3.32393217086792, + "189": 3.366337299346924, + "190": 3.3016326427459717, + "191": 3.333552122116089, + "192": 3.311038017272949, + "193": 3.3524298667907715, + "194": 3.339522361755371, + "195": 3.3452847003936768, + "196": 3.356109619140625, + "197": 3.3189916610717773, + "198": 3.2981154918670654, + "199": 3.320248603820801, + "200": 3.312007188796997, + "201": 3.3280959129333496, + "202": 3.332509756088257, + "203": 3.356332778930664, + "204": 3.322726249694824, + "205": 3.3292572498321533, + "206": 3.3012309074401855, + "207": 3.2981183528900146, + "208": 3.312960147857666, + "209": 3.3088507652282715, + "210": 3.311923027038574, + "211": 3.3157012462615967, + "212": 3.2933835983276367, + "213": 3.2858994007110596, + "214": 3.3187434673309326, + "215": 3.2820088863372803 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "train_epoch_time": 4.7873804569244385, + "train_loss": 3.2897771251766037, + "train_score": 0.15260379305443428, + "val_loss": 3.310802425500583, + "val_score": 0.15114631179621793 + }, + { + "epoch": 4, + "grad_norm": 0.5237197279930115, + "learning_rate": 0.464, + "model_norm": 87.70458984375, + "step_logs": { + "grad_norm": { + "216": 0.44777214527130127, + "217": 0.48899611830711365, + "218": 0.5638218522071838, + "219": 0.5643922686576843, + "220": 0.5390493869781494, + "221": 0.5048683881759644, + "222": 0.4525969624519348, + "223": 0.45847010612487793, + "224": 0.4864107370376587, + "225": 4.742197036743164, + "226": 0.8389106392860413, + "227": 0.686756432056427, + "228": 0.4833715856075287, + "229": 0.4567817449569702, + "230": 0.42351213097572327, + "231": 0.4084383547306061, + "232": 0.3881327211856842, + "233": 0.3714514970779419, + "234": 0.35957810282707214, + "235": 0.356612890958786, + "236": 0.3716936707496643, + "237": 0.4051712453365326, + "238": 0.4738687574863434, + "239": 0.5021004676818848, + "240": 0.5018496513366699, + "241": 0.46320876479148865, + "242": 0.42263951897621155, + "243": 0.4185473322868347, + "244": 0.9376572966575623, + "245": 0.4129084646701813, + "246": 0.4135898947715759, + "247": 0.41051092743873596, + "248": 0.4149532914161682, + "249": 0.632099449634552, + "250": 0.5338677167892456, + "251": 0.44909510016441345, + "252": 0.38973763585090637, + "253": 0.3813479244709015, + "254": 0.37471944093704224, + "255": 0.40992334485054016, + "256": 0.4109664559364319, + "257": 0.4014109969139099, + "258": 0.36270540952682495, + "259": 0.36905181407928467, + "260": 0.3724833130836487, + "261": 0.37833690643310547, + "262": 0.37102121114730835, + "263": 0.36926594376564026, + "264": 0.3439177870750427, + "265": 0.3319527506828308, + "266": 0.3107961118221283, + "267": 0.3148708939552307, + "268": 0.33677348494529724, + "269": 0.5237197279930115 + }, + "loss": { + "216": 3.270552396774292, + "217": 3.2687864303588867, + "218": 3.291767120361328, + "219": 3.3108158111572266, + "220": 3.3068325519561768, + "221": 3.290863513946533, + "222": 3.2585535049438477, + "223": 3.255937337875366, + "224": 3.250828742980957, + "225": 3.306823253631592, + "226": 3.3667616844177246, + "227": 3.344569206237793, + "228": 3.286442279815674, + "229": 3.317892074584961, + "230": 3.2904434204101562, + "231": 3.2846639156341553, + "232": 3.2664108276367188, + "233": 3.2574217319488525, + "234": 3.227783441543579, + "235": 3.2278926372528076, + "236": 3.220323085784912, + "237": 3.224609851837158, + "238": 3.210706949234009, + "239": 3.242043972015381, + "240": 3.178334951400757, + "241": 3.2063045501708984, + "242": 3.2051327228546143, + "243": 3.2179551124572754, + "244": 3.183863639831543, + "245": 3.1867003440856934, + "246": 3.183915138244629, + "247": 3.1895570755004883, + "248": 3.172560453414917, + "249": 3.1769180297851562, + "250": 3.171640396118164, + "251": 3.2026853561401367, + "252": 3.193422794342041, + "253": 3.1722874641418457, + "254": 3.1597189903259277, + "255": 3.1747875213623047, + "256": 3.1717894077301025, + "257": 3.1529808044433594, + "258": 3.1525895595550537, + "259": 3.1705822944641113, + "260": 3.121812582015991, + "261": 3.1705031394958496, + "262": 3.1393752098083496, + "263": 3.144162654876709, + "264": 3.157045364379883, + "265": 3.1581740379333496, + "266": 3.1955366134643555, + "267": 3.166224956512451, + "268": 3.156071424484253, + "269": 3.1477420330047607 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "train_epoch_time": 4.787662506103516, + "train_loss": 3.1613572962143843, + "train_score": 0.1667313037763538, + "val_loss": 3.181593383631394, + "val_score": 0.16378444296347425 + }, + { + "epoch": 5, + "grad_norm": 0.35353803634643555, + "learning_rate": 0.464, + "model_norm": 87.74584197998047, + "step_logs": { + "grad_norm": { + "270": 0.42171791195869446, + "271": 0.3841199278831482, + "272": 0.3562532961368561, + "273": 0.32652315497398376, + "274": 0.2897869348526001, + "275": 0.30895140767097473, + "276": 0.32444262504577637, + "277": 0.31616899371147156, + "278": 0.3206145763397217, + "279": 0.3470594882965088, + "280": 0.3585662543773651, + "281": 0.3872945308685303, + "282": 0.39947181940078735, + "283": 0.37214016914367676, + "284": 0.37557560205459595, + "285": 0.44474145770072937, + "286": 0.38327527046203613, + "287": 0.33758410811424255, + "288": 0.3330875635147095, + "289": 1.0608270168304443, + "290": 5.615816593170166, + "291": 0.714798092842102, + "292": 0.470763623714447, + "293": 0.44693851470947266, + "294": 0.45375385880470276, + "295": 0.42669814825057983, + "296": 0.4325542449951172, + "297": 0.4207967519760132, + "298": 0.44864749908447266, + "299": 0.3580751121044159, + "300": 0.3035534918308258, + "301": 0.26924169063568115, + "302": 0.25898274779319763, + "303": 0.2553577423095703, + "304": 0.3023153841495514, + "305": 0.3230116665363312, + "306": 0.3489471673965454, + "307": 0.345846951007843, + "308": 0.3564895689487457, + "309": 0.32305246591567993, + "310": 0.3027469515800476, + "311": 0.2787008583545685, + "312": 0.2635951340198517, + "313": 0.27753347158432007, + "314": 0.27476975321769714, + "315": 0.23849482834339142, + "316": 0.2239506095647812, + "317": 0.23103581368923187, + "318": 0.2659413814544678, + "319": 0.2500379681587219, + "320": 0.22238916158676147, + "321": 0.2682419419288635, + "322": 0.34568262100219727, + "323": 0.35353803634643555 + }, + "loss": { + "270": 3.1640477180480957, + "271": 3.1651968955993652, + "272": 3.141981363296509, + "273": 3.1493163108825684, + "274": 3.122767448425293, + "275": 3.135230541229248, + "276": 3.1407628059387207, + "277": 3.1474385261535645, + "278": 3.143263339996338, + "279": 3.1593728065490723, + "280": 3.1396913528442383, + "281": 3.1290547847747803, + "282": 3.1341495513916016, + "283": 3.138803720474243, + "284": 3.1228044033050537, + "285": 3.1064062118530273, + "286": 3.1627774238586426, + "287": 3.128211498260498, + "288": 3.114041328430176, + "289": 3.1342344284057617, + "290": 3.5254242420196533, + "291": 3.417147636413574, + "292": 3.3069357872009277, + "293": 3.2699406147003174, + "294": 3.2923851013183594, + "295": 3.287499189376831, + "296": 3.2823872566223145, + "297": 3.2895798683166504, + "298": 3.2938082218170166, + "299": 3.253305435180664, + "300": 3.238431692123413, + "301": 3.2401161193847656, + "302": 3.2279105186462402, + "303": 3.221372127532959, + "304": 3.2219200134277344, + "305": 3.2237117290496826, + "306": 3.2213127613067627, + "307": 3.208238363265991, + "308": 3.1887784004211426, + "309": 3.1917471885681152, + "310": 3.190892219543457, + "311": 3.184481382369995, + "312": 3.178776741027832, + "313": 3.1682095527648926, + "314": 3.1639552116394043, + "315": 3.1805777549743652, + "316": 3.1627583503723145, + "317": 3.1545886993408203, + "318": 3.1599767208099365, + "319": 3.1733086109161377, + "320": 3.1594300270080566, + "321": 3.1545329093933105, + "322": 3.1688942909240723, + "323": 3.1808645725250244 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "train_epoch_time": 4.787666082382202, + "train_loss": 3.152705482294091, + "train_score": 0.141473502497933, + "val_loss": 3.1700148270406623, + "val_score": 0.1428001221401957 + }, + { + "epoch": 6, + "grad_norm": 0.501187801361084, + "learning_rate": 0.464, + "model_norm": 87.79143524169922, + "step_logs": { + "grad_norm": { + "324": 0.28567054867744446, + "325": 0.27429282665252686, + "326": 0.28167828917503357, + "327": 0.32508182525634766, + "328": 0.3271867036819458, + "329": 0.35623452067375183, + "330": 0.4134608507156372, + "331": 0.44421863555908203, + "332": 0.4447523355484009, + "333": 0.4654664695262909, + "334": 0.4743514657020569, + "335": 0.507012128829956, + "336": 0.5169128775596619, + "337": 0.5209842920303345, + "338": 0.5336570739746094, + "339": 0.4899309277534485, + "340": 0.42174527049064636, + "341": 0.4082046151161194, + "342": 0.4352353513240814, + "343": 0.4328165650367737, + "344": 0.4127974510192871, + "345": 0.3974686861038208, + "346": 0.40610817074775696, + "347": 0.3848150670528412, + "348": 0.38284167647361755, + "349": 0.4253147542476654, + "350": 0.4837416112422943, + "351": 0.49535873532295227, + "352": 0.6893389225006104, + "353": 0.536414384841919, + "354": 0.4423315227031708, + "355": 0.624416172504425, + "356": 0.43107542395591736, + "357": 0.45929062366485596, + "358": 0.4513593912124634, + "359": 0.446321576833725, + "360": 0.4107964336872101, + "361": 0.39709919691085815, + "362": 0.4834073781967163, + "363": 0.5084345936775208, + "364": 0.5302790403366089, + "365": 0.4411664605140686, + "366": 0.3516263961791992, + "367": 0.31146740913391113, + "368": 0.3488292396068573, + "369": 0.4345119595527649, + "370": 0.37957683205604553, + "371": 0.41497910022735596, + "372": 0.37643009424209595, + "373": 0.4022415280342102, + "374": 0.46977072954177856, + "375": 0.5382270812988281, + "376": 0.4984232783317566, + "377": 0.501187801361084 + }, + "loss": { + "324": 3.150212287902832, + "325": 3.1622352600097656, + "326": 3.155548095703125, + "327": 3.1390395164489746, + "328": 3.141885280609131, + "329": 3.13614559173584, + "330": 3.1545543670654297, + "331": 3.1406188011169434, + "332": 3.1378555297851562, + "333": 3.1276278495788574, + "334": 3.1326255798339844, + "335": 3.115692615509033, + "336": 3.1312341690063477, + "337": 3.1147375106811523, + "338": 3.147418975830078, + "339": 3.1518070697784424, + "340": 3.1034297943115234, + "341": 3.114619255065918, + "342": 3.1150705814361572, + "343": 3.0952627658843994, + "344": 3.101466655731201, + "345": 3.1012301445007324, + "346": 3.1000776290893555, + "347": 3.0812013149261475, + "348": 3.0796804428100586, + "349": 3.0606911182403564, + "350": 3.061680793762207, + "351": 3.0804953575134277, + "352": 3.1108388900756836, + "353": 3.0975003242492676, + "354": 3.068204402923584, + "355": 3.050950765609741, + "356": 3.04215931892395, + "357": 3.069270610809326, + "358": 3.0697364807128906, + "359": 3.0631752014160156, + "360": 3.0354509353637695, + "361": 3.038780927658081, + "362": 3.048603057861328, + "363": 3.052165985107422, + "364": 3.0707545280456543, + "365": 3.051659345626831, + "366": 3.0290098190307617, + "367": 3.0111002922058105, + "368": 3.006466865539551, + "369": 3.0310111045837402, + "370": 3.0067758560180664, + "371": 3.016589641571045, + "372": 3.006666660308838, + "373": 2.9676527976989746, + "374": 2.9829344749450684, + "375": 3.001209259033203, + "376": 2.973054885864258, + "377": 2.9742369651794434 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "train_epoch_time": 4.788262367248535, + "train_loss": 2.9948131232897897, + "train_score": 0.15421224891540822, + "val_loss": 3.021159153444485, + "val_score": 0.1515768514400782 + }, + { + "epoch": 7, + "grad_norm": 0.42807021737098694, + "learning_rate": 0.464, + "model_norm": 87.8177261352539, + "step_logs": { + "grad_norm": { + "378": 0.5912654995918274, + "379": 0.5346393585205078, + "380": 0.4574964940547943, + "381": 0.4305146038532257, + "382": 0.4266318082809448, + "383": 0.4264521598815918, + "384": 0.4047684371471405, + "385": 0.4431267976760864, + "386": 0.5706725120544434, + "387": 0.5285161137580872, + "388": 0.49054163694381714, + "389": 0.45295250415802, + "390": 0.4346877932548523, + "391": 0.4331419765949249, + "392": 0.5483351945877075, + "393": 0.5667582154273987, + "394": 0.524700403213501, + "395": 0.4524461328983307, + "396": 0.44014936685562134, + "397": 0.4243784248828888, + "398": 0.4710800051689148, + "399": 0.48999878764152527, + "400": 0.48598068952560425, + "401": 0.516622006893158, + "402": 0.5722930431365967, + "403": 0.5258074998855591, + "404": 0.49011051654815674, + "405": 0.47478213906288147, + "406": 0.481942743062973, + "407": 0.475354939699173, + "408": 0.4578336477279663, + "409": 0.4217711389064789, + "410": 0.4343506097793579, + "411": 0.4680066704750061, + "412": 0.514274537563324, + "413": 0.7060795426368713, + "414": 0.7704524397850037, + "415": 0.5010085701942444, + "416": 0.4054775536060333, + "417": 0.3802575170993805, + "418": 0.41707009077072144, + "419": 0.6428954005241394, + "420": 0.5348367691040039, + "421": 0.38486918807029724, + "422": 0.369230180978775, + "423": 0.5235440731048584, + "424": 0.4992254972457886, + "425": 0.3032305836677551, + "426": 0.26986604928970337, + "427": 0.3310961425304413, + "428": 0.36691445112228394, + "429": 0.4646666347980499, + "430": 0.4643303453922272, + "431": 0.42807021737098694 + }, + "loss": { + "378": 3.010556221008301, + "379": 2.9800565242767334, + "380": 2.991891384124756, + "381": 2.9811596870422363, + "382": 3.013350486755371, + "383": 2.9468960762023926, + "384": 2.9682846069335938, + "385": 2.9865987300872803, + "386": 2.9837841987609863, + "387": 2.992126941680908, + "388": 2.981083393096924, + "389": 2.9554102420806885, + "390": 2.96353816986084, + "391": 2.9641830921173096, + "392": 2.995023727416992, + "393": 2.978753089904785, + "394": 2.9782819747924805, + "395": 2.969633102416992, + "396": 2.9756650924682617, + "397": 2.9608676433563232, + "398": 2.957498073577881, + "399": 2.9687013626098633, + "400": 2.984408378601074, + "401": 2.962040424346924, + "402": 2.9563395977020264, + "403": 2.9446516036987305, + "404": 2.979127883911133, + "405": 2.95468807220459, + "406": 2.9572196006774902, + "407": 2.9507298469543457, + "408": 2.9528446197509766, + "409": 2.9478797912597656, + "410": 2.9589197635650635, + "411": 2.931971549987793, + "412": 2.955918312072754, + "413": 2.958726644515991, + "414": 2.9883460998535156, + "415": 2.953390121459961, + "416": 2.9446823596954346, + "417": 2.949601650238037, + "418": 2.9266226291656494, + "419": 2.940394401550293, + "420": 2.976118564605713, + "421": 2.925241470336914, + "422": 2.937685489654541, + "423": 2.9466919898986816, + "424": 2.9619574546813965, + "425": 2.9219822883605957, + "426": 2.9320974349975586, + "427": 2.918477773666382, + "428": 2.9186582565307617, + "429": 2.9233531951904297, + "430": 2.9192821979522705, + "431": 2.9219703674316406 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "train_epoch_time": 4.788901329040527, + "train_loss": 2.92969334436797, + "train_score": 0.17221462515796104, + "val_loss": 2.9486451439140313, + "val_score": 0.1700272675599351 + }, + { + "epoch": 8, + "grad_norm": 0.39331939816474915, + "learning_rate": 0.464, + "model_norm": 87.84315490722656, + "step_logs": { + "grad_norm": { + "432": 0.40006187558174133, + "433": 0.36844465136528015, + "434": 0.3952924609184265, + "435": 0.48806363344192505, + "436": 0.5495551228523254, + "437": 0.668045699596405, + "438": 0.6224932074546814, + "439": 0.4230979084968567, + "440": 0.3479797840118408, + "441": 0.38340625166893005, + "442": 0.3424837291240692, + "443": 0.40772682428359985, + "444": 0.47499582171440125, + "445": 0.5305530428886414, + "446": 0.5541837811470032, + "447": 0.7858777046203613, + "448": 0.5743862390518188, + "449": 0.5429847240447998, + "450": 0.5184003710746765, + "451": 0.5168136358261108, + "452": 0.5578228235244751, + "453": 0.535378098487854, + "454": 0.4767427444458008, + "455": 0.41185852885246277, + "456": 0.3904164433479309, + "457": 0.39373108744621277, + "458": 0.3877347409725189, + "459": 0.35361406207084656, + "460": 0.3541043996810913, + "461": 0.35564976930618286, + "462": 0.37229400873184204, + "463": 0.36379274725914, + "464": 0.35928118228912354, + "465": 0.3857515752315521, + "466": 0.42022520303726196, + "467": 0.4710654020309448, + "468": 0.471584677696228, + "469": 0.5499950647354126, + "470": 0.5296297073364258, + "471": 0.5159841179847717, + "472": 0.6447048783302307, + "473": 0.4973290264606476, + "474": 0.4191618859767914, + "475": 0.28815633058547974, + "476": 0.2634706199169159, + "477": 0.2851393222808838, + "478": 0.3108154535293579, + "479": 0.3626827895641327, + "480": 0.4542961120605469, + "481": 0.4695046544075012, + "482": 0.4535657465457916, + "483": 0.44085997343063354, + "484": 0.408387690782547, + "485": 0.39331939816474915 + }, + "loss": { + "432": 2.9427413940429688, + "433": 2.8971974849700928, + "434": 2.9285292625427246, + "435": 2.9271202087402344, + "436": 2.9371798038482666, + "437": 2.936964511871338, + "438": 2.949596881866455, + "439": 2.9100966453552246, + "440": 2.9197194576263428, + "441": 2.9102163314819336, + "442": 2.9230966567993164, + "443": 2.91611909866333, + "444": 2.9378106594085693, + "445": 2.9030349254608154, + "446": 2.932502269744873, + "447": 2.9274721145629883, + "448": 2.95884370803833, + "449": 2.934290885925293, + "450": 2.9383561611175537, + "451": 2.925407886505127, + "452": 2.938593864440918, + "453": 2.9345874786376953, + "454": 2.9134750366210938, + "455": 2.8959689140319824, + "456": 2.9116811752319336, + "457": 2.9018585681915283, + "458": 2.914478063583374, + "459": 2.8870625495910645, + "460": 2.8916149139404297, + "461": 2.899754524230957, + "462": 2.930670976638794, + "463": 2.8785715103149414, + "464": 2.8861770629882812, + "465": 2.9090828895568848, + "466": 2.897146701812744, + "467": 2.8996613025665283, + "468": 2.924686908721924, + "469": 2.906843662261963, + "470": 2.9266715049743652, + "471": 2.92411208152771, + "472": 2.9472403526306152, + "473": 2.916522979736328, + "474": 2.924605369567871, + "475": 2.8895838260650635, + "476": 2.8787221908569336, + "477": 2.876397132873535, + "478": 2.8896255493164062, + "479": 2.876448392868042, + "480": 2.9053635597229004, + "481": 2.9030919075012207, + "482": 2.9148945808410645, + "483": 2.9046378135681152, + "484": 2.9086437225341797, + "485": 2.9065005779266357 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "train_epoch_time": 4.787418603897095, + "train_loss": 2.8975904314897667, + "train_score": 0.16385401723802176, + "val_loss": 2.91943799368139, + "val_score": 0.16317451219197393 + }, + { + "epoch": 9, + "grad_norm": 0.30390891432762146, + "learning_rate": 0.464, + "model_norm": 87.86610412597656, + "step_logs": { + "grad_norm": { + "486": 0.3884833753108978, + "487": 0.36869633197784424, + "488": 0.419179767370224, + "489": 0.44545257091522217, + "490": 0.42153453826904297, + "491": 0.4108070433139801, + "492": 0.43754008412361145, + "493": 0.3962385356426239, + "494": 0.3834441304206848, + "495": 0.3728509843349457, + "496": 0.36539870500564575, + "497": 0.3631349802017212, + "498": 0.36462244391441345, + "499": 0.3862491250038147, + "500": 0.3902868330478668, + "501": 0.40250545740127563, + "502": 0.38569462299346924, + "503": 0.3716394007205963, + "504": 0.38129302859306335, + "505": 0.3877372443675995, + "506": 0.41685324907302856, + "507": 0.4101344048976898, + "508": 0.40829846262931824, + "509": 0.3952760696411133, + "510": 0.3784595727920532, + "511": 0.37070387601852417, + "512": 0.3860388696193695, + "513": 0.39683663845062256, + "514": 0.40479129552841187, + "515": 0.42428261041641235, + "516": 0.4738938808441162, + "517": 0.46346062421798706, + "518": 0.43982386589050293, + "519": 0.3877086639404297, + "520": 0.35644033551216125, + "521": 0.35609158873558044, + "522": 0.376585990190506, + "523": 0.35381796956062317, + "524": 0.366868793964386, + "525": 0.32213616371154785, + "526": 0.29177966713905334, + "527": 0.3044784367084503, + "528": 0.4109605550765991, + "529": 0.30389589071273804, + "530": 0.2883964478969574, + "531": 0.29107925295829773, + "532": 0.30268076062202454, + "533": 0.31257936358451843, + "534": 0.3079054057598114, + "535": 0.331559419631958, + "536": 0.2933565378189087, + "537": 0.31729722023010254, + "538": 0.3078087568283081, + "539": 0.30390891432762146 + }, + "loss": { + "486": 2.891432285308838, + "487": 2.895451068878174, + "488": 2.894822120666504, + "489": 2.8899145126342773, + "490": 2.9043304920196533, + "491": 2.9070327281951904, + "492": 2.9097061157226562, + "493": 2.8964169025421143, + "494": 2.8906633853912354, + "495": 2.8931174278259277, + "496": 2.8730173110961914, + "497": 2.892716407775879, + "498": 2.8784871101379395, + "499": 2.896024227142334, + "500": 2.8959951400756836, + "501": 2.883206367492676, + "502": 2.8832459449768066, + "503": 2.909162998199463, + "504": 2.8844075202941895, + "505": 2.8829445838928223, + "506": 2.8923609256744385, + "507": 2.862466812133789, + "508": 2.887761354446411, + "509": 2.8792409896850586, + "510": 2.8858485221862793, + "511": 2.8859100341796875, + "512": 2.885378122329712, + "513": 2.8858842849731445, + "514": 2.9066884517669678, + "515": 2.885735511779785, + "516": 2.89491868019104, + "517": 2.87711238861084, + "518": 2.8932738304138184, + "519": 2.8749780654907227, + "520": 2.872653007507324, + "521": 2.8594064712524414, + "522": 2.8818066120147705, + "523": 2.8652231693267822, + "524": 2.8875558376312256, + "525": 2.8826684951782227, + "526": 2.8749003410339355, + "527": 2.860959053039551, + "528": 2.8698220252990723, + "529": 2.868011474609375, + "530": 2.8935108184814453, + "531": 2.8524913787841797, + "532": 2.8598310947418213, + "533": 2.8692216873168945, + "534": 2.865520715713501, + "535": 2.860454559326172, + "536": 2.8765625953674316, + "537": 2.871222972869873, + "538": 2.8787503242492676, + "539": 2.852886199951172 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "train_epoch_time": 4.788470983505249, + "train_loss": 2.8674550178232288, + "train_score": 0.20611885760829302, + "val_loss": 2.8892866270413493, + "val_score": 0.19857294099612843 + }, + { + "epoch": 10, + "grad_norm": 0.3852273225784302, + "learning_rate": 0.464, + "model_norm": 87.88851165771484, + "step_logs": { + "grad_norm": { + "540": 0.31072601675987244, + "541": 0.33412304520606995, + "542": 0.3782663643360138, + "543": 0.4295848309993744, + "544": 0.47475501894950867, + "545": 0.45222991704940796, + "546": 0.4470860958099365, + "547": 0.4074265658855438, + "548": 0.3899801969528198, + "549": 0.37065261602401733, + "550": 0.35630953311920166, + "551": 0.33831164240837097, + "552": 0.33550557494163513, + "553": 0.32936620712280273, + "554": 0.30802252888679504, + "555": 0.3193916082382202, + "556": 0.3542165756225586, + "557": 0.4319803714752197, + "558": 0.44829341769218445, + "559": 0.454242467880249, + "560": 0.468975692987442, + "561": 0.46344709396362305, + "562": 0.4485126733779907, + "563": 0.3968540132045746, + "564": 0.3787069618701935, + "565": 0.3727272152900696, + "566": 0.3637979030609131, + "567": 0.3713497519493103, + "568": 0.3550596237182617, + "569": 0.31624555587768555, + "570": 0.3268010914325714, + "571": 0.3223540484905243, + "572": 0.32249465584754944, + "573": 0.33169621229171753, + "574": 0.32700756192207336, + "575": 0.328964501619339, + "576": 0.3778802156448364, + "577": 0.44009077548980713, + "578": 0.4457842707633972, + "579": 0.4381271004676819, + "580": 0.4028886556625366, + "581": 0.3914549648761749, + "582": 0.3791936933994293, + "583": 0.37189045548439026, + "584": 0.4217931032180786, + "585": 0.442810595035553, + "586": 0.40438714623451233, + "587": 0.41019293665885925, + "588": 0.3906777799129486, + "589": 0.3975141644477844, + "590": 0.4016437828540802, + "591": 0.39046815037727356, + "592": 0.38027289509773254, + "593": 0.3852273225784302 + }, + "loss": { + "540": 2.8652682304382324, + "541": 2.856447696685791, + "542": 2.872041702270508, + "543": 2.8867805004119873, + "544": 2.8739094734191895, + "545": 2.892620086669922, + "546": 2.877148151397705, + "547": 2.8690030574798584, + "548": 2.8649845123291016, + "549": 2.864064931869507, + "550": 2.863469123840332, + "551": 2.876431941986084, + "552": 2.8608522415161133, + "553": 2.8575010299682617, + "554": 2.8603713512420654, + "555": 2.8452115058898926, + "556": 2.864285945892334, + "557": 2.865006685256958, + "558": 2.862560272216797, + "559": 2.87034273147583, + "560": 2.8586273193359375, + "561": 2.8775405883789062, + "562": 2.8730568885803223, + "563": 2.8686351776123047, + "564": 2.865934371948242, + "565": 2.850599527359009, + "566": 2.8591935634613037, + "567": 2.8511219024658203, + "568": 2.8655948638916016, + "569": 2.8376872539520264, + "570": 2.854034423828125, + "571": 2.8605992794036865, + "572": 2.8525283336639404, + "573": 2.8447864055633545, + "574": 2.842714548110962, + "575": 2.8467721939086914, + "576": 2.8557801246643066, + "577": 2.876159191131592, + "578": 2.861581325531006, + "579": 2.87233567237854, + "580": 2.8691887855529785, + "581": 2.877211093902588, + "582": 2.8669381141662598, + "583": 2.855844259262085, + "584": 2.8703207969665527, + "585": 2.8799691200256348, + "586": 2.867830276489258, + "587": 2.839306116104126, + "588": 2.8359737396240234, + "589": 2.8488521575927734, + "590": 2.8584985733032227, + "591": 2.851200819015503, + "592": 2.864673137664795, + "593": 2.8435373306274414 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "train_epoch_time": 4.7877092361450195, + "train_loss": 2.8496891605973755, + "train_score": 0.20287280311030328, + "val_loss": 2.872913730130814, + "val_score": 0.19633503174398578 + }, + { + "epoch": 11, + "grad_norm": 0.5239464044570923, + "learning_rate": 0.464, + "model_norm": 87.9194107055664, + "step_logs": { + "grad_norm": { + "594": 0.42257383465766907, + "595": 0.44964295625686646, + "596": 0.4130430817604065, + "597": 0.36789682507514954, + "598": 0.32851096987724304, + "599": 0.3788467049598694, + "600": 0.3752865195274353, + "601": 0.36830654740333557, + "602": 0.399733304977417, + "603": 0.48297855257987976, + "604": 0.4707321524620056, + "605": 0.4288770258426666, + "606": 0.35812294483184814, + "607": 0.33333203196525574, + "608": 0.3405986726284027, + "609": 0.3559945821762085, + "610": 0.35224106907844543, + "611": 0.3492664694786072, + "612": 0.35338935256004333, + "613": 0.357338011264801, + "614": 0.4009723663330078, + "615": 0.46710577607154846, + "616": 0.45859748125076294, + "617": 0.4375164806842804, + "618": 0.3868687152862549, + "619": 0.37531808018684387, + "620": 0.2936478853225708, + "621": 0.29129859805107117, + "622": 0.3046726882457733, + "623": 0.33380258083343506, + "624": 0.3612520396709442, + "625": 0.41978561878204346, + "626": 0.5552738904953003, + "627": 0.5705365538597107, + "628": 0.4846811592578888, + "629": 0.3962310254573822, + "630": 0.4062882959842682, + "631": 0.43522927165031433, + "632": 0.43066173791885376, + "633": 0.4276736080646515, + "634": 0.41746219992637634, + "635": 0.42552584409713745, + "636": 0.42190685868263245, + "637": 0.4003181457519531, + "638": 0.3897378742694855, + "639": 0.35833361744880676, + "640": 0.4152391850948334, + "641": 0.4509317874908447, + "642": 0.4552031457424164, + "643": 0.4360000491142273, + "644": 0.4611087441444397, + "645": 0.46938836574554443, + "646": 1.1099835634231567, + "647": 0.5239464044570923 + }, + "loss": { + "594": 2.8699967861175537, + "595": 2.854156494140625, + "596": 2.8469033241271973, + "597": 2.8562097549438477, + "598": 2.8361098766326904, + "599": 2.8483147621154785, + "600": 2.8401103019714355, + "601": 2.839268684387207, + "602": 2.840738296508789, + "603": 2.86029314994812, + "604": 2.831247091293335, + "605": 2.8455135822296143, + "606": 2.8481359481811523, + "607": 2.8126351833343506, + "608": 2.826653480529785, + "609": 2.8214635848999023, + "610": 2.8351025581359863, + "611": 2.8463010787963867, + "612": 2.8268990516662598, + "613": 2.8380680084228516, + "614": 2.848926305770874, + "615": 2.8463730812072754, + "616": 2.8471193313598633, + "617": 2.829418897628784, + "618": 2.8207077980041504, + "619": 2.832714080810547, + "620": 2.7979800701141357, + "621": 2.811311721801758, + "622": 2.8190622329711914, + "623": 2.8195295333862305, + "624": 2.8217153549194336, + "625": 2.811366558074951, + "626": 2.816267490386963, + "627": 2.853815793991089, + "628": 2.8188250064849854, + "629": 2.8165621757507324, + "630": 2.810520887374878, + "631": 2.834625005722046, + "632": 2.8090646266937256, + "633": 2.8065078258514404, + "634": 2.818776845932007, + "635": 2.830660343170166, + "636": 2.7966115474700928, + "637": 2.811253547668457, + "638": 2.7917771339416504, + "639": 2.803710460662842, + "640": 2.8038313388824463, + "641": 2.8246309757232666, + "642": 2.7982208728790283, + "643": 2.7998270988464355, + "644": 2.8005104064941406, + "645": 2.803518056869507, + "646": 2.7915449142456055, + "647": 2.925145149230957 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "train_epoch_time": 4.788415431976318, + "train_loss": 2.888531571994382, + "train_score": 0.20063329446777553, + "val_loss": 2.9095747722687046, + "val_score": 0.1982007033686687 + }, + { + "epoch": 12, + "grad_norm": 0.14469276368618011, + "learning_rate": 0.464, + "model_norm": 87.9462890625, + "step_logs": { + "grad_norm": { + "648": 0.5318052172660828, + "649": 0.6321051716804504, + "650": 0.42080065608024597, + "651": 0.4259883761405945, + "652": 0.4047864079475403, + "653": 0.39888811111450195, + "654": 0.4499017298221588, + "655": 0.5054547786712646, + "656": 0.5361528992652893, + "657": 0.5558052659034729, + "658": 0.5358248353004456, + "659": 0.4887659549713135, + "660": 0.485589861869812, + "661": 0.43086305260658264, + "662": 0.3795575797557831, + "663": 0.3434537649154663, + "664": 0.30627480149269104, + "665": 0.3067757189273834, + "666": 0.3441285490989685, + "667": 0.37962791323661804, + "668": 0.35083821415901184, + "669": 0.3440358340740204, + "670": 0.3448472321033478, + "671": 0.3453265130519867, + "672": 0.3232404291629791, + "673": 0.3418600857257843, + "674": 0.34718480706214905, + "675": 0.3254640996456146, + "676": 0.2992471754550934, + "677": 0.30947911739349365, + "678": 0.2990650236606598, + "679": 0.3076228201389313, + "680": 0.29717665910720825, + "681": 0.2664761245250702, + "682": 0.324785441160202, + "683": 0.6659073829650879, + "684": 0.5112767815589905, + "685": 0.3169627785682678, + "686": 0.23131200671195984, + "687": 0.21421672403812408, + "688": 0.19934771955013275, + "689": 0.20982535183429718, + "690": 0.2480393946170807, + "691": 0.2140820473432541, + "692": 0.1506846398115158, + "693": 0.1834387481212616, + "694": 0.22321200370788574, + "695": 0.18303877115249634, + "696": 0.18095025420188904, + "697": 0.18474681675434113, + "698": 0.1542045921087265, + "699": 0.18722376227378845, + "700": 0.12826938927173615, + "701": 0.14469276368618011 + }, + "loss": { + "648": 2.9101409912109375, + "649": 2.8857178688049316, + "650": 2.80180025100708, + "651": 2.823981285095215, + "652": 2.8009486198425293, + "653": 2.7925796508789062, + "654": 2.8104872703552246, + "655": 2.8123631477355957, + "656": 2.7992591857910156, + "657": 2.7989237308502197, + "658": 2.8018665313720703, + "659": 2.78963565826416, + "660": 2.7830281257629395, + "661": 2.802065134048462, + "662": 2.7985219955444336, + "663": 2.7972140312194824, + "664": 2.769890546798706, + "665": 2.7823495864868164, + "666": 2.770477771759033, + "667": 2.7753307819366455, + "668": 2.7468605041503906, + "669": 2.7742881774902344, + "670": 2.7920119762420654, + "671": 2.7548999786376953, + "672": 2.7518343925476074, + "673": 2.778820037841797, + "674": 2.762505292892456, + "675": 2.7465221881866455, + "676": 2.74069881439209, + "677": 2.7722177505493164, + "678": 2.7563436031341553, + "679": 2.740978240966797, + "680": 2.7344717979431152, + "681": 2.7557859420776367, + "682": 2.778360366821289, + "683": 2.753188133239746, + "684": 2.7890872955322266, + "685": 2.7625885009765625, + "686": 2.7399306297302246, + "687": 2.7669248580932617, + "688": 2.743842124938965, + "689": 2.727412223815918, + "690": 2.735049247741699, + "691": 2.7443788051605225, + "692": 2.73051118850708, + "693": 2.724133014678955, + "694": 2.7359414100646973, + "695": 2.74139404296875, + "696": 2.734097480773926, + "697": 2.73106050491333, + "698": 2.742445707321167, + "699": 2.7274019718170166, + "700": 2.7255194187164307, + "701": 2.7200241088867188 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "train_epoch_time": 4.788705825805664, + "train_loss": 2.7261262793794083, + "train_score": 0.25663782269232926, + "val_loss": 2.7493379597822645, + "val_score": 0.24852002010895655 + }, + { + "epoch": 13, + "grad_norm": 0.16334109008312225, + "learning_rate": 0.3093333333333334, + "model_norm": 87.9638900756836, + "step_logs": { + "grad_norm": { + "702": 0.13187536597251892, + "703": 0.129169762134552, + "704": 0.14398512244224548, + "705": 0.19418491423130035, + "706": 0.2328021377325058, + "707": 0.24253851175308228, + "708": 0.23169022798538208, + "709": 0.20916405320167542, + "710": 0.1637910008430481, + "711": 0.15132522583007812, + "712": 0.16344644129276276, + "713": 0.15693415701389313, + "714": 0.16148744523525238, + "715": 0.18595477938652039, + "716": 0.19046254456043243, + "717": 0.2558163106441498, + "718": 0.2811249792575836, + "719": 0.2495967596769333, + "720": 0.21979032456874847, + "721": 0.21898052096366882, + "722": 0.2105591744184494, + "723": 0.25884491205215454, + "724": 0.22478868067264557, + "725": 0.2866717278957367, + "726": 0.2695862948894501, + "727": 0.16498975455760956, + "728": 0.17787492275238037, + "729": 0.19540585577487946, + "730": 0.22890213131904602, + "731": 0.3541260063648224, + "732": 0.2720671594142914, + "733": 0.2354908436536789, + "734": 0.14638392627239227, + "735": 0.1424023061990738, + "736": 0.16258752346038818, + "737": 0.1467246413230896, + "738": 0.2034228891134262, + "739": 0.19281704723834991, + "740": 0.1750289648771286, + "741": 0.15293042361736298, + "742": 0.12029548734426498, + "743": 0.16493894159793854, + "744": 0.15983934700489044, + "745": 0.2205306738615036, + "746": 0.2871588468551636, + "747": 0.3217866122722626, + "748": 0.36361536383628845, + "749": 0.28642401099205017, + "750": 0.23048517107963562, + "751": 0.21606577932834625, + "752": 0.1832922399044037, + "753": 0.15014003217220306, + "754": 0.13805142045021057, + "755": 0.16334109008312225 + }, + "loss": { + "702": 2.7210288047790527, + "703": 2.71040678024292, + "704": 2.7128617763519287, + "705": 2.7106144428253174, + "706": 2.7419934272766113, + "707": 2.7281219959259033, + "708": 2.7229061126708984, + "709": 2.6910910606384277, + "710": 2.703174114227295, + "711": 2.7133238315582275, + "712": 2.715045928955078, + "713": 2.7020325660705566, + "714": 2.735429286956787, + "715": 2.7259769439697266, + "716": 2.6886775493621826, + "717": 2.718719005584717, + "718": 2.7142601013183594, + "719": 2.7110254764556885, + "720": 2.7338175773620605, + "721": 2.703158378601074, + "722": 2.707919120788574, + "723": 2.7115983963012695, + "724": 2.7157111167907715, + "725": 2.7386438846588135, + "726": 2.6857311725616455, + "727": 2.6828131675720215, + "728": 2.7332677841186523, + "729": 2.6948840618133545, + "730": 2.695504665374756, + "731": 2.7100348472595215, + "732": 2.718676805496216, + "733": 2.700397491455078, + "734": 2.7122879028320312, + "735": 2.7046704292297363, + "736": 2.6769018173217773, + "737": 2.698110580444336, + "738": 2.697983503341675, + "739": 2.695675849914551, + "740": 2.6761045455932617, + "741": 2.6895012855529785, + "742": 2.690903663635254, + "743": 2.6934428215026855, + "744": 2.6842222213745117, + "745": 2.692636013031006, + "746": 2.6920151710510254, + "747": 2.699789047241211, + "748": 2.6843533515930176, + "749": 2.696812152862549, + "750": 2.6809630393981934, + "751": 2.6825451850891113, + "752": 2.687816619873047, + "753": 2.6947576999664307, + "754": 2.684507131576538, + "755": 2.681462526321411 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "train_epoch_time": 4.791311264038086, + "train_loss": 2.684370730904972, + "train_score": 0.26502757361492774, + "val_loss": 2.7086549054330855, + "val_score": 0.2551844140897525 + }, + { + "epoch": 14, + "grad_norm": 0.16301603615283966, + "learning_rate": 0.1546666666666667, + "model_norm": 87.96991729736328, + "step_logs": { + "grad_norm": { + "756": 0.1665090024471283, + "757": 0.15137776732444763, + "758": 0.14224958419799805, + "759": 0.17822395265102386, + "760": 0.1344822645187378, + "761": 0.13008250296115875, + "762": 0.121334969997406, + "763": 0.12930189073085785, + "764": 0.13042472302913666, + "765": 0.13586494326591492, + "766": 0.15217404067516327, + "767": 0.18378371000289917, + "768": 0.21408049762248993, + "769": 0.18494872748851776, + "770": 0.13941864669322968, + "771": 0.15975496172904968, + "772": 0.18425214290618896, + "773": 0.15536095201969147, + "774": 0.1300903707742691, + "775": 0.13136355578899384, + "776": 0.1413564831018448, + "777": 0.13182124495506287, + "778": 0.15004104375839233, + "779": 0.14577293395996094, + "780": 0.15609191358089447, + "781": 0.18846388161182404, + "782": 0.12447446584701538, + "783": 0.18269097805023193, + "784": 0.16804993152618408, + "785": 0.13790851831436157, + "786": 0.14341942965984344, + "787": 0.13903860747814178, + "788": 0.13624010980129242, + "789": 0.18383239209651947, + "790": 0.17174266278743744, + "791": 0.14851796627044678, + "792": 0.14885656535625458, + "793": 0.13885612785816193, + "794": 0.1830388605594635, + "795": 0.12799538671970367, + "796": 0.1385277658700943, + "797": 0.11917927861213684, + "798": 0.14329096674919128, + "799": 0.1376391053199768, + "800": 0.14844174683094025, + "801": 0.14833678305149078, + "802": 0.13612188398838043, + "803": 0.14774993062019348, + "804": 0.143861785531044, + "805": 0.13204163312911987, + "806": 0.13631823658943176, + "807": 0.13291941583156586, + "808": 0.1458321511745453, + "809": 0.16301603615283966 + }, + "loss": { + "756": 2.672776937484741, + "757": 2.677155017852783, + "758": 2.6865949630737305, + "759": 2.688605785369873, + "760": 2.652066707611084, + "761": 2.6863296031951904, + "762": 2.6873316764831543, + "763": 2.674668312072754, + "764": 2.674224615097046, + "765": 2.6821889877319336, + "766": 2.683260917663574, + "767": 2.677043914794922, + "768": 2.678922414779663, + "769": 2.6896395683288574, + "770": 2.6841695308685303, + "771": 2.695735454559326, + "772": 2.6675703525543213, + "773": 2.6772587299346924, + "774": 2.6956119537353516, + "775": 2.6756885051727295, + "776": 2.699130058288574, + "777": 2.6842451095581055, + "778": 2.653803586959839, + "779": 2.67105770111084, + "780": 2.676814556121826, + "781": 2.675985097885132, + "782": 2.6740026473999023, + "783": 2.6857800483703613, + "784": 2.677624225616455, + "785": 2.672084331512451, + "786": 2.654049873352051, + "787": 2.6669039726257324, + "788": 2.6923561096191406, + "789": 2.6649057865142822, + "790": 2.6838855743408203, + "791": 2.6713709831237793, + "792": 2.687340259552002, + "793": 2.6601858139038086, + "794": 2.66515851020813, + "795": 2.6606554985046387, + "796": 2.665588855743408, + "797": 2.6693201065063477, + "798": 2.669257402420044, + "799": 2.6660897731781006, + "800": 2.6808218955993652, + "801": 2.680556297302246, + "802": 2.6595144271850586, + "803": 2.6731250286102295, + "804": 2.6824917793273926, + "805": 2.676051616668701, + "806": 2.647117853164673, + "807": 2.681386709213257, + "808": 2.67110013961792, + "809": 2.6987640857696533 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "train_epoch_time": 4.7905213832855225, + "train_loss": 2.671449064763752, + "train_score": 0.2684305506765073, + "val_loss": 2.6970659364378413, + "val_score": 0.25746717090995386 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:34:54.268286", + "final_model_norm": 87.96991729736328, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:33:13.426974", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.255052924156189, + "learning_rate": 4.64e-11, + "model_norm": 87.92820739746094, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 6.244149684906006, + "3": 8.440503120422363, + "4": 16.79232406616211, + "5": 4.453291893005371, + "6": 3.6784656047821045, + "7": 3.89975905418396, + "8": 6.6434431076049805, + "9": 4.686246871948242, + "10": 4.644492149353027, + "11": 3.9039089679718018, + "12": 6.489428997039795, + "13": 4.205042362213135, + "14": 4.445206642150879, + "15": 24.46038246154785, + "16": 80.50111389160156, + "17": 4.711960792541504, + "18": 16.075946807861328, + "19": 12.762138366699219, + "20": 5.1727776527404785, + "21": 4.746818542480469, + "22": 6.861893653869629, + "23": 5.371772289276123, + "24": 5.298776626586914, + "25": 11.80709171295166, + "26": 9.1215181350708, + "27": 6.4599714279174805, + "28": 6.367079734802246, + "29": 5.568437099456787, + "30": 8.229534149169922, + "31": 14.150362014770508, + "32": 15.036437034606934, + "33": 8.959894180297852, + "34": 6.683808326721191, + "35": 4.862767219543457, + "36": 4.8636980056762695, + "37": 20.078683853149414, + "38": 3.1052157878875732, + "39": 2.5379903316497803, + "40": 1.894242763519287, + "41": 4.942265033721924, + "42": 13.566061973571777, + "43": 9.855283737182617, + "44": 5.184023380279541, + "45": 2.8199052810668945, + "46": 2.1188459396362305, + "47": 1.5920944213867188, + "48": 1.099333643913269, + "49": 1.2937748432159424, + "50": 1.3464598655700684, + "51": 1.9658997058868408, + "52": 3.244523286819458, + "53": 1.255052924156189 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.7835984230041504, + "3": 3.8444766998291016, + "4": 4.425124645233154, + "5": 4.850791931152344, + "6": 4.1621551513671875, + "7": 3.6580116748809814, + "8": 3.812798500061035, + "9": 4.489406585693359, + "10": 3.7161145210266113, + "11": 3.8169572353363037, + "12": 4.414116859436035, + "13": 4.110918045043945, + "14": 4.620980262756348, + "15": 6.789256572723389, + "16": 5.334522247314453, + "17": 5.190731048583984, + "18": 5.546748161315918, + "19": 10.742549896240234, + "20": 8.087642669677734, + "21": 5.821972370147705, + "22": 5.938457489013672, + "23": 8.681151390075684, + "24": 5.574102401733398, + "25": 6.620218276977539, + "26": 13.632080078125, + "27": 11.717260360717773, + "28": 8.583717346191406, + "29": 6.647965908050537, + "30": 8.752342224121094, + "31": 7.99849796295166, + "32": 14.822242736816406, + "33": 20.757707595825195, + "34": 18.493423461914062, + "35": 14.1813325881958, + "36": 8.413106918334961, + "37": 10.946731567382812, + "38": 8.983861923217773, + "39": 6.206698894500732, + "40": 4.411967754364014, + "41": 4.126418113708496, + "42": 10.625564575195312, + "43": 10.824403762817383, + "44": 9.947172164916992, + "45": 10.309802055358887, + "46": 8.01710319519043, + "47": 6.207764148712158, + "48": 5.267107009887695, + "49": 4.831733226776123, + "50": 4.455212116241455, + "51": 3.931161642074585, + "52": 4.3249993324279785, + "53": 4.623747825622559 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "train_epoch_time": 4.789364814758301, + "train_loss": 4.180719149984965, + "train_score": 0.1526105183055103, + "val_loss": 4.180160900755399, + "val_score": 0.15114182700861745 + }, + { + "epoch": 1, + "grad_norm": 0.7922757267951965, + "learning_rate": 0.464, + "model_norm": 87.93119812011719, + "step_logs": { + "grad_norm": { + "54": 1.3509052991867065, + "55": 1.072674036026001, + "56": 0.925609827041626, + "57": 0.8375693559646606, + "58": 1.6903800964355469, + "59": 1.0724360942840576, + "60": 0.7637794613838196, + "61": 1.409530520439148, + "62": 3.699445962905884, + "63": 1.0121699571609497, + "64": 0.9652178287506104, + "65": 0.9076206684112549, + "66": 0.7453747987747192, + "67": 0.7306337356567383, + "68": 1.1381064653396606, + "69": 0.9752975106239319, + "70": 0.6664982438087463, + "71": 0.7316480278968811, + "72": 1.0717217922210693, + "73": 0.8266065716743469, + "74": 0.20798030495643616, + "75": 0.24004042148590088, + "76": 0.3069019913673401, + "77": 0.3994874656200409, + "78": 0.6231363415718079, + "79": 0.6981455087661743, + "80": 0.9245103597640991, + "81": 0.9434074759483337, + "82": 0.9260461330413818, + "83": 0.8412745594978333, + "84": 0.6647123098373413, + "85": 0.7116034626960754, + "86": 0.9867504835128784, + "87": 0.8474131226539612, + "88": 0.5606356859207153, + "89": 0.998553454875946, + "90": 1.701401710510254, + "91": 1.2479462623596191, + "92": 0.8529561758041382, + "93": 0.4944674074649811, + "94": 0.49748218059539795, + "95": 0.593855619430542, + "96": 0.5744707584381104, + "97": 0.6321374773979187, + "98": 0.7070459127426147, + "99": 0.8582203388214111, + "100": 1.1194994449615479, + "101": 1.1126043796539307, + "102": 0.8445208668708801, + "103": 0.6512200832366943, + "104": 0.8280763626098633, + "105": 0.9169391393661499, + "106": 0.8577074408531189, + "107": 0.7922757267951965 + }, + "loss": { + "54": 4.181344985961914, + "55": 3.9017367362976074, + "56": 3.5386595726013184, + "57": 3.505521297454834, + "58": 3.5246646404266357, + "59": 3.856013298034668, + "60": 3.4496750831604004, + "61": 3.545161724090576, + "62": 3.93764066696167, + "63": 4.640109539031982, + "64": 4.22357702255249, + "65": 3.822941780090332, + "66": 3.55462646484375, + "67": 3.4582881927490234, + "68": 3.4612998962402344, + "69": 3.5940842628479004, + "70": 3.408729076385498, + "71": 3.454644203186035, + "72": 3.394103527069092, + "73": 3.568695068359375, + "74": 3.3560407161712646, + "75": 3.341549873352051, + "76": 3.3487112522125244, + "77": 3.3470993041992188, + "78": 3.36503267288208, + "79": 3.424377918243408, + "80": 3.398441791534424, + "81": 3.4952120780944824, + "82": 3.370410442352295, + "83": 3.4830679893493652, + "84": 3.373124599456787, + "85": 3.3951807022094727, + "86": 3.3787293434143066, + "87": 3.5009942054748535, + "88": 3.3334760665893555, + "89": 3.3573577404022217, + "90": 3.5122151374816895, + "91": 3.734975576400757, + "92": 3.4867894649505615, + "93": 3.3376197814941406, + "94": 3.3570609092712402, + "95": 3.3508875370025635, + "96": 3.3901641368865967, + "97": 3.3443620204925537, + "98": 3.3447489738464355, + "99": 3.3522868156433105, + "100": 3.426947832107544, + "101": 3.4931211471557617, + "102": 3.416281223297119, + "103": 3.3241302967071533, + "104": 3.3692924976348877, + "105": 3.385265588760376, + "106": 3.4028725624084473, + "107": 3.377155303955078 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "train_epoch_time": 4.787489175796509, + "train_loss": 3.3512276984697777, + "train_score": 0.16325546989225417, + "val_loss": 3.3691477931599665, + "val_score": 0.16154653442987207 + }, + { + "epoch": 2, + "grad_norm": 0.6837993264198303, + "learning_rate": 0.464, + "model_norm": 87.95731353759766, + "step_logs": { + "grad_norm": { + "108": 0.7729028463363647, + "109": 0.8577842712402344, + "110": 0.8788442015647888, + "111": 0.7919557094573975, + "112": 0.7402887940406799, + "113": 0.8491641283035278, + "114": 0.8003381490707397, + "115": 0.6720640659332275, + "116": 0.6806644201278687, + "117": 0.7202664613723755, + "118": 0.6973781585693359, + "119": 0.710938572883606, + "120": 0.7061183452606201, + "121": 0.6684529185295105, + "122": 0.5951650738716125, + "123": 0.5564042925834656, + "124": 0.5777530670166016, + "125": 0.603927731513977, + "126": 0.6163882613182068, + "127": 0.6118666529655457, + "128": 0.5854691863059998, + "129": 0.6328118443489075, + "130": 0.656987726688385, + "131": 0.6404881477355957, + "132": 0.6307449340820312, + "133": 0.6298400163650513, + "134": 0.6094688177108765, + "135": 0.5367652177810669, + "136": 0.5335031747817993, + "137": 0.4940011203289032, + "138": 0.4786943793296814, + "139": 0.5247909426689148, + "140": 0.5546916127204895, + "141": 0.6646736264228821, + "142": 0.6263370513916016, + "143": 0.5697901844978333, + "144": 0.5052744150161743, + "145": 0.4368767738342285, + "146": 0.4330008924007416, + "147": 0.37531915307044983, + "148": 0.39945143461227417, + "149": 0.4583311975002289, + "150": 0.4343625605106354, + "151": 0.42306220531463623, + "152": 0.4210350215435028, + "153": 0.48869818449020386, + "154": 0.5197953581809998, + "155": 0.5794380307197571, + "156": 0.534491240978241, + "157": 0.47136905789375305, + "158": 0.5203049182891846, + "159": 0.6415740847587585, + "160": 0.6665178537368774, + "161": 0.6837993264198303 + }, + "loss": { + "108": 3.343027353286743, + "109": 3.3579330444335938, + "110": 3.394780158996582, + "111": 3.352787971496582, + "112": 3.332044839859009, + "113": 3.349801540374756, + "114": 3.3971102237701416, + "115": 3.322275400161743, + "116": 3.332723379135132, + "117": 3.3307383060455322, + "118": 3.32688570022583, + "119": 3.3205206394195557, + "120": 3.341777801513672, + "121": 3.3131961822509766, + "122": 3.339186429977417, + "123": 3.288053512573242, + "124": 3.3153223991394043, + "125": 3.3331809043884277, + "126": 3.3103647232055664, + "127": 3.2892110347747803, + "128": 3.318519115447998, + "129": 3.31482195854187, + "130": 3.3041484355926514, + "131": 3.284627914428711, + "132": 3.2961630821228027, + "133": 3.2825570106506348, + "134": 3.298042058944702, + "135": 3.2494163513183594, + "136": 3.2979965209960938, + "137": 3.2948079109191895, + "138": 3.271838665008545, + "139": 3.283370018005371, + "140": 3.2520718574523926, + "141": 3.240216016769409, + "142": 3.2973480224609375, + "143": 3.2993502616882324, + "144": 3.2675976753234863, + "145": 3.252697229385376, + "146": 3.255619525909424, + "147": 3.23370099067688, + "148": 3.2278897762298584, + "149": 3.235509157180786, + "150": 3.2723007202148438, + "151": 3.2624588012695312, + "152": 3.2469358444213867, + "153": 3.2500171661376953, + "154": 3.268277645111084, + "155": 3.2700719833374023, + "156": 3.280336380004883, + "157": 3.257960319519043, + "158": 3.2380642890930176, + "159": 3.2370452880859375, + "160": 3.3121280670166016, + "161": 3.239056348800659 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "train_epoch_time": 4.787842035293579, + "train_loss": 3.2856115087375066, + "train_score": 0.12368633428981349, + "val_loss": 3.3047467003459907, + "val_score": 0.11694980645391889 + }, + { + "epoch": 3, + "grad_norm": 0.6413682699203491, + "learning_rate": 0.464, + "model_norm": 87.99307250976562, + "step_logs": { + "grad_norm": { + "162": 0.6811285018920898, + "163": 0.6921555995941162, + "164": 0.6935116052627563, + "165": 0.6758252382278442, + "166": 0.6695922613143921, + "167": 0.6933759450912476, + "168": 0.6095072031021118, + "169": 0.47101956605911255, + "170": 0.43185028433799744, + "171": 0.40144869685173035, + "172": 0.4124176800251007, + "173": 0.39040708541870117, + "174": 0.3865910470485687, + "175": 0.3941298723220825, + "176": 0.39663565158843994, + "177": 0.45247504115104675, + "178": 0.4442135691642761, + "179": 0.38476887345314026, + "180": 0.3770773708820343, + "181": 0.4056793749332428, + "182": 0.41073769330978394, + "183": 0.41719454526901245, + "184": 0.4165584146976471, + "185": 0.3881615698337555, + "186": 0.34230518341064453, + "187": 0.30825161933898926, + "188": 0.29052141308784485, + "189": 0.30808180570602417, + "190": 0.2942551374435425, + "191": 0.2962765395641327, + "192": 0.3311639726161957, + "193": 0.4142826497554779, + "194": 0.44628313183784485, + "195": 0.5034071207046509, + "196": 0.5449778437614441, + "197": 0.5193648934364319, + "198": 0.4610893726348877, + "199": 0.3979952931404114, + "200": 0.39102810621261597, + "201": 0.43607431650161743, + "202": 0.42258667945861816, + "203": 0.42442119121551514, + "204": 0.4446522891521454, + "205": 0.47429367899894714, + "206": 0.4473903179168701, + "207": 0.46548813581466675, + "208": 0.4778720736503601, + "209": 0.48176315426826477, + "210": 0.4720529615879059, + "211": 0.4752734899520874, + "212": 0.5729631185531616, + "213": 0.6581975221633911, + "214": 0.6999406814575195, + "215": 0.6413682699203491 + }, + "loss": { + "162": 3.2915611267089844, + "163": 3.259284257888794, + "164": 3.2547430992126465, + "165": 3.2619223594665527, + "166": 3.270458221435547, + "167": 3.2979471683502197, + "168": 3.280670166015625, + "169": 3.229694366455078, + "170": 3.2294466495513916, + "171": 3.2429614067077637, + "172": 3.2495737075805664, + "173": 3.172105312347412, + "174": 3.226919174194336, + "175": 3.1982710361480713, + "176": 3.193342924118042, + "177": 3.2205593585968018, + "178": 3.224392890930176, + "179": 3.1983819007873535, + "180": 3.2073307037353516, + "181": 3.2270095348358154, + "182": 3.199389934539795, + "183": 3.1954102516174316, + "184": 3.1985745429992676, + "185": 3.174147605895996, + "186": 3.2371246814727783, + "187": 3.188760280609131, + "188": 3.198084831237793, + "189": 3.1940371990203857, + "190": 3.1846954822540283, + "191": 3.1881535053253174, + "192": 3.1859548091888428, + "193": 3.209023952484131, + "194": 3.2014236450195312, + "195": 3.2187161445617676, + "196": 3.191277027130127, + "197": 3.192574977874756, + "198": 3.2317447662353516, + "199": 3.1665432453155518, + "200": 3.1838431358337402, + "201": 3.1726231575012207, + "202": 3.17507266998291, + "203": 3.195721387863159, + "204": 3.168273448944092, + "205": 3.1400740146636963, + "206": 3.188877582550049, + "207": 3.180706739425659, + "208": 3.1650032997131348, + "209": 3.1470534801483154, + "210": 3.189568042755127, + "211": 3.1283812522888184, + "212": 3.1814656257629395, + "213": 3.168051242828369, + "214": 3.2089481353759766, + "215": 3.174100399017334 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "train_epoch_time": 4.787900686264038, + "train_loss": 3.1774323951223145, + "train_score": 0.17367624640037202, + "val_loss": 3.19929821072435, + "val_score": 0.1669731274777521 + }, + { + "epoch": 4, + "grad_norm": 0.3717726171016693, + "learning_rate": 0.464, + "model_norm": 88.0340347290039, + "step_logs": { + "grad_norm": { + "216": 0.5664742588996887, + "217": 0.5571743249893188, + "218": 0.5474212169647217, + "219": 0.5158417820930481, + "220": 0.47308504581451416, + "221": 0.4393671452999115, + "222": 0.47447481751441956, + "223": 0.5323047041893005, + "224": 0.5777488350868225, + "225": 0.5992069244384766, + "226": 0.5938617587089539, + "227": 0.5525433421134949, + "228": 0.5450514554977417, + "229": 0.5197822451591492, + "230": 0.4942905604839325, + "231": 1.0696790218353271, + "232": 0.5042442083358765, + "233": 0.5214129686355591, + "234": 0.5233376622200012, + "235": 0.6291642785072327, + "236": 0.5193829536437988, + "237": 0.5011067986488342, + "238": 0.45605939626693726, + "239": 0.4285101294517517, + "240": 0.4953652322292328, + "241": 0.47314420342445374, + "242": 0.49985113739967346, + "243": 0.49697956442832947, + "244": 0.5121140480041504, + "245": 0.5653178691864014, + "246": 0.6336447596549988, + "247": 0.6101993918418884, + "248": 0.5862594246864319, + "249": 0.6142231225967407, + "250": 0.5897337794303894, + "251": 0.554370641708374, + "252": 0.5044097304344177, + "253": 0.4481051564216614, + "254": 0.43696653842926025, + "255": 0.48997175693511963, + "256": 0.5274620652198792, + "257": 0.5600911974906921, + "258": 0.5656968951225281, + "259": 0.538122832775116, + "260": 0.49221640825271606, + "261": 0.46678784489631653, + "262": 0.48531803488731384, + "263": 0.5195136666297913, + "264": 0.5314480662345886, + "265": 0.49032315611839294, + "266": 0.4635796844959259, + "267": 0.422687292098999, + "268": 0.3832295536994934, + "269": 0.3717726171016693 + }, + "loss": { + "216": 3.187713146209717, + "217": 3.1606132984161377, + "218": 3.1345620155334473, + "219": 3.1614551544189453, + "220": 3.1735520362854004, + "221": 3.1460280418395996, + "222": 3.1611826419830322, + "223": 3.1580722332000732, + "224": 3.1597862243652344, + "225": 3.128140687942505, + "226": 3.1753532886505127, + "227": 3.1484668254852295, + "228": 3.157589912414551, + "229": 3.124562978744507, + "230": 3.1389760971069336, + "231": 3.128298759460449, + "232": 3.1525557041168213, + "233": 3.1237292289733887, + "234": 3.1393890380859375, + "235": 3.1168925762176514, + "236": 3.144312858581543, + "237": 3.084429979324341, + "238": 3.133145570755005, + "239": 3.089230537414551, + "240": 3.1452391147613525, + "241": 3.107693672180176, + "242": 3.1169419288635254, + "243": 3.1070642471313477, + "244": 3.104316234588623, + "245": 3.1013269424438477, + "246": 3.135496139526367, + "247": 3.1035571098327637, + "248": 3.141105890274048, + "249": 3.138065814971924, + "250": 3.112060070037842, + "251": 3.082488536834717, + "252": 3.1439249515533447, + "253": 3.050626277923584, + "254": 3.086163282394409, + "255": 3.091043472290039, + "256": 3.107741594314575, + "257": 3.0875916481018066, + "258": 3.0926926136016846, + "259": 3.0839829444885254, + "260": 3.0700325965881348, + "261": 3.055593490600586, + "262": 3.070647716522217, + "263": 3.0679359436035156, + "264": 3.106372833251953, + "265": 3.05061674118042, + "266": 3.0802764892578125, + "267": 3.068589448928833, + "268": 3.0574254989624023, + "269": 3.0175788402557373 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "train_epoch_time": 4.788339138031006, + "train_loss": 3.032273162830851, + "train_score": 0.2070626344879183, + "val_loss": 3.0533037478559737, + "val_score": 0.20032200812609133 + }, + { + "epoch": 5, + "grad_norm": 0.5066221356391907, + "learning_rate": 0.464, + "model_norm": 88.08234405517578, + "step_logs": { + "grad_norm": { + "270": 0.3661964535713196, + "271": 0.33545616269111633, + "272": 0.32537969946861267, + "273": 0.42328375577926636, + "274": 0.469877153635025, + "275": 0.4564831256866455, + "276": 0.4466836750507355, + "277": 0.4794429540634155, + "278": 0.5175257325172424, + "279": 0.7766371965408325, + "280": 0.6751675605773926, + "281": 0.5785229206085205, + "282": 0.5068453550338745, + "283": 0.4388604462146759, + "284": 0.3626843988895416, + "285": 0.34235700964927673, + "286": 0.2892029285430908, + "287": 0.30587390065193176, + "288": 0.48994946479797363, + "289": 0.9075611233711243, + "290": 0.6866692900657654, + "291": 0.49183326959609985, + "292": 0.4632605016231537, + "293": 0.4485301673412323, + "294": 0.4725918769836426, + "295": 0.5011042952537537, + "296": 0.4562439024448395, + "297": 0.4210890233516693, + "298": 0.4249041676521301, + "299": 0.4469127357006073, + "300": 0.4342898428440094, + "301": 0.41566258668899536, + "302": 0.38698825240135193, + "303": 0.4272957444190979, + "304": 0.42278552055358887, + "305": 0.7098525166511536, + "306": 0.481454461812973, + "307": 0.4574628174304962, + "308": 0.37931933999061584, + "309": 0.41069918870925903, + "310": 0.4405274987220764, + "311": 0.46115586161613464, + "312": 0.4353087842464447, + "313": 0.45827800035476685, + "314": 0.5010109543800354, + "315": 0.5244488716125488, + "316": 0.4856067895889282, + "317": 0.5176835656166077, + "318": 0.5285840034484863, + "319": 0.5231578946113586, + "320": 0.4556329548358917, + "321": 0.43136730790138245, + "322": 0.49187132716178894, + "323": 0.5066221356391907 + }, + "loss": { + "270": 3.0431125164031982, + "271": 3.036886692047119, + "272": 3.011997699737549, + "273": 3.0410447120666504, + "274": 3.037245035171509, + "275": 3.03704571723938, + "276": 2.9901628494262695, + "277": 3.015126943588257, + "278": 3.0377092361450195, + "279": 3.031620979309082, + "280": 3.011617660522461, + "281": 2.985450506210327, + "282": 3.0058674812316895, + "283": 2.9588656425476074, + "284": 2.955876350402832, + "285": 2.942842721939087, + "286": 2.9260411262512207, + "287": 2.9128530025482178, + "288": 2.94216251373291, + "289": 2.9741129875183105, + "290": 3.0020530223846436, + "291": 2.945091962814331, + "292": 2.9349443912506104, + "293": 2.9089603424072266, + "294": 2.942735433578491, + "295": 2.9312779903411865, + "296": 2.9092414379119873, + "297": 2.9327523708343506, + "298": 2.9214487075805664, + "299": 2.898831605911255, + "300": 2.896671772003174, + "301": 2.909642457962036, + "302": 2.9002127647399902, + "303": 2.8919129371643066, + "304": 2.883169651031494, + "305": 2.9125776290893555, + "306": 2.9245355129241943, + "307": 2.8759021759033203, + "308": 2.8774468898773193, + "309": 2.8868656158447266, + "310": 2.8862600326538086, + "311": 2.8969311714172363, + "312": 2.866103410720825, + "313": 2.8573989868164062, + "314": 2.886366367340088, + "315": 2.868023633956909, + "316": 2.840973377227783, + "317": 2.875199794769287, + "318": 2.864121437072754, + "319": 2.853104591369629, + "320": 2.8590290546417236, + "321": 2.8588643074035645, + "322": 2.8435912132263184, + "323": 2.85921573638916 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "train_epoch_time": 4.788949251174927, + "train_loss": 2.854194995012653, + "train_score": 0.23856595222713958, + "val_loss": 2.885586470331308, + "val_score": 0.23270217408816385 + }, + { + "epoch": 6, + "grad_norm": 0.3864153325557709, + "learning_rate": 0.464, + "model_norm": 88.11467742919922, + "step_logs": { + "grad_norm": { + "324": 0.5820532441139221, + "325": 0.5842010378837585, + "326": 0.5383394360542297, + "327": 0.5198785662651062, + "328": 0.4956585466861725, + "329": 0.4951489567756653, + "330": 0.45185908675193787, + "331": 0.47827890515327454, + "332": 0.38754040002822876, + "333": 0.323329359292984, + "334": 0.31251347064971924, + "335": 0.327478289604187, + "336": 0.32662010192871094, + "337": 0.322544127702713, + "338": 0.27384263277053833, + "339": 0.32691052556037903, + "340": 0.47355544567108154, + "341": 0.519230306148529, + "342": 0.518526017665863, + "343": 0.47064462304115295, + "344": 0.4253055155277252, + "345": 0.4822605550289154, + "346": 0.49681612849235535, + "347": 0.5098667740821838, + "348": 0.5298381447792053, + "349": 0.5110343098640442, + "350": 0.45480433106422424, + "351": 0.47317343950271606, + "352": 0.48920899629592896, + "353": 0.4198671877384186, + "354": 0.36844274401664734, + "355": 0.3354429006576538, + "356": 0.344651460647583, + "357": 0.4050646424293518, + "358": 0.43140238523483276, + "359": 0.4874323606491089, + "360": 0.5904595851898193, + "361": 0.4948265254497528, + "362": 0.35415011644363403, + "363": 0.23451873660087585, + "364": 0.23185992240905762, + "365": 0.24718642234802246, + "366": 0.30144110321998596, + "367": 0.28273436427116394, + "368": 0.32693031430244446, + "369": 0.4312284290790558, + "370": 0.722140908241272, + "371": 1.135475516319275, + "372": 1.2115484476089478, + "373": 0.6165918707847595, + "374": 0.42361322045326233, + "375": 0.39272767305374146, + "376": 0.3820077180862427, + "377": 0.3864153325557709 + }, + "loss": { + "324": 2.875011444091797, + "325": 2.8820018768310547, + "326": 2.851672887802124, + "327": 2.847370147705078, + "328": 2.862888813018799, + "329": 2.8270115852355957, + "330": 2.8452157974243164, + "331": 2.8523366451263428, + "332": 2.8111085891723633, + "333": 2.8223836421966553, + "334": 2.794248104095459, + "335": 2.8467233180999756, + "336": 2.8062939643859863, + "337": 2.8120603561401367, + "338": 2.831743001937866, + "339": 2.810534715652466, + "340": 2.8184683322906494, + "341": 2.8421192169189453, + "342": 2.80037784576416, + "343": 2.8311898708343506, + "344": 2.8118858337402344, + "345": 2.8088016510009766, + "346": 2.8150391578674316, + "347": 2.8277053833007812, + "348": 2.8275394439697266, + "349": 2.820631504058838, + "350": 2.7985544204711914, + "351": 2.8206357955932617, + "352": 2.8019726276397705, + "353": 2.8063628673553467, + "354": 2.771907091140747, + "355": 2.768950939178467, + "356": 2.7780933380126953, + "357": 2.776686429977417, + "358": 2.786099910736084, + "359": 2.7869672775268555, + "360": 2.786921977996826, + "361": 2.798215627670288, + "362": 2.769136905670166, + "363": 2.783421039581299, + "364": 2.7585668563842773, + "365": 2.767833709716797, + "366": 2.722777843475342, + "367": 2.769777774810791, + "368": 2.7783427238464355, + "369": 2.7714133262634277, + "370": 2.800468921661377, + "371": 2.8357183933258057, + "372": 2.8647027015686035, + "373": 2.853703260421753, + "374": 2.783780097961426, + "375": 2.7631924152374268, + "376": 2.778256416320801, + "377": 2.7554824352264404 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "train_epoch_time": 4.789652109146118, + "train_loss": 2.763375854286949, + "train_score": 0.23777349358651695, + "val_loss": 2.785479051511132, + "val_score": 0.23190388184395777 + }, + { + "epoch": 7, + "grad_norm": 0.5307855606079102, + "learning_rate": 0.464, + "model_norm": 88.15125274658203, + "step_logs": { + "grad_norm": { + "378": 0.41669026017189026, + "379": 0.3636738657951355, + "380": 0.36276158690452576, + "381": 0.3732846677303314, + "382": 0.38989880681037903, + "383": 0.380859375, + "384": 0.4410206377506256, + "385": 0.48034048080444336, + "386": 0.49346110224723816, + "387": 0.5446798205375671, + "388": 0.5968800187110901, + "389": 0.574354887008667, + "390": 0.5114524364471436, + "391": 0.4674214720726013, + "392": 0.4792308509349823, + "393": 0.46935907006263733, + "394": 0.48663243651390076, + "395": 0.5008202791213989, + "396": 0.5456418395042419, + "397": 0.49052104353904724, + "398": 0.5148676633834839, + "399": 0.5309667587280273, + "400": 0.5169989466667175, + "401": 0.4969700574874878, + "402": 0.5844029784202576, + "403": 0.5727942585945129, + "404": 0.5252848267555237, + "405": 0.5402370095252991, + "406": 0.5842947959899902, + "407": 0.5709476470947266, + "408": 0.5912723541259766, + "409": 0.5950878262519836, + "410": 0.5892427563667297, + "411": 0.5410339832305908, + "412": 0.5276714563369751, + "413": 0.5643038153648376, + "414": 0.5468411445617676, + "415": 0.4935785233974457, + "416": 0.46643027663230896, + "417": 0.44591641426086426, + "418": 0.4631032943725586, + "419": 0.43627774715423584, + "420": 0.4355587959289551, + "421": 0.44590431451797485, + "422": 0.5502045750617981, + "423": 0.5576859712600708, + "424": 0.4811934530735016, + "425": 0.4225294888019562, + "426": 0.4164482355117798, + "427": 0.4014143645763397, + "428": 0.3887057900428772, + "429": 0.41492924094200134, + "430": 0.455078661441803, + "431": 0.5307855606079102 + }, + "loss": { + "378": 2.7654988765716553, + "379": 2.7577126026153564, + "380": 2.75124192237854, + "381": 2.737492561340332, + "382": 2.7479352951049805, + "383": 2.7489991188049316, + "384": 2.7637739181518555, + "385": 2.751579523086548, + "386": 2.777341604232788, + "387": 2.765752077102661, + "388": 2.770010471343994, + "389": 2.751984119415283, + "390": 2.784501552581787, + "391": 2.7589409351348877, + "392": 2.743619441986084, + "393": 2.7433459758758545, + "394": 2.733316421508789, + "395": 2.7305846214294434, + "396": 2.7404184341430664, + "397": 2.7314116954803467, + "398": 2.744497060775757, + "399": 2.7438461780548096, + "400": 2.730891227722168, + "401": 2.7344067096710205, + "402": 2.752774953842163, + "403": 2.746870279312134, + "404": 2.7604639530181885, + "405": 2.7230732440948486, + "406": 2.726456642150879, + "407": 2.7299156188964844, + "408": 2.748684883117676, + "409": 2.7507944107055664, + "410": 2.7420618534088135, + "411": 2.7365851402282715, + "412": 2.747551918029785, + "413": 2.746805191040039, + "414": 2.72627329826355, + "415": 2.7124593257904053, + "416": 2.717761278152466, + "417": 2.7175302505493164, + "418": 2.7076478004455566, + "419": 2.7017266750335693, + "420": 2.7102725505828857, + "421": 2.6964612007141113, + "422": 2.721733570098877, + "423": 2.725191593170166, + "424": 2.705355644226074, + "425": 2.6846797466278076, + "426": 2.6848511695861816, + "427": 2.692171096801758, + "428": 2.678511142730713, + "429": 2.685178279876709, + "430": 2.689267635345459, + "431": 2.7103657722473145 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "train_epoch_time": 4.790506362915039, + "train_loss": 2.7078548082491247, + "train_score": 0.23428196730025358, + "val_loss": 2.7219450424514053, + "val_score": 0.2261454150097789 + }, + { + "epoch": 8, + "grad_norm": 0.5217339992523193, + "learning_rate": 0.464, + "model_norm": 88.1882095336914, + "step_logs": { + "grad_norm": { + "432": 0.5504457354545593, + "433": 0.5215202569961548, + "434": 0.5113682150840759, + "435": 0.5124291777610779, + "436": 0.4730629622936249, + "437": 0.3750542104244232, + "438": 0.399244487285614, + "439": 0.49564921855926514, + "440": 0.4806249737739563, + "441": 0.5737437605857849, + "442": 0.6060463786125183, + "443": 0.4971201419830322, + "444": 0.45268091559410095, + "445": 0.4201361835002899, + "446": 0.3853554129600525, + "447": 0.402225524187088, + "448": 0.47125935554504395, + "449": 0.43564122915267944, + "450": 0.42454615235328674, + "451": 0.48441317677497864, + "452": 0.5431110262870789, + "453": 0.41885140538215637, + "454": 0.38179847598075867, + "455": 0.3720003366470337, + "456": 0.3804495632648468, + "457": 0.40889835357666016, + "458": 0.434234619140625, + "459": 0.4937080144882202, + "460": 0.4979003965854645, + "461": 0.47579073905944824, + "462": 0.5379064679145813, + "463": 0.5342559218406677, + "464": 0.5861862897872925, + "465": 0.5847064852714539, + "466": 0.5869342684745789, + "467": 0.565142810344696, + "468": 0.5108954906463623, + "469": 0.46083521842956543, + "470": 0.4709267318248749, + "471": 0.5020389556884766, + "472": 0.5066967606544495, + "473": 0.49101659655570984, + "474": 0.4528162479400635, + "475": 0.4593248963356018, + "476": 0.45641031861305237, + "477": 0.4859912693500519, + "478": 0.48071128129959106, + "479": 0.4857591986656189, + "480": 0.49276676774024963, + "481": 0.5025609731674194, + "482": 0.4995492994785309, + "483": 0.5039301514625549, + "484": 0.5286762714385986, + "485": 0.5217339992523193 + }, + "loss": { + "432": 2.724982500076294, + "433": 2.688483953475952, + "434": 2.6983938217163086, + "435": 2.7062883377075195, + "436": 2.685987949371338, + "437": 2.689300060272217, + "438": 2.662151336669922, + "439": 2.693514823913574, + "440": 2.685091972351074, + "441": 2.6877806186676025, + "442": 2.725755214691162, + "443": 2.6981377601623535, + "444": 2.7130467891693115, + "445": 2.6893796920776367, + "446": 2.6755263805389404, + "447": 2.687635898590088, + "448": 2.677579402923584, + "449": 2.681330680847168, + "450": 2.682194471359253, + "451": 2.670379400253296, + "452": 2.672797679901123, + "453": 2.6556596755981445, + "454": 2.662994384765625, + "455": 2.667182445526123, + "456": 2.665161609649658, + "457": 2.660501718521118, + "458": 2.6573829650878906, + "459": 2.661374568939209, + "460": 2.6760644912719727, + "461": 2.6895253658294678, + "462": 2.6641812324523926, + "463": 2.6613011360168457, + "464": 2.6649575233459473, + "465": 2.6734819412231445, + "466": 2.668853521347046, + "467": 2.6791417598724365, + "468": 2.680006980895996, + "469": 2.6658997535705566, + "470": 2.6568691730499268, + "471": 2.6597189903259277, + "472": 2.6498641967773438, + "473": 2.671576976776123, + "474": 2.660019874572754, + "475": 2.651566505432129, + "476": 2.656750440597534, + "477": 2.6441211700439453, + "478": 2.6541337966918945, + "479": 2.6541285514831543, + "480": 2.660074234008789, + "481": 2.6316864490509033, + "482": 2.67539644241333, + "483": 2.643857479095459, + "484": 2.6470913887023926, + "485": 2.6522140502929688 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "train_epoch_time": 4.79051399230957, + "train_loss": 2.658898579680937, + "train_score": 0.24920866209345535, + "val_loss": 2.6732382131088204, + "val_score": 0.24083309453814777 + }, + { + "epoch": 9, + "grad_norm": 0.5026582479476929, + "learning_rate": 0.464, + "model_norm": 88.22550964355469, + "step_logs": { + "grad_norm": { + "486": 0.5393915772438049, + "487": 0.48238617181777954, + "488": 0.4742048978805542, + "489": 0.46762123703956604, + "490": 0.4421769678592682, + "491": 0.41129744052886963, + "492": 0.3979552388191223, + "493": 0.3982887268066406, + "494": 0.4035964012145996, + "495": 0.4782795011997223, + "496": 0.5390520691871643, + "497": 0.575866162776947, + "498": 0.5617049336433411, + "499": 0.5540283918380737, + "500": 0.5278716087341309, + "501": 0.4533846080303192, + "502": 0.4401773512363434, + "503": 0.44022315740585327, + "504": 0.43612343072891235, + "505": 0.4604063034057617, + "506": 0.4742715656757355, + "507": 0.49274301528930664, + "508": 0.44611626863479614, + "509": 0.41245028376579285, + "510": 0.4085487425327301, + "511": 0.4094655215740204, + "512": 0.37654945254325867, + "513": 0.34507662057876587, + "514": 0.3232730031013489, + "515": 0.3307872414588928, + "516": 0.3872009515762329, + "517": 0.4701208472251892, + "518": 0.5640180110931396, + "519": 0.6066560745239258, + "520": 0.6159108281135559, + "521": 0.5913978219032288, + "522": 0.5769386887550354, + "523": 0.5220202803611755, + "524": 0.5326650142669678, + "525": 0.5230283141136169, + "526": 0.5230275392532349, + "527": 0.5344657301902771, + "528": 0.5188909769058228, + "529": 0.46427685022354126, + "530": 0.3971073627471924, + "531": 0.3990248143672943, + "532": 0.3895544111728668, + "533": 0.3791446387767792, + "534": 0.43812525272369385, + "535": 0.4937969744205475, + "536": 0.5510608553886414, + "537": 0.5472446084022522, + "538": 0.5284805297851562, + "539": 0.5026582479476929 + }, + "loss": { + "486": 2.6691908836364746, + "487": 2.654679775238037, + "488": 2.6492388248443604, + "489": 2.6355605125427246, + "490": 2.6162900924682617, + "491": 2.6369524002075195, + "492": 2.635028839111328, + "493": 2.636723041534424, + "494": 2.62257719039917, + "495": 2.6289432048797607, + "496": 2.6580381393432617, + "497": 2.638354778289795, + "498": 2.6574854850769043, + "499": 2.632498025894165, + "500": 2.6662492752075195, + "501": 2.627734661102295, + "502": 2.6417078971862793, + "503": 2.626744031906128, + "504": 2.631453275680542, + "505": 2.6300528049468994, + "506": 2.6451854705810547, + "507": 2.620011329650879, + "508": 2.6465907096862793, + "509": 2.6392581462860107, + "510": 2.617265224456787, + "511": 2.6251540184020996, + "512": 2.6369576454162598, + "513": 2.612780809402466, + "514": 2.6150434017181396, + "515": 2.596306800842285, + "516": 2.616943836212158, + "517": 2.6273908615112305, + "518": 2.633963108062744, + "519": 2.6436197757720947, + "520": 2.625952959060669, + "521": 2.6523561477661133, + "522": 2.6393675804138184, + "523": 2.6496291160583496, + "524": 2.6553778648376465, + "525": 2.633091926574707, + "526": 2.6248130798339844, + "527": 2.626410484313965, + "528": 2.618163585662842, + "529": 2.6059889793395996, + "530": 2.5993666648864746, + "531": 2.6000046730041504, + "532": 2.6114938259124756, + "533": 2.611501455307007, + "534": 2.617192268371582, + "535": 2.6252808570861816, + "536": 2.6304426193237305, + "537": 2.624691963195801, + "538": 2.6142218112945557, + "539": 2.6360414028167725 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "train_epoch_time": 4.789802074432373, + "train_loss": 2.6083524298975767, + "train_score": 0.2606415888414835, + "val_loss": 2.6212401179303804, + "val_score": 0.25407218662797376 + }, + { + "epoch": 10, + "grad_norm": 0.5591016411781311, + "learning_rate": 0.464, + "model_norm": 88.26213836669922, + "step_logs": { + "grad_norm": { + "540": 0.4353190064430237, + "541": 0.40878868103027344, + "542": 0.3924320340156555, + "543": 0.38761797547340393, + "544": 0.37114766240119934, + "545": 0.36142200231552124, + "546": 0.3414303958415985, + "547": 0.32516202330589294, + "548": 0.35573261976242065, + "549": 0.43056225776672363, + "550": 0.5892359614372253, + "551": 0.6880606412887573, + "552": 0.675864040851593, + "553": 0.5905904769897461, + "554": 0.4994680881500244, + "555": 0.48662200570106506, + "556": 0.486063152551651, + "557": 0.47349515557289124, + "558": 0.48976269364356995, + "559": 0.5170110464096069, + "560": 0.5126966834068298, + "561": 0.5223647356033325, + "562": 0.553849458694458, + "563": 0.5416709184646606, + "564": 0.5293968915939331, + "565": 0.5338631868362427, + "566": 0.5509567856788635, + "567": 0.5623188018798828, + "568": 0.5809409022331238, + "569": 0.5941538214683533, + "570": 0.6498664021492004, + "571": 0.6203831434249878, + "572": 0.5465275049209595, + "573": 0.5212960243225098, + "574": 0.5370823740959167, + "575": 0.4927505850791931, + "576": 0.43067026138305664, + "577": 0.423566073179245, + "578": 0.45962396264076233, + "579": 0.49076953530311584, + "580": 0.535290002822876, + "581": 0.5534805059432983, + "582": 0.6020460724830627, + "583": 0.6749753952026367, + "584": 0.712170422077179, + "585": 0.6982851624488831, + "586": 0.6124773025512695, + "587": 0.560810387134552, + "588": 0.5183064937591553, + "589": 0.5262145400047302, + "590": 0.5044381618499756, + "591": 0.5089525580406189, + "592": 0.5265288352966309, + "593": 0.5591016411781311 + }, + "loss": { + "540": 2.618333339691162, + "541": 2.593850612640381, + "542": 2.612354278564453, + "543": 2.6000542640686035, + "544": 2.5882675647735596, + "545": 2.6304752826690674, + "546": 2.605113983154297, + "547": 2.595273494720459, + "548": 2.590259075164795, + "549": 2.614989757537842, + "550": 2.609283447265625, + "551": 2.6401658058166504, + "552": 2.633363723754883, + "553": 2.619216203689575, + "554": 2.6152684688568115, + "555": 2.6202988624572754, + "556": 2.6098289489746094, + "557": 2.592923879623413, + "558": 2.580751657485962, + "559": 2.610132932662964, + "560": 2.5940983295440674, + "561": 2.6100430488586426, + "562": 2.6138992309570312, + "563": 2.640338897705078, + "564": 2.5961523056030273, + "565": 2.5897035598754883, + "566": 2.616889476776123, + "567": 2.598578929901123, + "568": 2.6096761226654053, + "569": 2.6189303398132324, + "570": 2.6292848587036133, + "571": 2.6067323684692383, + "572": 2.606438636779785, + "573": 2.563504695892334, + "574": 2.5901501178741455, + "575": 2.6033718585968018, + "576": 2.566986560821533, + "577": 2.5825204849243164, + "578": 2.5854532718658447, + "579": 2.59041690826416, + "580": 2.59226131439209, + "581": 2.600159168243408, + "582": 2.5773916244506836, + "583": 2.6025354862213135, + "584": 2.6265413761138916, + "585": 2.61869478225708, + "586": 2.619028091430664, + "587": 2.62429141998291, + "588": 2.595555067062378, + "589": 2.587710380554199, + "590": 2.5777010917663574, + "591": 2.593646764755249, + "592": 2.5727310180664062, + "593": 2.590376853942871 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "train_epoch_time": 4.7896130084991455, + "train_loss": 2.58598659425076, + "train_score": 0.26898986717720796, + "val_loss": 2.605555377515668, + "val_score": 0.2617905066700398 + }, + { + "epoch": 11, + "grad_norm": 0.6427961587905884, + "learning_rate": 0.464, + "model_norm": 88.30183410644531, + "step_logs": { + "grad_norm": { + "594": 0.6102558970451355, + "595": 0.5830957293510437, + "596": 0.5868234038352966, + "597": 0.5984905362129211, + "598": 0.5819385647773743, + "599": 0.587517499923706, + "600": 0.5767359137535095, + "601": 0.5563687086105347, + "602": 0.5310091376304626, + "603": 0.49605637788772583, + "604": 0.46206727623939514, + "605": 0.4455237090587616, + "606": 0.48990777134895325, + "607": 0.5666103363037109, + "608": 0.6441716551780701, + "609": 0.6580949425697327, + "610": 0.6400120258331299, + "611": 0.6008638739585876, + "612": 0.5176447033882141, + "613": 0.4860977530479431, + "614": 0.4765302836894989, + "615": 0.48777997493743896, + "616": 0.524976372718811, + "617": 0.6055616140365601, + "618": 0.6691855788230896, + "619": 0.6471148133277893, + "620": 0.6173555254936218, + "621": 0.6212912797927856, + "622": 0.5882620215415955, + "623": 0.5502585172653198, + "624": 0.5645357966423035, + "625": 0.5881019234657288, + "626": 0.597263753414154, + "627": 0.5757501125335693, + "628": 0.5418733358383179, + "629": 0.5492876768112183, + "630": 0.6006032228469849, + "631": 0.5849524140357971, + "632": 0.5334150195121765, + "633": 0.5865476131439209, + "634": 0.6621525883674622, + "635": 0.6650758981704712, + "636": 0.47196564078330994, + "637": 0.3868277966976166, + "638": 0.30416712164878845, + "639": 0.2245567888021469, + "640": 0.22286877036094666, + "641": 0.2879146635532379, + "642": 0.4127894341945648, + "643": 0.509972095489502, + "644": 0.6712988615036011, + "645": 0.7168768048286438, + "646": 0.6779754757881165, + "647": 0.6427961587905884 + }, + "loss": { + "594": 2.5911519527435303, + "595": 2.5892903804779053, + "596": 2.5860378742218018, + "597": 2.5969104766845703, + "598": 2.5911648273468018, + "599": 2.5752511024475098, + "600": 2.5722761154174805, + "601": 2.5954272747039795, + "602": 2.5621604919433594, + "603": 2.58282470703125, + "604": 2.5562098026275635, + "605": 2.5705299377441406, + "606": 2.5767621994018555, + "607": 2.5804409980773926, + "608": 2.5848984718322754, + "609": 2.5837836265563965, + "610": 2.5815038681030273, + "611": 2.598987579345703, + "612": 2.5651354789733887, + "613": 2.5845117568969727, + "614": 2.5606136322021484, + "615": 2.5808262825012207, + "616": 2.5814690589904785, + "617": 2.5747976303100586, + "618": 2.570241928100586, + "619": 2.6017098426818848, + "620": 2.565883159637451, + "621": 2.571183681488037, + "622": 2.583958864212036, + "623": 2.56392240524292, + "624": 2.584406614303589, + "625": 2.577767848968506, + "626": 2.576108932495117, + "627": 2.5505712032318115, + "628": 2.557628631591797, + "629": 2.5503249168395996, + "630": 2.5669751167297363, + "631": 2.5596976280212402, + "632": 2.5487265586853027, + "633": 2.5699877738952637, + "634": 2.582122325897217, + "635": 2.5851387977600098, + "636": 2.5492100715637207, + "637": 2.557218551635742, + "638": 2.5428402423858643, + "639": 2.5253586769104004, + "640": 2.5295419692993164, + "641": 2.5135114192962646, + "642": 2.527859926223755, + "643": 2.5553112030029297, + "644": 2.563213348388672, + "645": 2.6114673614501953, + "646": 2.575721263885498, + "647": 2.6093015670776367 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "train_epoch_time": 4.789430379867554, + "train_loss": 2.55411589491145, + "train_score": 0.2774053982203797, + "val_loss": 2.5694819216613243, + "val_score": 0.27186333996283335 + }, + { + "epoch": 12, + "grad_norm": 0.37160810828208923, + "learning_rate": 0.464, + "model_norm": 88.33002471923828, + "step_logs": { + "grad_norm": { + "648": 0.6033352017402649, + "649": 0.6375214457511902, + "650": 0.6416547298431396, + "651": 0.6237403750419617, + "652": 0.5660628080368042, + "653": 0.5259665846824646, + "654": 0.45734545588493347, + "655": 0.4149729311466217, + "656": 0.3758750855922699, + "657": 0.35036203265190125, + "658": 0.36929088830947876, + "659": 0.417061984539032, + "660": 0.4331158995628357, + "661": 0.4113219678401947, + "662": 0.4149918556213379, + "663": 0.39995384216308594, + "664": 0.3589474558830261, + "665": 0.3128139078617096, + "666": 0.28866633772850037, + "667": 0.2722757160663605, + "668": 0.265558123588562, + "669": 0.20236419141292572, + "670": 0.20753850042819977, + "671": 0.32348400354385376, + "672": 0.33176127076148987, + "673": 0.2808423340320587, + "674": 0.2504422664642334, + "675": 0.22587434947490692, + "676": 0.23982255160808563, + "677": 0.1987578123807907, + "678": 0.17108003795146942, + "679": 0.1660272777080536, + "680": 0.20091399550437927, + "681": 0.20667262375354767, + "682": 0.23717541992664337, + "683": 0.24786245822906494, + "684": 0.27075037360191345, + "685": 0.3327227532863617, + "686": 0.29253676533699036, + "687": 0.240653395652771, + "688": 0.21306094527244568, + "689": 0.16863195598125458, + "690": 0.16018062829971313, + "691": 0.17213071882724762, + "692": 0.16818664968013763, + "693": 0.18476681411266327, + "694": 0.2009366899728775, + "695": 0.20096440613269806, + "696": 0.2257307767868042, + "697": 0.15748992562294006, + "698": 0.18098805844783783, + "699": 0.23984956741333008, + "700": 0.2926294207572937, + "701": 0.37160810828208923 + }, + "loss": { + "648": 2.5429294109344482, + "649": 2.563180923461914, + "650": 2.5596656799316406, + "651": 2.583275079727173, + "652": 2.555635452270508, + "653": 2.5495266914367676, + "654": 2.527939558029175, + "655": 2.531337261199951, + "656": 2.5354232788085938, + "657": 2.5216381549835205, + "658": 2.526749610900879, + "659": 2.5052216053009033, + "660": 2.512017011642456, + "661": 2.547964096069336, + "662": 2.537106990814209, + "663": 2.531524181365967, + "664": 2.51654314994812, + "665": 2.522202968597412, + "666": 2.532649040222168, + "667": 2.4954707622528076, + "668": 2.5083999633789062, + "669": 2.525273561477661, + "670": 2.5156142711639404, + "671": 2.5051584243774414, + "672": 2.5163145065307617, + "673": 2.5254039764404297, + "674": 2.504227876663208, + "675": 2.5045571327209473, + "676": 2.5301895141601562, + "677": 2.5105018615722656, + "678": 2.508082866668701, + "679": 2.5128772258758545, + "680": 2.4989960193634033, + "681": 2.5019519329071045, + "682": 2.523742914199829, + "683": 2.5194005966186523, + "684": 2.511171579360962, + "685": 2.507780075073242, + "686": 2.50174617767334, + "687": 2.506113290786743, + "688": 2.496569871902466, + "689": 2.5184364318847656, + "690": 2.4979379177093506, + "691": 2.513768196105957, + "692": 2.4787063598632812, + "693": 2.4990909099578857, + "694": 2.5047292709350586, + "695": 2.5084941387176514, + "696": 2.512087821960449, + "697": 2.492587089538574, + "698": 2.4904065132141113, + "699": 2.5076804161071777, + "700": 2.49479079246521, + "701": 2.501924753189087 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "train_epoch_time": 4.791479825973511, + "train_loss": 2.5056555384713914, + "train_score": 0.28899973107547294, + "val_loss": 2.529800688221315, + "val_score": 0.2841875353992738 + }, + { + "epoch": 13, + "grad_norm": 0.1589818298816681, + "learning_rate": 0.3093333333333334, + "model_norm": 88.34654998779297, + "step_logs": { + "grad_norm": { + "702": 0.43228471279144287, + "703": 0.4150571823120117, + "704": 0.37235188484191895, + "705": 0.34717926383018494, + "706": 0.3039432466030121, + "707": 0.29926228523254395, + "708": 0.2916480600833893, + "709": 0.29013413190841675, + "710": 0.2985641360282898, + "711": 0.27594083547592163, + "712": 0.26492661237716675, + "713": 0.24489927291870117, + "714": 0.26873308420181274, + "715": 0.28540027141571045, + "716": 0.2636706531047821, + "717": 0.2658044993877411, + "718": 0.298604279756546, + "719": 0.3089991807937622, + "720": 0.29304563999176025, + "721": 0.28151825070381165, + "722": 0.30985164642333984, + "723": 0.3154941201210022, + "724": 0.2999439239501953, + "725": 0.300311803817749, + "726": 0.2792191803455353, + "727": 0.25215500593185425, + "728": 0.22900502383708954, + "729": 0.1993046998977661, + "730": 0.22680199146270752, + "731": 0.23790690302848816, + "732": 0.22285096347332, + "733": 0.23289717733860016, + "734": 0.23578020930290222, + "735": 0.24348080158233643, + "736": 0.21835733950138092, + "737": 0.2156786173582077, + "738": 0.24533380568027496, + "739": 0.21243096888065338, + "740": 0.23380620777606964, + "741": 0.2154991626739502, + "742": 0.21168167889118195, + "743": 0.20409566164016724, + "744": 0.17405036091804504, + "745": 0.17353741824626923, + "746": 0.1971268653869629, + "747": 0.18005278706550598, + "748": 0.17533138394355774, + "749": 0.18208526074886322, + "750": 0.19603769481182098, + "751": 0.17800000309944153, + "752": 0.15871545672416687, + "753": 0.1857120394706726, + "754": 0.1774788647890091, + "755": 0.1589818298816681 + }, + "loss": { + "702": 2.500382423400879, + "703": 2.48991060256958, + "704": 2.493062734603882, + "705": 2.5000734329223633, + "706": 2.495919704437256, + "707": 2.5012028217315674, + "708": 2.492306709289551, + "709": 2.500197410583496, + "710": 2.509784460067749, + "711": 2.501002311706543, + "712": 2.4673142433166504, + "713": 2.498098850250244, + "714": 2.4844164848327637, + "715": 2.514371395111084, + "716": 2.4912924766540527, + "717": 2.467846393585205, + "718": 2.4839487075805664, + "719": 2.4937055110931396, + "720": 2.484461784362793, + "721": 2.4859306812286377, + "722": 2.4986188411712646, + "723": 2.46124267578125, + "724": 2.483790159225464, + "725": 2.516995906829834, + "726": 2.4903712272644043, + "727": 2.4950814247131348, + "728": 2.480074405670166, + "729": 2.482741355895996, + "730": 2.4764699935913086, + "731": 2.5072567462921143, + "732": 2.4640614986419678, + "733": 2.4912025928497314, + "734": 2.4776031970977783, + "735": 2.4948501586914062, + "736": 2.4690709114074707, + "737": 2.4752299785614014, + "738": 2.4902820587158203, + "739": 2.4792089462280273, + "740": 2.476484775543213, + "741": 2.4966518878936768, + "742": 2.472245693206787, + "743": 2.455617666244507, + "744": 2.4649314880371094, + "745": 2.4670093059539795, + "746": 2.4900245666503906, + "747": 2.482882261276245, + "748": 2.475067138671875, + "749": 2.4585561752319336, + "750": 2.479755401611328, + "751": 2.4682862758636475, + "752": 2.478303909301758, + "753": 2.4845895767211914, + "754": 2.466521739959717, + "755": 2.483273506164551 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "train_epoch_time": 4.791181325912476, + "train_loss": 2.469497203553253, + "train_score": 0.3004393830018885, + "val_loss": 2.4955126393402485, + "val_score": 0.293143656408472 + }, + { + "epoch": 14, + "grad_norm": 0.1528676450252533, + "learning_rate": 0.1546666666666667, + "model_norm": 88.35224914550781, + "step_logs": { + "grad_norm": { + "756": 0.15225781500339508, + "757": 0.16851821541786194, + "758": 0.18109263479709625, + "759": 0.1627882570028305, + "760": 0.166079580783844, + "761": 0.20054063200950623, + "762": 0.17500917613506317, + "763": 0.16772787272930145, + "764": 0.17451803386211395, + "765": 0.1759415715932846, + "766": 0.17383219301700592, + "767": 0.15727967023849487, + "768": 0.1445913463830948, + "769": 0.16452628374099731, + "770": 0.1758730262517929, + "771": 0.13839362561702728, + "772": 0.1468462347984314, + "773": 0.15077322721481323, + "774": 0.17382842302322388, + "775": 0.16189800202846527, + "776": 0.16291821002960205, + "777": 0.15655256807804108, + "778": 0.15239010751247406, + "779": 0.17736950516700745, + "780": 0.15875975787639618, + "781": 0.17633116245269775, + "782": 0.16365593671798706, + "783": 0.16991767287254333, + "784": 0.16173149645328522, + "785": 0.16624715924263, + "786": 0.15952730178833008, + "787": 0.16946932673454285, + "788": 0.15996164083480835, + "789": 0.16099151968955994, + "790": 0.1575293093919754, + "791": 0.15854594111442566, + "792": 0.15482546389102936, + "793": 0.15897639095783234, + "794": 0.15980958938598633, + "795": 0.166922926902771, + "796": 0.15102678537368774, + "797": 0.15620110929012299, + "798": 0.14779940247535706, + "799": 0.14478686451911926, + "800": 0.15418539941310883, + "801": 0.16687539219856262, + "802": 0.1509760320186615, + "803": 0.14906252920627594, + "804": 0.1507200300693512, + "805": 0.16321063041687012, + "806": 0.14158639311790466, + "807": 0.1515313684940338, + "808": 0.15022914111614227, + "809": 0.1528676450252533 + }, + "loss": { + "756": 2.485177516937256, + "757": 2.471430778503418, + "758": 2.4800238609313965, + "759": 2.4690446853637695, + "760": 2.4698028564453125, + "761": 2.4735207557678223, + "762": 2.4731292724609375, + "763": 2.4609038829803467, + "764": 2.4538493156433105, + "765": 2.4573183059692383, + "766": 2.4712400436401367, + "767": 2.479620933532715, + "768": 2.486509084701538, + "769": 2.4522135257720947, + "770": 2.5024983882904053, + "771": 2.4455902576446533, + "772": 2.460141181945801, + "773": 2.463428258895874, + "774": 2.4634666442871094, + "775": 2.4514567852020264, + "776": 2.4770278930664062, + "777": 2.470583915710449, + "778": 2.459066390991211, + "779": 2.4778878688812256, + "780": 2.463188648223877, + "781": 2.443997621536255, + "782": 2.480278968811035, + "783": 2.4452319145202637, + "784": 2.475268840789795, + "785": 2.4804553985595703, + "786": 2.45963716506958, + "787": 2.467740535736084, + "788": 2.488633155822754, + "789": 2.442119836807251, + "790": 2.4509010314941406, + "791": 2.473454475402832, + "792": 2.4729321002960205, + "793": 2.4500319957733154, + "794": 2.45410418510437, + "795": 2.4627552032470703, + "796": 2.4616622924804688, + "797": 2.4415009021759033, + "798": 2.467247486114502, + "799": 2.4649853706359863, + "800": 2.4506001472473145, + "801": 2.4459095001220703, + "802": 2.4668049812316895, + "803": 2.467935085296631, + "804": 2.4737305641174316, + "805": 2.4719676971435547, + "806": 2.465332508087158, + "807": 2.464211940765381, + "808": 2.4637200832366943, + "809": 2.4400713443756104 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "train_epoch_time": 4.791383504867554, + "train_loss": 2.4618878629320493, + "train_score": 0.30231012381303257, + "val_loss": 2.4886534879182975, + "val_score": 0.2948523610659226 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:36:35.320815", + "final_model_norm": 88.35224914550781, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:34:54.417167", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": NaN, + "learning_rate": 1e-10, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 5.981522560119629, + "3": 7.244385242462158, + "4": 10.633097648620605, + "5": 4.144954681396484, + "6": 6.494001388549805, + "7": 2.9956462383270264, + "8": 14.424826622009277, + "9": 7.633819103240967, + "10": 149.88861083984375, + "11": 6.750445365905762, + "12": 21.091999053955078, + "13": 12.757149696350098, + "14": 5.891195774078369, + "15": 9.666069030761719, + "16": 15.902484893798828, + "17": 6.4736409187316895, + "18": 7.752840995788574, + "19": 6.990000247955322, + "20": 6.225035667419434, + "21": 4.53028678894043, + "22": 8.853759765625, + "23": 4.583296775817871, + "24": 2.9546868801116943, + "25": 3.1095683574676514, + "26": 9.110855102539062, + "27": 3.695751905441284, + "28": 2.2180662155151367, + "29": 1.878195881843567, + "30": 1.5109740495681763, + "31": 1.5007685422897339, + "32": 7.965774059295654, + "33": 6.4047369956970215, + "34": 3.1937880516052246, + "35": 2.0074331760406494, + "36": 7.5528483390808105, + "37": 2.627272844314575, + "38": 8.863873481750488, + "39": 2.131105661392212, + "40": 1.5022618770599365, + "41": 1.4835952520370483, + "42": 8.715837478637695, + "43": 2.574693441390991, + "44": 19.155643463134766, + "45": 36.39860534667969, + "46": 55.313697814941406, + "47": 321.8904113769531, + "48": 3299.11181640625, + "49": 794108.1875, + "50": NaN, + "51": NaN, + "52": NaN, + "53": NaN + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.793055772781372, + "3": 4.178531169891357, + "4": 4.204231262207031, + "5": 5.521515369415283, + "6": 4.58737850189209, + "7": 4.408707618713379, + "8": 5.3844895362854, + "9": 8.882173538208008, + "10": 15.804492950439453, + "11": 5.226345539093018, + "12": 21.98281478881836, + "13": 13.894100189208984, + "14": 14.051280975341797, + "15": 11.401083946228027, + "16": 12.105506896972656, + "17": 21.230819702148438, + "18": 17.732337951660156, + "19": 14.40357780456543, + "20": 11.373653411865234, + "21": 8.966316223144531, + "22": 9.833341598510742, + "23": 12.675477981567383, + "24": 9.089855194091797, + "25": 6.582707405090332, + "26": 6.475600242614746, + "27": 10.64722728729248, + "28": 9.009147644042969, + "29": 6.964444160461426, + "30": 5.376404762268066, + "31": 4.074130535125732, + "32": 4.895949363708496, + "33": 9.36358642578125, + "34": 8.311046600341797, + "35": 6.414035797119141, + "36": 6.510993003845215, + "37": 7.441115379333496, + "38": 6.660740852355957, + "39": 8.212377548217773, + "40": 6.214600563049316, + "41": 4.595078468322754, + "42": 5.635906219482422, + "43": 5.839105129241943, + "44": 20.69137954711914, + "45": 27.671417236328125, + "46": 103.21652221679688, + "47": 367.15301513671875, + "48": 39523.78125, + "49": 400313.9375, + "50": 266901840.0, + "51": NaN, + "52": NaN, + "53": NaN + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "train_epoch_time": 4.789042949676514, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 1, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "loss": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "train_epoch_time": 4.78561806678772, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 2, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "loss": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "train_epoch_time": 4.785696983337402, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 3, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "loss": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "train_epoch_time": 4.7856833934783936, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 4, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "loss": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "train_epoch_time": 4.785582780838013, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 5, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "loss": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "train_epoch_time": 4.785930156707764, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 6, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "loss": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "train_epoch_time": 4.785890817642212, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 7, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "loss": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "train_epoch_time": 4.7862937450408936, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 8, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "loss": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "train_epoch_time": 4.78559136390686, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 9, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "loss": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "train_epoch_time": 4.786235570907593, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 10, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "loss": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "train_epoch_time": 4.78626823425293, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 11, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "loss": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "train_epoch_time": 4.786235809326172, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 12, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "loss": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "train_epoch_time": 4.786622524261475, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 13, + "grad_norm": NaN, + "learning_rate": 0.6666666666666667, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "loss": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "train_epoch_time": 4.786588907241821, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 14, + "grad_norm": NaN, + "learning_rate": 0.33333333333333337, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "loss": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "train_epoch_time": 4.785744667053223, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:38:16.152381", + "final_model_norm": NaN, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:36:35.471996", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": NaN, + "learning_rate": 1e-10, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.240360736846924, + "3": 7.6079630851745605, + "4": 7.2784342765808105, + "5": 4.5539655685424805, + "6": 9.584453582763672, + "7": 5.157559871673584, + "8": 5.092408180236816, + "9": 39.324947357177734, + "10": 5.599187850952148, + "11": 48.273582458496094, + "12": 17.04164695739746, + "13": 13.871618270874023, + "14": 7.465798854827881, + "15": 7.114660263061523, + "16": 6.876247882843018, + "17": 5.5258564949035645, + "18": 17.15142822265625, + "19": 16.66469383239746, + "20": 10.560254096984863, + "21": 3.7484688758850098, + "22": 2.711054563522339, + "23": 1.9748886823654175, + "24": 2.641187906265259, + "25": 1.7857410907745361, + "26": 1.8030115365982056, + "27": 1.501802682876587, + "28": 1.1701369285583496, + "29": 1.241692066192627, + "30": 6.217986106872559, + "31": 2.257883071899414, + "32": 8.467720031738281, + "33": 3.28859543800354, + "34": 9.957716941833496, + "35": 21.710872650146484, + "36": 53.593353271484375, + "37": 354.18780517578125, + "38": 582.2900390625, + "39": 2131870.25, + "40": NaN, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN, + "51": NaN, + "52": NaN, + "53": NaN + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.7900640964508057, + "3": 4.2330145835876465, + "4": 4.0051774978637695, + "5": 4.980250358581543, + "6": 4.304337501525879, + "7": 4.8918304443359375, + "8": 5.82696533203125, + "9": 6.8483099937438965, + "10": 4.372932434082031, + "11": 18.85643768310547, + "12": 20.965839385986328, + "13": 19.575660705566406, + "14": 18.41077423095703, + "15": 16.630413055419922, + "16": 13.304697036743164, + "17": 8.574707984924316, + "18": 23.446083068847656, + "19": 50.79539489746094, + "20": 8.92165470123291, + "21": 12.349089622497559, + "22": 9.883639335632324, + "23": 7.176352024078369, + "24": 5.888399600982666, + "25": 5.101390838623047, + "26": 3.943868637084961, + "27": 4.28470516204834, + "28": 3.5186357498168945, + "29": 3.702208995819092, + "30": 4.546236991882324, + "31": 5.643336296081543, + "32": 4.949770450592041, + "33": 7.9264984130859375, + "34": 6.880195617675781, + "35": 19.588285446166992, + "36": 91.00701904296875, + "37": 21.428569793701172, + "38": 2507.474365234375, + "39": 224428.03125, + "40": 857502.375, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN, + "51": NaN, + "52": NaN, + "53": NaN + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "train_epoch_time": 4.79034948348999, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 1, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "loss": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "train_epoch_time": 4.78636622428894, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 2, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "loss": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "train_epoch_time": 4.787309646606445, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 3, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "loss": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "train_epoch_time": 4.786698818206787, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 4, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "loss": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "train_epoch_time": 4.787599802017212, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 5, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "loss": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "train_epoch_time": 4.788406610488892, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 6, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "loss": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "train_epoch_time": 4.789200305938721, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 7, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "loss": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "train_epoch_time": 4.789167881011963, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 8, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "loss": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "train_epoch_time": 4.789303779602051, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 9, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "loss": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "train_epoch_time": 4.789206266403198, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 10, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "loss": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "train_epoch_time": 4.789727687835693, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 11, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "loss": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "train_epoch_time": 4.789154767990112, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 12, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "loss": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "train_epoch_time": 4.790106773376465, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 13, + "grad_norm": NaN, + "learning_rate": 0.6666666666666667, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "loss": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "train_epoch_time": 4.789757966995239, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 14, + "grad_norm": NaN, + "learning_rate": 0.33333333333333337, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "loss": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "train_epoch_time": 4.788926839828491, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:39:57.039554", + "final_model_norm": NaN, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:38:16.310790", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "sgd", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": NaN, + "learning_rate": 1e-10, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 5.875208854675293, + "3": 7.603043079376221, + "4": 10.991902351379395, + "5": 4.112478733062744, + "6": 3.933785915374756, + "7": 3.6211907863616943, + "8": 10.141874313354492, + "9": 9.137262344360352, + "10": 71.74754333496094, + "11": 23.597883224487305, + "12": 4.8398027420043945, + "13": 17.30185317993164, + "14": 10.70186710357666, + "15": 6.08171272277832, + "16": 5.440594673156738, + "17": 13.690589904785156, + "18": 7.3728742599487305, + "19": 6.940519332885742, + "20": 4.886902809143066, + "21": 3.0086541175842285, + "22": 3.1613903045654297, + "23": 7.931731224060059, + "24": 2.958336591720581, + "25": 2.5366039276123047, + "26": 1.84986412525177, + "27": 1.9589768648147583, + "28": 14.245430946350098, + "29": 6.625199794769287, + "30": 33.479270935058594, + "31": 59.46356964111328, + "32": 113.09754180908203, + "33": 442.8728332519531, + "34": 4547.40234375, + "35": NaN, + "36": NaN, + "37": NaN, + "38": NaN, + "39": NaN, + "40": NaN, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN, + "51": NaN, + "52": NaN, + "53": NaN + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.771186351776123, + "3": 4.13394832611084, + "4": 4.27254581451416, + "5": 5.670802593231201, + "6": 4.482396602630615, + "7": 3.6988816261291504, + "8": 4.275219917297363, + "9": 5.822898864746094, + "10": 15.89161205291748, + "11": 17.373863220214844, + "12": 14.073184967041016, + "13": 14.631933212280273, + "14": 16.587650299072266, + "15": 15.770805358886719, + "16": 12.974701881408691, + "17": 13.431902885437012, + "18": 19.924354553222656, + "19": 15.68300724029541, + "20": 12.40634536743164, + "21": 8.940589904785156, + "22": 5.989598274230957, + "23": 6.098030090332031, + "24": 10.11153793334961, + "25": 7.325209140777588, + "26": 5.361544132232666, + "27": 3.7768332958221436, + "28": 10.208740234375, + "29": 8.026286125183105, + "30": 39.3283805847168, + "31": 122.84185791015625, + "32": 97.03656005859375, + "33": 1425.232666015625, + "34": 31620.880859375, + "35": 349572.59375, + "36": NaN, + "37": NaN, + "38": NaN, + "39": NaN, + "40": NaN, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN, + "51": NaN, + "52": NaN, + "53": NaN + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "train_epoch_time": 4.793226957321167, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 1, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "loss": { + "54": NaN, + "55": NaN, + "56": NaN, + "57": NaN, + "58": NaN, + "59": NaN, + "60": NaN, + "61": NaN, + "62": NaN, + "63": NaN, + "64": NaN, + "65": NaN, + "66": NaN, + "67": NaN, + "68": NaN, + "69": NaN, + "70": NaN, + "71": NaN, + "72": NaN, + "73": NaN, + "74": NaN, + "75": NaN, + "76": NaN, + "77": NaN, + "78": NaN, + "79": NaN, + "80": NaN, + "81": NaN, + "82": NaN, + "83": NaN, + "84": NaN, + "85": NaN, + "86": NaN, + "87": NaN, + "88": NaN, + "89": NaN, + "90": NaN, + "91": NaN, + "92": NaN, + "93": NaN, + "94": NaN, + "95": NaN, + "96": NaN, + "97": NaN, + "98": NaN, + "99": NaN, + "100": NaN, + "101": NaN, + "102": NaN, + "103": NaN, + "104": NaN, + "105": NaN, + "106": NaN, + "107": NaN + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "train_epoch_time": 4.78930401802063, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 2, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "loss": { + "108": NaN, + "109": NaN, + "110": NaN, + "111": NaN, + "112": NaN, + "113": NaN, + "114": NaN, + "115": NaN, + "116": NaN, + "117": NaN, + "118": NaN, + "119": NaN, + "120": NaN, + "121": NaN, + "122": NaN, + "123": NaN, + "124": NaN, + "125": NaN, + "126": NaN, + "127": NaN, + "128": NaN, + "129": NaN, + "130": NaN, + "131": NaN, + "132": NaN, + "133": NaN, + "134": NaN, + "135": NaN, + "136": NaN, + "137": NaN, + "138": NaN, + "139": NaN, + "140": NaN, + "141": NaN, + "142": NaN, + "143": NaN, + "144": NaN, + "145": NaN, + "146": NaN, + "147": NaN, + "148": NaN, + "149": NaN, + "150": NaN, + "151": NaN, + "152": NaN, + "153": NaN, + "154": NaN, + "155": NaN, + "156": NaN, + "157": NaN, + "158": NaN, + "159": NaN, + "160": NaN, + "161": NaN + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "train_epoch_time": 4.789414405822754, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 3, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "loss": { + "162": NaN, + "163": NaN, + "164": NaN, + "165": NaN, + "166": NaN, + "167": NaN, + "168": NaN, + "169": NaN, + "170": NaN, + "171": NaN, + "172": NaN, + "173": NaN, + "174": NaN, + "175": NaN, + "176": NaN, + "177": NaN, + "178": NaN, + "179": NaN, + "180": NaN, + "181": NaN, + "182": NaN, + "183": NaN, + "184": NaN, + "185": NaN, + "186": NaN, + "187": NaN, + "188": NaN, + "189": NaN, + "190": NaN, + "191": NaN, + "192": NaN, + "193": NaN, + "194": NaN, + "195": NaN, + "196": NaN, + "197": NaN, + "198": NaN, + "199": NaN, + "200": NaN, + "201": NaN, + "202": NaN, + "203": NaN, + "204": NaN, + "205": NaN, + "206": NaN, + "207": NaN, + "208": NaN, + "209": NaN, + "210": NaN, + "211": NaN, + "212": NaN, + "213": NaN, + "214": NaN, + "215": NaN + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "train_epoch_time": 4.789056062698364, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 4, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "loss": { + "216": NaN, + "217": NaN, + "218": NaN, + "219": NaN, + "220": NaN, + "221": NaN, + "222": NaN, + "223": NaN, + "224": NaN, + "225": NaN, + "226": NaN, + "227": NaN, + "228": NaN, + "229": NaN, + "230": NaN, + "231": NaN, + "232": NaN, + "233": NaN, + "234": NaN, + "235": NaN, + "236": NaN, + "237": NaN, + "238": NaN, + "239": NaN, + "240": NaN, + "241": NaN, + "242": NaN, + "243": NaN, + "244": NaN, + "245": NaN, + "246": NaN, + "247": NaN, + "248": NaN, + "249": NaN, + "250": NaN, + "251": NaN, + "252": NaN, + "253": NaN, + "254": NaN, + "255": NaN, + "256": NaN, + "257": NaN, + "258": NaN, + "259": NaN, + "260": NaN, + "261": NaN, + "262": NaN, + "263": NaN, + "264": NaN, + "265": NaN, + "266": NaN, + "267": NaN, + "268": NaN, + "269": NaN + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "train_epoch_time": 4.789112329483032, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 5, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "loss": { + "270": NaN, + "271": NaN, + "272": NaN, + "273": NaN, + "274": NaN, + "275": NaN, + "276": NaN, + "277": NaN, + "278": NaN, + "279": NaN, + "280": NaN, + "281": NaN, + "282": NaN, + "283": NaN, + "284": NaN, + "285": NaN, + "286": NaN, + "287": NaN, + "288": NaN, + "289": NaN, + "290": NaN, + "291": NaN, + "292": NaN, + "293": NaN, + "294": NaN, + "295": NaN, + "296": NaN, + "297": NaN, + "298": NaN, + "299": NaN, + "300": NaN, + "301": NaN, + "302": NaN, + "303": NaN, + "304": NaN, + "305": NaN, + "306": NaN, + "307": NaN, + "308": NaN, + "309": NaN, + "310": NaN, + "311": NaN, + "312": NaN, + "313": NaN, + "314": NaN, + "315": NaN, + "316": NaN, + "317": NaN, + "318": NaN, + "319": NaN, + "320": NaN, + "321": NaN, + "322": NaN, + "323": NaN + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "train_epoch_time": 4.788971662521362, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 6, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "loss": { + "324": NaN, + "325": NaN, + "326": NaN, + "327": NaN, + "328": NaN, + "329": NaN, + "330": NaN, + "331": NaN, + "332": NaN, + "333": NaN, + "334": NaN, + "335": NaN, + "336": NaN, + "337": NaN, + "338": NaN, + "339": NaN, + "340": NaN, + "341": NaN, + "342": NaN, + "343": NaN, + "344": NaN, + "345": NaN, + "346": NaN, + "347": NaN, + "348": NaN, + "349": NaN, + "350": NaN, + "351": NaN, + "352": NaN, + "353": NaN, + "354": NaN, + "355": NaN, + "356": NaN, + "357": NaN, + "358": NaN, + "359": NaN, + "360": NaN, + "361": NaN, + "362": NaN, + "363": NaN, + "364": NaN, + "365": NaN, + "366": NaN, + "367": NaN, + "368": NaN, + "369": NaN, + "370": NaN, + "371": NaN, + "372": NaN, + "373": NaN, + "374": NaN, + "375": NaN, + "376": NaN, + "377": NaN + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "train_epoch_time": 4.789371013641357, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 7, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "loss": { + "378": NaN, + "379": NaN, + "380": NaN, + "381": NaN, + "382": NaN, + "383": NaN, + "384": NaN, + "385": NaN, + "386": NaN, + "387": NaN, + "388": NaN, + "389": NaN, + "390": NaN, + "391": NaN, + "392": NaN, + "393": NaN, + "394": NaN, + "395": NaN, + "396": NaN, + "397": NaN, + "398": NaN, + "399": NaN, + "400": NaN, + "401": NaN, + "402": NaN, + "403": NaN, + "404": NaN, + "405": NaN, + "406": NaN, + "407": NaN, + "408": NaN, + "409": NaN, + "410": NaN, + "411": NaN, + "412": NaN, + "413": NaN, + "414": NaN, + "415": NaN, + "416": NaN, + "417": NaN, + "418": NaN, + "419": NaN, + "420": NaN, + "421": NaN, + "422": NaN, + "423": NaN, + "424": NaN, + "425": NaN, + "426": NaN, + "427": NaN, + "428": NaN, + "429": NaN, + "430": NaN, + "431": NaN + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "train_epoch_time": 4.788316488265991, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 8, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "loss": { + "432": NaN, + "433": NaN, + "434": NaN, + "435": NaN, + "436": NaN, + "437": NaN, + "438": NaN, + "439": NaN, + "440": NaN, + "441": NaN, + "442": NaN, + "443": NaN, + "444": NaN, + "445": NaN, + "446": NaN, + "447": NaN, + "448": NaN, + "449": NaN, + "450": NaN, + "451": NaN, + "452": NaN, + "453": NaN, + "454": NaN, + "455": NaN, + "456": NaN, + "457": NaN, + "458": NaN, + "459": NaN, + "460": NaN, + "461": NaN, + "462": NaN, + "463": NaN, + "464": NaN, + "465": NaN, + "466": NaN, + "467": NaN, + "468": NaN, + "469": NaN, + "470": NaN, + "471": NaN, + "472": NaN, + "473": NaN, + "474": NaN, + "475": NaN, + "476": NaN, + "477": NaN, + "478": NaN, + "479": NaN, + "480": NaN, + "481": NaN, + "482": NaN, + "483": NaN, + "484": NaN, + "485": NaN + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "train_epoch_time": 4.789828300476074, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 9, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "loss": { + "486": NaN, + "487": NaN, + "488": NaN, + "489": NaN, + "490": NaN, + "491": NaN, + "492": NaN, + "493": NaN, + "494": NaN, + "495": NaN, + "496": NaN, + "497": NaN, + "498": NaN, + "499": NaN, + "500": NaN, + "501": NaN, + "502": NaN, + "503": NaN, + "504": NaN, + "505": NaN, + "506": NaN, + "507": NaN, + "508": NaN, + "509": NaN, + "510": NaN, + "511": NaN, + "512": NaN, + "513": NaN, + "514": NaN, + "515": NaN, + "516": NaN, + "517": NaN, + "518": NaN, + "519": NaN, + "520": NaN, + "521": NaN, + "522": NaN, + "523": NaN, + "524": NaN, + "525": NaN, + "526": NaN, + "527": NaN, + "528": NaN, + "529": NaN, + "530": NaN, + "531": NaN, + "532": NaN, + "533": NaN, + "534": NaN, + "535": NaN, + "536": NaN, + "537": NaN, + "538": NaN, + "539": NaN + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "train_epoch_time": 4.788756370544434, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 10, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "loss": { + "540": NaN, + "541": NaN, + "542": NaN, + "543": NaN, + "544": NaN, + "545": NaN, + "546": NaN, + "547": NaN, + "548": NaN, + "549": NaN, + "550": NaN, + "551": NaN, + "552": NaN, + "553": NaN, + "554": NaN, + "555": NaN, + "556": NaN, + "557": NaN, + "558": NaN, + "559": NaN, + "560": NaN, + "561": NaN, + "562": NaN, + "563": NaN, + "564": NaN, + "565": NaN, + "566": NaN, + "567": NaN, + "568": NaN, + "569": NaN, + "570": NaN, + "571": NaN, + "572": NaN, + "573": NaN, + "574": NaN, + "575": NaN, + "576": NaN, + "577": NaN, + "578": NaN, + "579": NaN, + "580": NaN, + "581": NaN, + "582": NaN, + "583": NaN, + "584": NaN, + "585": NaN, + "586": NaN, + "587": NaN, + "588": NaN, + "589": NaN, + "590": NaN, + "591": NaN, + "592": NaN, + "593": NaN + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "train_epoch_time": 4.7892677783966064, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 11, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "loss": { + "594": NaN, + "595": NaN, + "596": NaN, + "597": NaN, + "598": NaN, + "599": NaN, + "600": NaN, + "601": NaN, + "602": NaN, + "603": NaN, + "604": NaN, + "605": NaN, + "606": NaN, + "607": NaN, + "608": NaN, + "609": NaN, + "610": NaN, + "611": NaN, + "612": NaN, + "613": NaN, + "614": NaN, + "615": NaN, + "616": NaN, + "617": NaN, + "618": NaN, + "619": NaN, + "620": NaN, + "621": NaN, + "622": NaN, + "623": NaN, + "624": NaN, + "625": NaN, + "626": NaN, + "627": NaN, + "628": NaN, + "629": NaN, + "630": NaN, + "631": NaN, + "632": NaN, + "633": NaN, + "634": NaN, + "635": NaN, + "636": NaN, + "637": NaN, + "638": NaN, + "639": NaN, + "640": NaN, + "641": NaN, + "642": NaN, + "643": NaN, + "644": NaN, + "645": NaN, + "646": NaN, + "647": NaN + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "train_epoch_time": 4.789634943008423, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 12, + "grad_norm": NaN, + "learning_rate": 1.0, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "loss": { + "648": NaN, + "649": NaN, + "650": NaN, + "651": NaN, + "652": NaN, + "653": NaN, + "654": NaN, + "655": NaN, + "656": NaN, + "657": NaN, + "658": NaN, + "659": NaN, + "660": NaN, + "661": NaN, + "662": NaN, + "663": NaN, + "664": NaN, + "665": NaN, + "666": NaN, + "667": NaN, + "668": NaN, + "669": NaN, + "670": NaN, + "671": NaN, + "672": NaN, + "673": NaN, + "674": NaN, + "675": NaN, + "676": NaN, + "677": NaN, + "678": NaN, + "679": NaN, + "680": NaN, + "681": NaN, + "682": NaN, + "683": NaN, + "684": NaN, + "685": NaN, + "686": NaN, + "687": NaN, + "688": NaN, + "689": NaN, + "690": NaN, + "691": NaN, + "692": NaN, + "693": NaN, + "694": NaN, + "695": NaN, + "696": NaN, + "697": NaN, + "698": NaN, + "699": NaN, + "700": NaN, + "701": NaN + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "train_epoch_time": 4.789050579071045, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 13, + "grad_norm": NaN, + "learning_rate": 0.6666666666666667, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "loss": { + "702": NaN, + "703": NaN, + "704": NaN, + "705": NaN, + "706": NaN, + "707": NaN, + "708": NaN, + "709": NaN, + "710": NaN, + "711": NaN, + "712": NaN, + "713": NaN, + "714": NaN, + "715": NaN, + "716": NaN, + "717": NaN, + "718": NaN, + "719": NaN, + "720": NaN, + "721": NaN, + "722": NaN, + "723": NaN, + "724": NaN, + "725": NaN, + "726": NaN, + "727": NaN, + "728": NaN, + "729": NaN, + "730": NaN, + "731": NaN, + "732": NaN, + "733": NaN, + "734": NaN, + "735": NaN, + "736": NaN, + "737": NaN, + "738": NaN, + "739": NaN, + "740": NaN, + "741": NaN, + "742": NaN, + "743": NaN, + "744": NaN, + "745": NaN, + "746": NaN, + "747": NaN, + "748": NaN, + "749": NaN, + "750": NaN, + "751": NaN, + "752": NaN, + "753": NaN, + "754": NaN, + "755": NaN + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "train_epoch_time": 4.789714097976685, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + }, + { + "epoch": 14, + "grad_norm": NaN, + "learning_rate": 0.33333333333333337, + "model_norm": NaN, + "step_logs": { + "grad_norm": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "loss": { + "756": NaN, + "757": NaN, + "758": NaN, + "759": NaN, + "760": NaN, + "761": NaN, + "762": NaN, + "763": NaN, + "764": NaN, + "765": NaN, + "766": NaN, + "767": NaN, + "768": NaN, + "769": NaN, + "770": NaN, + "771": NaN, + "772": NaN, + "773": NaN, + "774": NaN, + "775": NaN, + "776": NaN, + "777": NaN, + "778": NaN, + "779": NaN, + "780": NaN, + "781": NaN, + "782": NaN, + "783": NaN, + "784": NaN, + "785": NaN, + "786": NaN, + "787": NaN, + "788": NaN, + "789": NaN, + "790": NaN, + "791": NaN, + "792": NaN, + "793": NaN, + "794": NaN, + "795": NaN, + "796": NaN, + "797": NaN, + "798": NaN, + "799": NaN, + "800": NaN, + "801": NaN, + "802": NaN, + "803": NaN, + "804": NaN, + "805": NaN, + "806": NaN, + "807": NaN, + "808": NaN, + "809": NaN + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "train_epoch_time": 4.789852619171143, + "train_loss": NaN, + "train_score": 0.03529972203043817, + "val_loss": NaN, + "val_score": 0.038120694663781936 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-01 18:41:38.043501", + "final_model_norm": NaN, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-01 18:39:57.297166", + "step_scheduler_on_epoch": false + } + } +] \ No newline at end of file From 81797e62faa80d257c5df4e3e259f1d7ec20a94e Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Wed, 3 Dec 2025 11:51:53 +0100 Subject: [PATCH 5/6] run --- output/lr-stability/shakespeare-2.json | 43214 +++++++++++++++++++++++ output/lr-stability/shakespeare-3.json | 43214 +++++++++++++++++++++++ 2 files changed, 86428 insertions(+) create mode 100644 output/lr-stability/shakespeare-2.json create mode 100644 output/lr-stability/shakespeare-3.json diff --git a/output/lr-stability/shakespeare-2.json b/output/lr-stability/shakespeare-2.json new file mode 100644 index 0000000..3dca842 --- /dev/null +++ b/output/lr-stability/shakespeare-2.json @@ -0,0 +1,43214 @@ +[ + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.595483422279358, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.43165588378906, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 8.304182052612305, + "3": 5.066382884979248, + "4": 3.986539840698242, + "5": 7.368899822235107, + "6": 22.684249877929688, + "7": 8.206059455871582, + "8": 5.990283966064453, + "9": 3.6286511421203613, + "10": 2.7827494144439697, + "11": 4.019216060638428, + "12": 7.367673873901367, + "13": 5.671923637390137, + "14": 8.214241027832031, + "15": 5.449173450469971, + "16": 53.0404167175293, + "17": 5.609607696533203, + "18": 3.5228676795959473, + "19": 11.873223304748535, + "20": 3.7461884021759033, + "21": 56.21821594238281, + "22": 4.051365852355957, + "23": 5.570032119750977, + "24": 6.279479026794434, + "25": 15.953083992004395, + "26": 5.07693338394165, + "27": 8.598682403564453, + "28": 3.695465326309204, + "29": 39.91961669921875, + "30": 5.038580894470215, + "31": 4.063226699829102, + "32": 3.440243721008301, + "33": 3.7905149459838867, + "34": 4.257300853729248, + "35": 3.8865303993225098, + "36": 6.444921493530273, + "37": 3.5574936866760254, + "38": 28.470718383789062, + "39": 2.530970811843872, + "40": 2.914734363555908, + "41": 3.5695884227752686, + "42": 3.06233549118042, + "43": 3.487231492996216, + "44": 2.910644054412842, + "45": 1.67691171169281, + "46": 1.6890980005264282, + "47": 3.128803253173828, + "48": 2.8322150707244873, + "49": 1.7586274147033691, + "50": 1.6818804740905762, + "51": 2.382829189300537, + "52": 2.2293994426727295, + "53": 1.595483422279358 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.94942569732666, + "3": 3.737607002258301, + "4": 3.612809658050537, + "5": 3.569070816040039, + "6": 4.066908836364746, + "7": 4.043568134307861, + "8": 3.549619436264038, + "9": 3.492824077606201, + "10": 3.3548102378845215, + "11": 3.344089984893799, + "12": 3.4439680576324463, + "13": 3.403204917907715, + "14": 3.536405324935913, + "15": 3.2851245403289795, + "16": 4.4294633865356445, + "17": 3.292044162750244, + "18": 3.1227591037750244, + "19": 3.1227288246154785, + "20": 3.1710028648376465, + "21": 4.861227512359619, + "22": 3.012739896774292, + "23": 3.187997341156006, + "24": 3.2275547981262207, + "25": 3.138049602508545, + "26": 3.1043057441711426, + "27": 3.3548502922058105, + "28": 2.914957046508789, + "29": 3.808320999145508, + "30": 3.0150039196014404, + "31": 3.117154121398926, + "32": 2.905519485473633, + "33": 2.892486810684204, + "34": 2.9366321563720703, + "35": 3.109389305114746, + "36": 2.8898768424987793, + "37": 2.8421175479888916, + "38": 6.3293070793151855, + "39": 2.8310492038726807, + "40": 2.8002824783325195, + "41": 2.942443370819092, + "42": 2.8423304557800293, + "43": 2.849879503250122, + "44": 2.9626104831695557, + "45": 2.724215507507324, + "46": 2.678582191467285, + "47": 2.75137996673584, + "48": 2.9589619636535645, + "49": 2.7027854919433594, + "50": 2.6485424041748047, + "51": 2.6648268699645996, + "52": 2.7782740592956543, + "53": 2.632413387298584 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 0.00839976, + 0.00857642, + 0.0572717, + 0.145612, + 0.227328, + 0.0657279, + 0.00790343, + 0.0600476, + 0.0989207, + 0.265269, + 0.433231, + 0.207012, + 0.0634451, + 0.105786, + 0.0524116, + 0.110634, + 0.00157448, + 0.104617, + 0.25162, + 0.0221512, + 0.225953, + 0.00153813, + 0.183552, + 0.102755, + 0.0818515, + 0.0123302, + 0.120437, + 0.0453742, + 0.213449, + 0.0023898, + 0.11876, + 0.188806, + 0.245497, + 0.201314, + 0.162025, + 0.20585, + 0.0695735, + 0.224571, + 0.00780835, + 0.44195, + 0.329613, + 0.230925, + 0.303088, + 0.23435, + 0.349701, + 0.968771, + 0.938848, + 0.281057, + 0.368882, + 0.873904, + 0.936304, + 0.469335, + 0.558984, + 1.03412 + ], + "train_epoch_time": 5.876681566238403, + "train_loss": 2.619171995480399, + "train_score": 0.25954425208366755, + "val_loss": 2.6478476132919537, + "val_score": 0.25241281593434434 + }, + { + "epoch": 1, + "grad_norm": 1.7122350931167603, + "learning_rate": 0.1, + "model_norm": 87.44861602783203, + "step_logs": { + "grad_norm": { + "54": 1.4994385242462158, + "55": 1.755557656288147, + "56": 2.5974295139312744, + "57": 2.6195151805877686, + "58": 2.341726779937744, + "59": 2.764500856399536, + "60": 2.719050168991089, + "61": 2.9073119163513184, + "62": 2.706298351287842, + "63": 2.1449856758117676, + "64": 1.3226447105407715, + "65": 1.3035261631011963, + "66": 1.8220620155334473, + "67": 2.0108823776245117, + "68": 2.291959524154663, + "69": 1.9728983640670776, + "70": 1.2969900369644165, + "71": 1.5286873579025269, + "72": 2.0283780097961426, + "73": 2.1360676288604736, + "74": 2.1657683849334717, + "75": 1.932334065437317, + "76": 1.7503284215927124, + "77": 2.015733242034912, + "78": 2.3730719089508057, + "79": 2.149228572845459, + "80": 1.7569128274917603, + "81": 1.89297354221344, + "82": 2.1936445236206055, + "83": 2.1091713905334473, + "84": 1.765440821647644, + "85": 1.8004003763198853, + "86": 1.8538652658462524, + "87": 1.8449907302856445, + "88": 1.8676313161849976, + "89": 1.710210919380188, + "90": 1.4306646585464478, + "91": 1.5061683654785156, + "92": 1.8027716875076294, + "93": 1.7483417987823486, + "94": 1.6187306642532349, + "95": 1.6917165517807007, + "96": 1.9544671773910522, + "97": 1.7831960916519165, + "98": 1.5010552406311035, + "99": 1.5395915508270264, + "100": 1.8150750398635864, + "101": 1.779003381729126, + "102": 1.6438370943069458, + "103": 1.6328306198120117, + "104": 1.665625810623169, + "105": 1.6750128269195557, + "106": 1.617784857749939, + "107": 1.7122350931167603 + }, + "loss": { + "54": 2.612858533859253, + "55": 2.6396708488464355, + "56": 2.6654889583587646, + "57": 2.8374953269958496, + "58": 2.677858352661133, + "59": 2.7912232875823975, + "60": 2.718667984008789, + "61": 2.8346023559570312, + "62": 2.71724271774292, + "63": 2.7834033966064453, + "64": 2.5961713790893555, + "65": 2.5818214416503906, + "66": 2.6074576377868652, + "67": 2.678248643875122, + "68": 2.6233010292053223, + "69": 2.725496768951416, + "70": 2.5729541778564453, + "71": 2.5963242053985596, + "72": 2.633251428604126, + "73": 2.642148971557617, + "74": 2.6354236602783203, + "75": 2.683126449584961, + "76": 2.57817006111145, + "77": 2.649172306060791, + "78": 2.6319797039031982, + "79": 2.7170534133911133, + "80": 2.5920491218566895, + "81": 2.6264402866363525, + "82": 2.59210205078125, + "83": 2.6878161430358887, + "84": 2.592118263244629, + "85": 2.6103262901306152, + "86": 2.588736057281494, + "87": 2.6495399475097656, + "88": 2.565886974334717, + "89": 2.646350860595703, + "90": 2.56858491897583, + "91": 2.581890106201172, + "92": 2.570685386657715, + "93": 2.620863437652588, + "94": 2.5663914680480957, + "95": 2.6002283096313477, + "96": 2.569908380508423, + "97": 2.6125473976135254, + "98": 2.5333504676818848, + "99": 2.573561191558838, + "100": 2.541639804840088, + "101": 2.618934154510498, + "102": 2.5519614219665527, + "103": 2.6009321212768555, + "104": 2.5399415493011475, + "105": 2.5924625396728516, + "106": 2.528653144836426, + "107": 2.614945411682129 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 1.16214, + 0.856485, + 0.395084, + 0.413517, + 0.488332, + 0.365226, + 0.367724, + 0.335358, + 0.371003, + 0.604961, + 1.48404, + 1.51945, + 0.7854, + 0.662335, + 0.499384, + 0.700223, + 1.52953, + 1.11102, + 0.640021, + 0.579065, + 0.561858, + 0.718583, + 0.841536, + 0.651995, + 0.46737, + 0.588211, + 0.839736, + 0.732957, + 0.538666, + 0.604193, + 0.831665, + 0.805298, + 0.753237, + 0.778363, + 0.735623, + 0.904791, + 1.25493, + 1.13813, + 0.790984, + 0.857416, + 0.979431, + 0.908566, + 0.672761, + 0.82161, + 1.12435, + 1.08573, + 0.77148, + 0.827506, + 0.944401, + 0.975544, + 0.915522, + 0.924009, + 0.966157, + 0.89194 + ], + "train_epoch_time": 5.049004793167114, + "train_loss": 2.554169839287078, + "train_score": 0.2546673241869107, + "val_loss": 2.5931542007301758, + "val_score": 0.25052920456047584 + }, + { + "epoch": 2, + "grad_norm": 1.5213649272918701, + "learning_rate": 0.1, + "model_norm": 87.4622573852539, + "step_logs": { + "grad_norm": { + "108": 1.7698161602020264, + "109": 1.6863858699798584, + "110": 1.4905215501785278, + "111": 1.484915852546692, + "112": 1.5513427257537842, + "113": 1.5944156646728516, + "114": 1.670814871788025, + "115": 1.6587823629379272, + "116": 1.5405527353286743, + "117": 1.5328104496002197, + "118": 1.5365923643112183, + "119": 1.8154313564300537, + "120": 2.2267980575561523, + "121": 1.9328863620758057, + "122": 1.616337537765503, + "123": 1.5008413791656494, + "124": 1.6445186138153076, + "125": 1.6981024742126465, + "126": 1.7186658382415771, + "127": 1.6501388549804688, + "128": 1.4911203384399414, + "129": 1.481259822845459, + "130": 1.7012332677841187, + "131": 1.6902801990509033, + "132": 1.4128209352493286, + "133": 1.392470359802246, + "134": 1.4971853494644165, + "135": 1.559562087059021, + "136": 1.5591846704483032, + "137": 1.5602431297302246, + "138": 1.621742844581604, + "139": 1.6631306409835815, + "140": 1.6960033178329468, + "141": 1.6355992555618286, + "142": 1.5226027965545654, + "143": 1.499666452407837, + "144": 1.4234119653701782, + "145": 1.4620394706726074, + "146": 1.5172392129898071, + "147": 1.505304217338562, + "148": 1.5338846445083618, + "149": 1.565070629119873, + "150": 1.707519292831421, + "151": 1.6376944780349731, + "152": 1.4706141948699951, + "153": 1.5124589204788208, + "154": 1.70602285861969, + "155": 1.7617076635360718, + "156": 1.6470904350280762, + "157": 1.5048695802688599, + "158": 1.423397183418274, + "159": 1.3689265251159668, + "160": 1.4615544080734253, + "161": 1.5213649272918701 + }, + "loss": { + "108": 2.5415701866149902, + "109": 2.580026149749756, + "110": 2.5348711013793945, + "111": 2.571901321411133, + "112": 2.5279598236083984, + "113": 2.5806150436401367, + "114": 2.53316068649292, + "115": 2.5662384033203125, + "116": 2.538095474243164, + "117": 2.5629286766052246, + "118": 2.542416572570801, + "119": 2.5777578353881836, + "120": 2.5772297382354736, + "121": 2.6121902465820312, + "122": 2.5644259452819824, + "123": 2.5671091079711914, + "124": 2.541909694671631, + "125": 2.562094211578369, + "126": 2.5294177532196045, + "127": 2.5802440643310547, + "128": 2.5231287479400635, + "129": 2.5453009605407715, + "130": 2.5176773071289062, + "131": 2.5860157012939453, + "132": 2.5463199615478516, + "133": 2.5190093517303467, + "134": 2.5388855934143066, + "135": 2.557464122772217, + "136": 2.550321578979492, + "137": 2.535327911376953, + "138": 2.535480260848999, + "139": 2.546576976776123, + "140": 2.5351390838623047, + "141": 2.5489518642425537, + "142": 2.511795997619629, + "143": 2.5451183319091797, + "144": 2.538144588470459, + "145": 2.524411201477051, + "146": 2.5095009803771973, + "147": 2.537546157836914, + "148": 2.5233800411224365, + "149": 2.5372838973999023, + "150": 2.5321202278137207, + "151": 2.5746989250183105, + "152": 2.5178050994873047, + "153": 2.534905433654785, + "154": 2.538670778274536, + "155": 2.5623373985290527, + "156": 2.530207633972168, + "157": 2.5415940284729004, + "158": 2.5066914558410645, + "159": 2.528236150741577, + "160": 2.4945034980773926, + "161": 2.5192644596099854 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 0.81142, + 0.907215, + 1.14098, + 1.16641, + 1.0504, + 1.01513, + 0.907415, + 0.932649, + 1.06944, + 1.09084, + 1.07679, + 0.782136, + 0.519746, + 0.699185, + 0.981581, + 1.13966, + 0.939902, + 0.88852, + 0.856323, + 0.947589, + 1.13479, + 1.16005, + 0.869906, + 0.905136, + 1.27567, + 1.29915, + 1.13264, + 1.05149, + 1.04906, + 1.04148, + 0.964043, + 0.92067, + 0.88135, + 0.952813, + 1.08346, + 1.13167, + 1.25272, + 1.18098, + 1.09013, + 1.11986, + 1.0725, + 1.03586, + 0.868466, + 0.959977, + 1.16419, + 1.10814, + 0.872241, + 0.825598, + 0.932656, + 1.1223, + 1.23722, + 1.34914, + 1.16776, + 1.08845 + ], + "train_epoch_time": 5.04582142829895, + "train_loss": 2.5082411634699686, + "train_score": 0.26282841634203064, + "val_loss": 2.5498671890263167, + "val_score": 0.2559916757209703 + }, + { + "epoch": 3, + "grad_norm": 1.4472090005874634, + "learning_rate": 0.1, + "model_norm": 87.4747314453125, + "step_logs": { + "grad_norm": { + "162": 1.4126492738723755, + "163": 1.2858518362045288, + "164": 1.2448664903640747, + "165": 1.3287618160247803, + "166": 1.4017678499221802, + "167": 1.5514181852340698, + "168": 1.6390444040298462, + "169": 1.5579659938812256, + "170": 1.422958493232727, + "171": 1.3877211809158325, + "172": 1.399069905281067, + "173": 1.4332078695297241, + "174": 1.4144973754882812, + "175": 1.3887548446655273, + "176": 1.3306553363800049, + "177": 1.2968372106552124, + "178": 1.2731873989105225, + "179": 1.2861305475234985, + "180": 1.5044138431549072, + "181": 1.6309152841567993, + "182": 1.5537265539169312, + "183": 1.4571703672409058, + "184": 1.4838138818740845, + "185": 1.5193510055541992, + "186": 1.525577425956726, + "187": 1.5352483987808228, + "188": 1.338010549545288, + "189": 1.2651041746139526, + "190": 1.3229663372039795, + "191": 1.3557943105697632, + "192": 1.3098673820495605, + "193": 1.3461573123931885, + "194": 1.3851524591445923, + "195": 1.4443306922912598, + "196": 1.5883374214172363, + "197": 1.6625257730484009, + "198": 1.7239654064178467, + "199": 1.640328049659729, + "200": 1.651151418685913, + "201": 1.4805493354797363, + "202": 1.3065520524978638, + "203": 1.4018487930297852, + "204": 1.5423328876495361, + "205": 1.6838067770004272, + "206": 1.9795117378234863, + "207": 1.6968928575515747, + "208": 1.1992686986923218, + "209": 1.0217435359954834, + "210": 1.064129114151001, + "211": 1.1120821237564087, + "212": 1.2089051008224487, + "213": 1.3125848770141602, + "214": 1.4519954919815063, + "215": 1.4472090005874634 + }, + "loss": { + "162": 2.5166265964508057, + "163": 2.518171787261963, + "164": 2.482665777206421, + "165": 2.5133309364318848, + "166": 2.5067520141601562, + "167": 2.525150775909424, + "168": 2.5074658393859863, + "169": 2.532771587371826, + "170": 2.507209062576294, + "171": 2.5089704990386963, + "172": 2.5103073120117188, + "173": 2.5220656394958496, + "174": 2.490757942199707, + "175": 2.4923863410949707, + "176": 2.505175828933716, + "177": 2.494217872619629, + "178": 2.4662699699401855, + "179": 2.5162346363067627, + "180": 2.5031118392944336, + "181": 2.5442981719970703, + "182": 2.502875804901123, + "183": 2.5245161056518555, + "184": 2.511025905609131, + "185": 2.5248634815216064, + "186": 2.5211057662963867, + "187": 2.5176310539245605, + "188": 2.505277633666992, + "189": 2.512939214706421, + "190": 2.5019590854644775, + "191": 2.4912538528442383, + "192": 2.486813545227051, + "193": 2.4861788749694824, + "194": 2.4946980476379395, + "195": 2.492351531982422, + "196": 2.5079643726348877, + "197": 2.5245511531829834, + "198": 2.4963221549987793, + "199": 2.5263419151306152, + "200": 2.527980327606201, + "201": 2.5093507766723633, + "202": 2.489234685897827, + "203": 2.4962615966796875, + "204": 2.505506992340088, + "205": 2.530435085296631, + "206": 2.530974864959717, + "207": 2.546389579772949, + "208": 2.4833383560180664, + "209": 2.4681429862976074, + "210": 2.463146209716797, + "211": 2.4733996391296387, + "212": 2.4631106853485107, + "213": 2.49599027633667, + "214": 2.499584674835205, + "215": 2.5120723247528076 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 1.2611, + 1.52301, + 1.60204, + 1.42349, + 1.27573, + 1.04913, + 0.93337, + 1.04347, + 1.23824, + 1.30284, + 1.28247, + 1.22783, + 1.24488, + 1.2923, + 1.41484, + 1.48308, + 1.52144, + 1.52118, + 1.10598, + 0.956545, + 1.03679, + 1.18893, + 1.14049, + 1.09376, + 1.08323, + 1.06816, + 1.39938, + 1.57011, + 1.42949, + 1.35528, + 1.4494, + 1.37196, + 1.30024, + 1.19475, + 0.994113, + 0.913371, + 0.839931, + 0.938925, + 0.927257, + 1.14476, + 1.45818, + 1.27025, + 1.05327, + 0.892505, + 0.645909, + 0.884333, + 1.72664, + 2.36421, + 2.17521, + 1.99996, + 1.68539, + 1.44873, + 1.1856, + 1.19942 + ], + "train_epoch_time": 5.043837070465088, + "train_loss": 2.4864724651812824, + "train_score": 0.26887329614145344, + "val_loss": 2.529483837320391, + "val_score": 0.25956156747212505 + }, + { + "epoch": 4, + "grad_norm": 1.6333067417144775, + "learning_rate": 0.1, + "model_norm": 87.48685455322266, + "step_logs": { + "grad_norm": { + "216": 1.4290608167648315, + "217": 1.3120534420013428, + "218": 1.2739180326461792, + "219": 1.3524606227874756, + "220": 1.4517425298690796, + "221": 1.5836673974990845, + "222": 1.5921634435653687, + "223": 1.4788835048675537, + "224": 1.2752031087875366, + "225": 1.182509422302246, + "226": 1.2438864707946777, + "227": 1.4129899740219116, + "228": 1.379360318183899, + "229": 1.234757661819458, + "230": 1.2055987119674683, + "231": 1.3568651676177979, + "232": 1.4966015815734863, + "233": 1.531385898590088, + "234": 1.359263300895691, + "235": 1.1495434045791626, + "236": 1.3062193393707275, + "237": 1.463045358657837, + "238": 1.5421591997146606, + "239": 1.5483026504516602, + "240": 1.4234495162963867, + "241": 1.3835184574127197, + "242": 1.3792067766189575, + "243": 1.365871787071228, + "244": 1.2870206832885742, + "245": 1.3198952674865723, + "246": 1.2530943155288696, + "247": 1.2332710027694702, + "248": 1.1639885902404785, + "249": 1.1980524063110352, + "250": 1.4111249446868896, + "251": 1.5256085395812988, + "252": 1.6563024520874023, + "253": 1.634125828742981, + "254": 1.647292137145996, + "255": 1.6125293970108032, + "256": 1.488387107849121, + "257": 1.505018949508667, + "258": 1.3945235013961792, + "259": 1.3868354558944702, + "260": 1.5193884372711182, + "261": 1.4111015796661377, + "262": 1.33878493309021, + "263": 1.4832243919372559, + "264": 1.6625736951828003, + "265": 1.4743620157241821, + "266": 1.3665927648544312, + "267": 1.350955605506897, + "268": 1.4537014961242676, + "269": 1.6333067417144775 + }, + "loss": { + "216": 2.498208522796631, + "217": 2.5084166526794434, + "218": 2.468519687652588, + "219": 2.4700491428375244, + "220": 2.4857678413391113, + "221": 2.507571220397949, + "222": 2.4933125972747803, + "223": 2.5257890224456787, + "224": 2.4855804443359375, + "225": 2.4715840816497803, + "226": 2.481645107269287, + "227": 2.4919357299804688, + "228": 2.4991374015808105, + "229": 2.4684383869171143, + "230": 2.467231273651123, + "231": 2.4887256622314453, + "232": 2.5055744647979736, + "233": 2.4857892990112305, + "234": 2.510507822036743, + "235": 2.4743242263793945, + "236": 2.4503369331359863, + "237": 2.500880002975464, + "238": 2.4847540855407715, + "239": 2.491593360900879, + "240": 2.477583885192871, + "241": 2.4990100860595703, + "242": 2.469034433364868, + "243": 2.484649896621704, + "244": 2.465479850769043, + "245": 2.483989953994751, + "246": 2.451329231262207, + "247": 2.469083070755005, + "248": 2.4642605781555176, + "249": 2.4573283195495605, + "250": 2.4803404808044434, + "251": 2.485412359237671, + "252": 2.482370615005493, + "253": 2.516460657119751, + "254": 2.4841811656951904, + "255": 2.5117170810699463, + "256": 2.4905197620391846, + "257": 2.499704360961914, + "258": 2.460371255874634, + "259": 2.47316837310791, + "260": 2.477318048477173, + "261": 2.4979257583618164, + "262": 2.482491970062256, + "263": 2.4682559967041016, + "264": 2.4702749252319336, + "265": 2.4952211380004883, + "266": 2.4528074264526367, + "267": 2.4531466960906982, + "268": 2.457468032836914, + "269": 2.48698091506958 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 1.22328, + 1.45712, + 1.52109, + 1.35038, + 1.17946, + 0.999828, + 0.983561, + 1.15486, + 1.52851, + 1.76753, + 1.6039, + 1.24813, + 1.31351, + 1.61904, + 1.69748, + 1.35177, + 1.11865, + 1.05997, + 1.3588, + 1.87243, + 1.43613, + 1.16836, + 1.04478, + 1.03936, + 1.22277, + 1.30556, + 1.29798, + 1.33182, + 1.48844, + 1.42584, + 1.56111, + 1.62337, + 1.81882, + 1.71203, + 1.2456, + 1.06785, + 0.904872, + 0.942365, + 0.915466, + 0.965952, + 1.12424, + 1.10358, + 1.26517, + 1.28589, + 1.07311, + 1.25448, + 1.38505, + 1.12196, + 0.893683, + 1.14789, + 1.31336, + 1.34413, + 1.16289, + 0.93226 + ], + "train_epoch_time": 5.047112941741943, + "train_loss": 2.4842419149543837, + "train_score": 0.25807142210656636, + "val_loss": 2.520354661821087, + "val_score": 0.2501973307912851 + }, + { + "epoch": 5, + "grad_norm": 1.5142675638198853, + "learning_rate": 0.1, + "model_norm": 87.50237274169922, + "step_logs": { + "grad_norm": { + "270": 1.6810544729232788, + "271": 1.5603219270706177, + "272": 1.3195146322250366, + "273": 1.1578401327133179, + "274": 1.2478570938110352, + "275": 1.3786864280700684, + "276": 1.5500452518463135, + "277": 1.421463131904602, + "278": 1.341933250427246, + "279": 1.4265083074569702, + "280": 1.3541568517684937, + "281": 1.3280987739562988, + "282": 1.4996294975280762, + "283": 1.4291025400161743, + "284": 1.2934948205947876, + "285": 1.3371226787567139, + "286": 1.463035225868225, + "287": 1.5002892017364502, + "288": 1.5061780214309692, + "289": 1.704079508781433, + "290": 1.914076566696167, + "291": 1.7685819864273071, + "292": 1.3837261199951172, + "293": 1.202583909034729, + "294": 1.2350960969924927, + "295": 1.3258447647094727, + "296": 1.4913440942764282, + "297": 1.6742933988571167, + "298": 1.4741325378417969, + "299": 1.2376271486282349, + "300": 1.3532912731170654, + "301": 1.407514214515686, + "302": 1.258879542350769, + "303": 1.1456117630004883, + "304": 1.2588986158370972, + "305": 1.4454586505889893, + "306": 1.8084183931350708, + "307": 1.835208773612976, + "308": 1.47889244556427, + "309": 1.2990059852600098, + "310": 1.4059151411056519, + "311": 1.3239109516143799, + "312": 1.2116364240646362, + "313": 1.2776561975479126, + "314": 1.3373874425888062, + "315": 1.380043625831604, + "316": 1.561672568321228, + "317": 1.5787192583084106, + "318": 1.5495904684066772, + "319": 1.6338167190551758, + "320": 1.7478995323181152, + "321": 1.904791235923767, + "322": 1.7893041372299194, + "323": 1.5142675638198853 + }, + "loss": { + "270": 2.4693410396575928, + "271": 2.474403142929077, + "272": 2.481935977935791, + "273": 2.4520368576049805, + "274": 2.4503657817840576, + "275": 2.452192783355713, + "276": 2.4742250442504883, + "277": 2.4822051525115967, + "278": 2.4493818283081055, + "279": 2.4777278900146484, + "280": 2.456301212310791, + "281": 2.4464101791381836, + "282": 2.4542148113250732, + "283": 2.4563732147216797, + "284": 2.4444618225097656, + "285": 2.4467639923095703, + "286": 2.4570093154907227, + "287": 2.4427075386047363, + "288": 2.4584643840789795, + "289": 2.4551587104797363, + "290": 2.4839529991149902, + "291": 2.4930782318115234, + "292": 2.438288688659668, + "293": 2.4505019187927246, + "294": 2.404829502105713, + "295": 2.4518871307373047, + "296": 2.4218006134033203, + "297": 2.45648193359375, + "298": 2.4494688510894775, + "299": 2.416492462158203, + "300": 2.4349756240844727, + "301": 2.4601359367370605, + "302": 2.423393726348877, + "303": 2.4048194885253906, + "304": 2.4216668605804443, + "305": 2.426604747772217, + "306": 2.4317641258239746, + "307": 2.487464189529419, + "308": 2.4466681480407715, + "309": 2.4164767265319824, + "310": 2.409547805786133, + "311": 2.440345287322998, + "312": 2.389723300933838, + "313": 2.4066061973571777, + "314": 2.394221067428589, + "315": 2.4129481315612793, + "316": 2.428553581237793, + "317": 2.4317996501922607, + "318": 2.4167392253875732, + "319": 2.428300142288208, + "320": 2.4225542545318604, + "321": 2.444823741912842, + "322": 2.4715967178344727, + "323": 2.408566474914551 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 0.873811, + 1.01635, + 1.42548, + 1.82907, + 1.57362, + 1.2901, + 1.02979, + 1.22848, + 1.36017, + 1.2176, + 1.3395, + 1.38697, + 1.0913, + 1.20273, + 1.46101, + 1.36852, + 1.14788, + 1.08523, + 1.08371, + 0.845473, + 0.677992, + 0.79705, + 1.27346, + 1.69443, + 1.57646, + 1.39481, + 1.08889, + 0.876295, + 1.12719, + 1.57763, + 1.32957, + 1.24181, + 1.52917, + 1.83235, + 1.52803, + 1.16142, + 0.743573, + 0.73856, + 1.11867, + 1.43206, + 1.21904, + 1.3923, + 1.62781, + 1.47427, + 1.3386, + 1.26696, + 0.99579, + 0.975704, + 1.00646, + 0.909695, + 0.79294, + 0.673834, + 0.771986, + 1.0504 + ], + "train_epoch_time": 5.054343223571777, + "train_loss": 2.4141224595703386, + "train_score": 0.2681223097969504, + "val_loss": 2.4734828012271537, + "val_score": 0.25829237227324914 + }, + { + "epoch": 6, + "grad_norm": 1.3155577182769775, + "learning_rate": 0.1, + "model_norm": 87.51831817626953, + "step_logs": { + "grad_norm": { + "324": 1.418066382408142, + "325": 1.26797616481781, + "326": 1.3308789730072021, + "327": 1.3658920526504517, + "328": 1.509848952293396, + "329": 1.6191020011901855, + "330": 1.5451252460479736, + "331": 1.455051064491272, + "332": 1.3848249912261963, + "333": 1.2380965948104858, + "334": 1.2768330574035645, + "335": 1.2965487241744995, + "336": 1.532166838645935, + "337": 1.5866661071777344, + "338": 1.4262930154800415, + "339": 1.3868048191070557, + "340": 1.3542104959487915, + "341": 1.3543922901153564, + "342": 1.6262602806091309, + "343": 1.951874852180481, + "344": 2.2120168209075928, + "345": 2.171908140182495, + "346": 1.7381994724273682, + "347": 1.292962908744812, + "348": 1.3616982698440552, + "349": 1.5318586826324463, + "350": 1.463059425354004, + "351": 1.53459632396698, + "352": 1.513016700744629, + "353": 1.4959349632263184, + "354": 1.5368945598602295, + "355": 1.5273686647415161, + "356": 1.4757351875305176, + "357": 1.4687684774398804, + "358": 1.3882992267608643, + "359": 1.2901830673217773, + "360": 1.3259265422821045, + "361": 1.401950478553772, + "362": 1.4129544496536255, + "363": 1.2871674299240112, + "364": 1.2704095840454102, + "365": 1.3863569498062134, + "366": 1.3463984727859497, + "367": 1.3396457433700562, + "368": 1.609025001525879, + "369": 1.774939775466919, + "370": 1.7160730361938477, + "371": 1.55253005027771, + "372": 1.293172836303711, + "373": 1.4918750524520874, + "374": 1.6320565938949585, + "375": 1.781571388244629, + "376": 1.6311960220336914, + "377": 1.3155577182769775 + }, + "loss": { + "324": 2.3906726837158203, + "325": 2.409987449645996, + "326": 2.3870949745178223, + "327": 2.3950319290161133, + "328": 2.412079334259033, + "329": 2.393211841583252, + "330": 2.3995373249053955, + "331": 2.4038217067718506, + "332": 2.3917741775512695, + "333": 2.382768154144287, + "334": 2.362889289855957, + "335": 2.37251615524292, + "336": 2.414001703262329, + "337": 2.415994167327881, + "338": 2.389451026916504, + "339": 2.3947300910949707, + "340": 2.380363941192627, + "341": 2.383960247039795, + "342": 2.397738218307495, + "343": 2.403510093688965, + "344": 2.4402947425842285, + "345": 2.438345432281494, + "346": 2.4622936248779297, + "347": 2.3993618488311768, + "348": 2.4011447429656982, + "349": 2.3996105194091797, + "350": 2.406304121017456, + "351": 2.402252674102783, + "352": 2.4081785678863525, + "353": 2.392700433731079, + "354": 2.405967950820923, + "355": 2.380117893218994, + "356": 2.390638828277588, + "357": 2.381976366043091, + "358": 2.410921573638916, + "359": 2.3644192218780518, + "360": 2.3576500415802, + "361": 2.3726725578308105, + "362": 2.379549980163574, + "363": 2.3842437267303467, + "364": 2.3507213592529297, + "365": 2.3805453777313232, + "366": 2.392122268676758, + "367": 2.371797561645508, + "368": 2.395376682281494, + "369": 2.4013423919677734, + "370": 2.392371892929077, + "371": 2.4127049446105957, + "372": 2.379133462905884, + "373": 2.364417314529419, + "374": 2.4106600284576416, + "375": 2.385195732116699, + "376": 2.4129273891448975, + "377": 2.3587918281555176 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 1.18885, + 1.49897, + 1.3477, + 1.28374, + 1.05809, + 0.91292, + 1.00508, + 1.13539, + 1.24718, + 1.55444, + 1.44936, + 1.41134, + 1.02831, + 0.959676, + 1.17457, + 1.24516, + 1.29799, + 1.2996, + 0.906612, + 0.630873, + 0.49873, + 0.516907, + 0.814968, + 1.43524, + 1.29496, + 1.02259, + 1.12416, + 1.02007, + 1.05196, + 1.06921, + 1.0186, + 1.02026, + 1.09773, + 1.10416, + 1.25088, + 1.42044, + 1.34104, + 1.20718, + 1.1919, + 1.43907, + 1.45651, + 1.23859, + 1.31958, + 1.32159, + 0.925227, + 0.762231, + 0.812376, + 1.00098, + 1.42267, + 1.06233, + 0.905035, + 0.75148, + 0.906843, + 1.36292 + ], + "train_epoch_time": 5.047745704650879, + "train_loss": 2.361743169330285, + "train_score": 0.29974892387431185, + "val_loss": 2.4082364491288617, + "val_score": 0.2869053176432061 + }, + { + "epoch": 7, + "grad_norm": 1.4832714796066284, + "learning_rate": 0.1, + "model_norm": 87.5346450805664, + "step_logs": { + "grad_norm": { + "378": 1.2507907152175903, + "379": 1.2716996669769287, + "380": 1.3078027963638306, + "381": 1.3168158531188965, + "382": 1.2987083196640015, + "383": 1.2467254400253296, + "384": 1.3580095767974854, + "385": 1.6096895933151245, + "386": 1.5116077661514282, + "387": 1.3220933675765991, + "388": 1.473404049873352, + "389": 1.6791013479232788, + "390": 1.6889094114303589, + "391": 1.5956958532333374, + "392": 1.5935845375061035, + "393": 1.4105433225631714, + "394": 1.271815299987793, + "395": 1.3040547370910645, + "396": 1.3289153575897217, + "397": 1.1971837282180786, + "398": 1.221039056777954, + "399": 1.423857569694519, + "400": 1.5197818279266357, + "401": 1.584545612335205, + "402": 1.4568161964416504, + "403": 1.227216124534607, + "404": 1.2610230445861816, + "405": 1.5278589725494385, + "406": 1.4574626684188843, + "407": 1.3358162641525269, + "408": 1.221930742263794, + "409": 1.2022416591644287, + "410": 1.3379154205322266, + "411": 1.5157263278961182, + "412": 1.5712929964065552, + "413": 1.6875874996185303, + "414": 1.6722849607467651, + "415": 1.7562932968139648, + "416": 1.8147763013839722, + "417": 1.6781888008117676, + "418": 1.631113886833191, + "419": 1.6726852655410767, + "420": 1.5651556253433228, + "421": 1.3145784139633179, + "422": 1.3216067552566528, + "423": 1.4262635707855225, + "424": 1.352217674255371, + "425": 1.2871074676513672, + "426": 1.2919652462005615, + "427": 1.3381145000457764, + "428": 1.3508021831512451, + "429": 1.5001040697097778, + "430": 1.5713080167770386, + "431": 1.4832714796066284 + }, + "loss": { + "378": 2.367905855178833, + "379": 2.3387227058410645, + "380": 2.375861883163452, + "381": 2.3376030921936035, + "382": 2.3504133224487305, + "383": 2.3421030044555664, + "384": 2.3537960052490234, + "385": 2.357940673828125, + "386": 2.388073444366455, + "387": 2.3495824337005615, + "388": 2.3642053604125977, + "389": 2.376500129699707, + "390": 2.3908321857452393, + "391": 2.3723621368408203, + "392": 2.3595778942108154, + "393": 2.3609061241149902, + "394": 2.3399276733398438, + "395": 2.3277668952941895, + "396": 2.361086845397949, + "397": 2.336249351501465, + "398": 2.362821578979492, + "399": 2.327812433242798, + "400": 2.3626058101654053, + "401": 2.3715500831604004, + "402": 2.3660147190093994, + "403": 2.3475341796875, + "404": 2.341981887817383, + "405": 2.3601129055023193, + "406": 2.3760735988616943, + "407": 2.334378242492676, + "408": 2.328312397003174, + "409": 2.341625452041626, + "410": 2.3512682914733887, + "411": 2.3703114986419678, + "412": 2.3764235973358154, + "413": 2.357820987701416, + "414": 2.3812451362609863, + "415": 2.3705861568450928, + "416": 2.388590097427368, + "417": 2.3676886558532715, + "418": 2.357398509979248, + "419": 2.3815762996673584, + "420": 2.359773635864258, + "421": 2.3335471153259277, + "422": 2.337965965270996, + "423": 2.3463830947875977, + "424": 2.3496408462524414, + "425": 2.325202465057373, + "426": 2.338893175125122, + "427": 2.323929786682129, + "428": 2.3332583904266357, + "429": 2.3421614170074463, + "430": 2.3548731803894043, + "431": 2.3263583183288574 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 1.51354, + 1.44614, + 1.38911, + 1.3481, + 1.39355, + 1.50683, + 1.27633, + 0.910015, + 1.04513, + 1.34421, + 1.08903, + 0.842915, + 0.838178, + 0.93171, + 0.929146, + 1.1866, + 1.44662, + 1.36882, + 1.33696, + 1.63004, + 1.58479, + 1.14819, + 1.02289, + 0.944545, + 1.11483, + 1.55873, + 1.47278, + 1.01104, + 1.11857, + 1.30821, + 1.55937, + 1.62007, + 1.31354, + 1.03172, + 0.962519, + 0.827901, + 0.851498, + 0.768532, + 0.725263, + 0.840704, + 0.886063, + 0.851209, + 0.963286, + 1.35034, + 1.33855, + 1.15345, + 1.28502, + 1.40356, + 1.40123, + 1.29789, + 1.27873, + 1.04082, + 0.953773, + 1.05739 + ], + "train_epoch_time": 5.046604156494141, + "train_loss": 2.3419349491168644, + "train_score": 0.30487468625756897, + "val_loss": 2.393266716178605, + "val_score": 0.2925157869655968 + }, + { + "epoch": 8, + "grad_norm": 1.6719913482666016, + "learning_rate": 0.1, + "model_norm": 87.55176544189453, + "step_logs": { + "grad_norm": { + "432": 1.3772529363632202, + "433": 1.3426146507263184, + "434": 1.3324344158172607, + "435": 1.358776569366455, + "436": 1.4715733528137207, + "437": 1.4788800477981567, + "438": 1.4976952075958252, + "439": 1.3530436754226685, + "440": 1.3663952350616455, + "441": 1.4930800199508667, + "442": 1.5838810205459595, + "443": 1.513715147972107, + "444": 1.3886741399765015, + "445": 1.199733853340149, + "446": 1.1420347690582275, + "447": 1.2655256986618042, + "448": 1.4025110006332397, + "449": 1.4412257671356201, + "450": 1.4895614385604858, + "451": 1.5109293460845947, + "452": 1.5501667261123657, + "453": 1.658917784690857, + "454": 1.5020052194595337, + "455": 1.365525722503662, + "456": 1.3858025074005127, + "457": 1.5922483205795288, + "458": 1.7014318704605103, + "459": 1.5323066711425781, + "460": 1.4663453102111816, + "461": 1.5507745742797852, + "462": 1.418809175491333, + "463": 1.3079020977020264, + "464": 1.497633457183838, + "465": 1.5724527835845947, + "466": 1.3144352436065674, + "467": 1.224091649055481, + "468": 1.2142049074172974, + "469": 1.1949164867401123, + "470": 1.262269139289856, + "471": 1.2704116106033325, + "472": 1.2816797494888306, + "473": 1.4210776090621948, + "474": 1.5883044004440308, + "475": 1.6863433122634888, + "476": 1.7087546586990356, + "477": 1.6457250118255615, + "478": 1.5868438482284546, + "479": 1.602194905281067, + "480": 1.4961035251617432, + "481": 1.38497793674469, + "482": 1.2938809394836426, + "483": 1.3164377212524414, + "484": 1.5357096195220947, + "485": 1.6719913482666016 + }, + "loss": { + "432": 2.3569703102111816, + "433": 2.3265621662139893, + "434": 2.3311655521392822, + "435": 2.325826406478882, + "436": 2.348071813583374, + "437": 2.3353023529052734, + "438": 2.349766492843628, + "439": 2.329619884490967, + "440": 2.3471851348876953, + "441": 2.3441779613494873, + "442": 2.3317179679870605, + "443": 2.3459043502807617, + "444": 2.3380584716796875, + "445": 2.334829092025757, + "446": 2.304365634918213, + "447": 2.2944769859313965, + "448": 2.3185274600982666, + "449": 2.3153786659240723, + "450": 2.341874122619629, + "451": 2.3294148445129395, + "452": 2.323920488357544, + "453": 2.332111120223999, + "454": 2.3540799617767334, + "455": 2.3185930252075195, + "456": 2.3187055587768555, + "457": 2.327409267425537, + "458": 2.3606953620910645, + "459": 2.3353662490844727, + "460": 2.321321964263916, + "461": 2.330463409423828, + "462": 2.33009672164917, + "463": 2.3038291931152344, + "464": 2.3188774585723877, + "465": 2.350034236907959, + "466": 2.32249116897583, + "467": 2.310570240020752, + "468": 2.298391580581665, + "469": 2.300656318664551, + "470": 2.294245719909668, + "471": 2.3013916015625, + "472": 2.311711311340332, + "473": 2.305185079574585, + "474": 2.307091236114502, + "475": 2.3272156715393066, + "476": 2.310476303100586, + "477": 2.3332691192626953, + "478": 2.333348274230957, + "479": 2.329441547393799, + "480": 2.333315372467041, + "481": 2.306332588195801, + "482": 2.2903270721435547, + "483": 2.2886452674865723, + "484": 2.3130598068237305, + "485": 2.3527469635009766 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 1.24259, + 1.29066, + 1.31305, + 1.25974, + 1.0843, + 1.06777, + 1.04756, + 1.27251, + 1.25717, + 1.05154, + 0.92946, + 1.02382, + 1.21242, + 1.62213, + 1.76682, + 1.43266, + 1.17869, + 1.1147, + 1.05547, + 1.02037, + 0.967085, + 0.847422, + 1.04347, + 1.24344, + 1.20738, + 0.918018, + 0.815475, + 0.994635, + 1.0796, + 0.969047, + 1.15751, + 1.34679, + 1.03387, + 0.950427, + 1.34424, + 1.54202, + 1.55898, + 1.6113, + 1.43991, + 1.42594, + 1.40726, + 1.14148, + 0.914529, + 0.81836, + 0.791302, + 0.86149, + 0.92664, + 0.907447, + 1.04244, + 1.20236, + 1.36807, + 1.32062, + 0.980773, + 0.841603 + ], + "train_epoch_time": 5.05733060836792, + "train_loss": 2.3190040145064015, + "train_score": 0.31665172162151745, + "val_loss": 2.3817042644754753, + "val_score": 0.30194729457082214 + }, + { + "epoch": 9, + "grad_norm": 1.7779604196548462, + "learning_rate": 0.1, + "model_norm": 87.56884765625, + "step_logs": { + "grad_norm": { + "486": 1.6742181777954102, + "487": 1.551944375038147, + "488": 1.386225938796997, + "489": 1.5135105848312378, + "490": 1.717846393585205, + "491": 1.7236398458480835, + "492": 1.6709500551223755, + "493": 1.867740511894226, + "494": 1.5722646713256836, + "495": 1.3661384582519531, + "496": 1.3567614555358887, + "497": 1.3679317235946655, + "498": 1.327094316482544, + "499": 1.2675182819366455, + "500": 1.2265138626098633, + "501": 1.1087000370025635, + "502": 1.055065393447876, + "503": 1.16732919216156, + "504": 1.2666935920715332, + "505": 1.3211218118667603, + "506": 1.470011591911316, + "507": 1.7048187255859375, + "508": 1.79426109790802, + "509": 1.7555835247039795, + "510": 1.5852285623550415, + "511": 1.4790658950805664, + "512": 1.4900202751159668, + "513": 1.4202228784561157, + "514": 1.2604990005493164, + "515": 1.227232575416565, + "516": 1.250272274017334, + "517": 1.4230231046676636, + "518": 1.522752285003662, + "519": 1.4509660005569458, + "520": 1.2020130157470703, + "521": 1.1757957935333252, + "522": 1.2957830429077148, + "523": 1.5180374383926392, + "524": 1.6535571813583374, + "525": 1.6018004417419434, + "526": 1.5107072591781616, + "527": 1.3575783967971802, + "528": 1.324666976928711, + "529": 1.362179160118103, + "530": 1.474269986152649, + "531": 1.5422247648239136, + "532": 1.6697173118591309, + "533": 1.58110773563385, + "534": 1.3566840887069702, + "535": 1.3498913049697876, + "536": 1.3571470975875854, + "537": 1.4225178956985474, + "538": 1.533928632736206, + "539": 1.7779604196548462 + }, + "loss": { + "486": 2.3220930099487305, + "487": 2.3261220455169678, + "488": 2.2855758666992188, + "489": 2.3221473693847656, + "490": 2.2996368408203125, + "491": 2.3574090003967285, + "492": 2.3189697265625, + "493": 2.3327951431274414, + "494": 2.3433547019958496, + "495": 2.310523509979248, + "496": 2.2948732376098633, + "497": 2.293727159500122, + "498": 2.2975926399230957, + "499": 2.256014347076416, + "500": 2.2843923568725586, + "501": 2.2881460189819336, + "502": 2.28273868560791, + "503": 2.289323568344116, + "504": 2.3028831481933594, + "505": 2.2738089561462402, + "506": 2.2794981002807617, + "507": 2.314923048019409, + "508": 2.317366600036621, + "509": 2.3298819065093994, + "510": 2.3180510997772217, + "511": 2.290679454803467, + "512": 2.293121337890625, + "513": 2.3143627643585205, + "514": 2.290820837020874, + "515": 2.276301860809326, + "516": 2.2668962478637695, + "517": 2.2852418422698975, + "518": 2.2983531951904297, + "519": 2.2913625240325928, + "520": 2.2787485122680664, + "521": 2.263439178466797, + "522": 2.278364896774292, + "523": 2.3045449256896973, + "524": 2.3066561222076416, + "525": 2.3127849102020264, + "526": 2.2955591678619385, + "527": 2.2579898834228516, + "528": 2.2588822841644287, + "529": 2.2573554515838623, + "530": 2.2974653244018555, + "531": 2.284247636795044, + "532": 2.291372776031494, + "533": 2.3105831146240234, + "534": 2.2505717277526855, + "535": 2.242361307144165, + "536": 2.255277156829834, + "537": 2.252377986907959, + "538": 2.2567806243896484, + "539": 2.271329402923584 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 0.828429, + 0.965784, + 1.1894, + 1.01372, + 0.779275, + 0.793491, + 0.830554, + 0.668719, + 0.947953, + 1.238, + 1.24667, + 1.22578, + 1.30458, + 1.40421, + 1.51854, + 1.86147, + 2.05068, + 1.68004, + 1.43525, + 1.30277, + 1.05487, + 0.79649, + 0.719819, + 0.755946, + 0.922442, + 1.0471, + 1.03286, + 1.14741, + 1.4418, + 1.51139, + 1.45018, + 1.12852, + 0.991193, + 1.08838, + 1.57717, + 1.63721, + 1.35693, + 1.00005, + 0.843615, + 0.901402, + 1.00584, + 1.22516, + 1.2873, + 1.21655, + 1.05705, + 0.96039, + 0.821883, + 0.92427, + 1.22274, + 1.23057, + 1.22446, + 1.11308, + 0.959133, + 0.718515 + ], + "train_epoch_time": 5.098841667175293, + "train_loss": 2.3024649360087546, + "train_score": 0.3245034524067232, + "val_loss": 2.348097736060825, + "val_score": 0.3150114812615545 + }, + { + "epoch": 10, + "grad_norm": 1.2640491724014282, + "learning_rate": 0.1, + "model_norm": 87.58625030517578, + "step_logs": { + "grad_norm": { + "540": 1.7134689092636108, + "541": 1.5779328346252441, + "542": 1.5871375799179077, + "543": 1.4161053895950317, + "544": 1.2711663246154785, + "545": 1.3576077222824097, + "546": 1.3114019632339478, + "547": 1.1410677433013916, + "548": 1.0767924785614014, + "549": 1.204289197921753, + "550": 1.4401222467422485, + "551": 1.4469940662384033, + "552": 1.3848143815994263, + "553": 1.467239260673523, + "554": 1.4632148742675781, + "555": 1.3870761394500732, + "556": 1.4207103252410889, + "557": 1.5865219831466675, + "558": 1.5462971925735474, + "559": 1.417084813117981, + "560": 1.3681401014328003, + "561": 1.322740912437439, + "562": 1.2615020275115967, + "563": 1.2015020847320557, + "564": 1.1711989641189575, + "565": 1.1907838582992554, + "566": 1.1135379076004028, + "567": 1.1052764654159546, + "568": 1.1534289121627808, + "569": 1.1689847707748413, + "570": 1.2904889583587646, + "571": 1.5788869857788086, + "572": 1.697857141494751, + "573": 1.7383028268814087, + "574": 1.7290221452713013, + "575": 1.6544092893600464, + "576": 1.5269207954406738, + "577": 1.4095512628555298, + "578": 1.3658093214035034, + "579": 1.2597167491912842, + "580": 1.1534322500228882, + "581": 1.2306861877441406, + "582": 1.3668746948242188, + "583": 1.5056084394454956, + "584": 1.6548144817352295, + "585": 1.8572112321853638, + "586": 2.060096025466919, + "587": 1.9171427488327026, + "588": 1.455045461654663, + "589": 1.3886091709136963, + "590": 1.4992473125457764, + "591": 1.5363121032714844, + "592": 1.393326997756958, + "593": 1.2640491724014282 + }, + "loss": { + "540": 2.3114218711853027, + "541": 2.2731072902679443, + "542": 2.2986507415771484, + "543": 2.294313907623291, + "544": 2.2365550994873047, + "545": 2.2500340938568115, + "546": 2.2500264644622803, + "547": 2.2545166015625, + "548": 2.240654468536377, + "549": 2.243361473083496, + "550": 2.2672383785247803, + "551": 2.2800679206848145, + "552": 2.245500087738037, + "553": 2.255904197692871, + "554": 2.256373405456543, + "555": 2.280869245529175, + "556": 2.254581928253174, + "557": 2.263603925704956, + "558": 2.2469727993011475, + "559": 2.2748565673828125, + "560": 2.2153480052948, + "561": 2.2607178688049316, + "562": 2.2670421600341797, + "563": 2.2362453937530518, + "564": 2.261608123779297, + "565": 2.207308053970337, + "566": 2.2216320037841797, + "567": 2.210115671157837, + "568": 2.247307538986206, + "569": 2.2463788986206055, + "570": 2.2509500980377197, + "571": 2.2585365772247314, + "572": 2.283242702484131, + "573": 2.280989408493042, + "574": 2.264427900314331, + "575": 2.2549333572387695, + "576": 2.250190019607544, + "577": 2.2448570728302, + "578": 2.2494397163391113, + "579": 2.249887228012085, + "580": 2.2311456203460693, + "581": 2.236873149871826, + "582": 2.2451956272125244, + "583": 2.229813814163208, + "584": 2.268385648727417, + "585": 2.271548271179199, + "586": 2.278242588043213, + "587": 2.2822768688201904, + "588": 2.2455201148986816, + "589": 2.219609260559082, + "590": 2.250032424926758, + "591": 2.2523858547210693, + "592": 2.2375988960266113, + "593": 2.234337329864502 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 0.787275, + 0.912941, + 0.912523, + 1.14409, + 1.38412, + 1.22079, + 1.30833, + 1.73153, + 1.93246, + 1.54681, + 1.0932, + 1.08897, + 1.17093, + 1.0479, + 1.05389, + 1.1855, + 1.117, + 0.899308, + 0.939749, + 1.13282, + 1.18353, + 1.2921, + 1.42457, + 1.54907, + 1.64875, + 1.55667, + 1.79169, + 1.80914, + 1.6892, + 1.64386, + 1.35163, + 0.905993, + 0.792045, + 0.75487, + 0.757456, + 0.823849, + 0.965131, + 1.12987, + 1.20585, + 1.4178, + 1.67704, + 1.47689, + 1.2017, + 0.983659, + 0.828358, + 0.658566, + 0.536815, + 0.620954, + 1.06063, + 1.15111, + 1.00102, + 0.954298, + 1.15259, + 1.39837 + ], + "train_epoch_time": 5.047824859619141, + "train_loss": 2.220233416454693, + "train_score": 0.35017485649377067, + "val_loss": 2.2958824054245013, + "val_score": 0.3326770601672228 + }, + { + "epoch": 11, + "grad_norm": 1.2157100439071655, + "learning_rate": 0.1, + "model_norm": 87.6042251586914, + "step_logs": { + "grad_norm": { + "594": 1.1669530868530273, + "595": 1.2132084369659424, + "596": 1.4693739414215088, + "597": 1.7073791027069092, + "598": 1.698878288269043, + "599": 1.7962965965270996, + "600": 1.5512479543685913, + "601": 1.4652150869369507, + "602": 1.3944478034973145, + "603": 1.3317171335220337, + "604": 1.302470326423645, + "605": 1.1761752367019653, + "606": 1.1240900754928589, + "607": 1.187212347984314, + "608": 1.229611873626709, + "609": 1.2905144691467285, + "610": 1.4038149118423462, + "611": 1.422003984451294, + "612": 1.4970314502716064, + "613": 1.5171924829483032, + "614": 1.5920166969299316, + "615": 1.599853515625, + "616": 1.410097599029541, + "617": 1.705039143562317, + "618": 1.4403878450393677, + "619": 1.7139389514923096, + "620": 1.5842852592468262, + "621": 1.6414768695831299, + "622": 1.5656919479370117, + "623": 1.4700489044189453, + "624": 1.4448580741882324, + "625": 1.521384358406067, + "626": 1.4248954057693481, + "627": 1.273108720779419, + "628": 1.279323935508728, + "629": 1.4342466592788696, + "630": 1.5914961099624634, + "631": 1.575094223022461, + "632": 1.3863567113876343, + "633": 1.3225921392440796, + "634": 1.4696089029312134, + "635": 1.6384886503219604, + "636": 1.785634160041809, + "637": 1.6300214529037476, + "638": 1.462430715560913, + "639": 1.4831770658493042, + "640": 1.5735154151916504, + "641": 1.5286848545074463, + "642": 1.3503004312515259, + "643": 1.3330632448196411, + "644": 1.3279651403427124, + "645": 1.2344977855682373, + "646": 1.231083869934082, + "647": 1.2157100439071655 + }, + "loss": { + "594": 2.2216835021972656, + "595": 2.244797706604004, + "596": 2.2370944023132324, + "597": 2.2341275215148926, + "598": 2.2558789253234863, + "599": 2.246788263320923, + "600": 2.2335166931152344, + "601": 2.2481918334960938, + "602": 2.242051124572754, + "603": 2.238752841949463, + "604": 2.2127814292907715, + "605": 2.2252867221832275, + "606": 2.207686424255371, + "607": 2.207956314086914, + "608": 2.1954174041748047, + "609": 2.2380852699279785, + "610": 2.2355079650878906, + "611": 2.2108030319213867, + "612": 2.2386536598205566, + "613": 2.2283706665039062, + "614": 2.238961935043335, + "615": 2.2507331371307373, + "616": 2.2310800552368164, + "617": 2.255988597869873, + "618": 2.275700092315674, + "619": 2.253037452697754, + "620": 2.243696689605713, + "621": 2.2591986656188965, + "622": 2.222571849822998, + "623": 2.2285008430480957, + "624": 2.2326738834381104, + "625": 2.210934638977051, + "626": 2.229423999786377, + "627": 2.23610520362854, + "628": 2.2118873596191406, + "629": 2.2106199264526367, + "630": 2.204601764678955, + "631": 2.231132745742798, + "632": 2.221668243408203, + "633": 2.21366810798645, + "634": 2.193549871444702, + "635": 2.2152392864227295, + "636": 2.242152690887451, + "637": 2.2621583938598633, + "638": 2.2313241958618164, + "639": 2.194629669189453, + "640": 2.2009198665618896, + "641": 2.233835220336914, + "642": 2.2121715545654297, + "643": 2.2223803997039795, + "644": 2.2018537521362305, + "645": 2.206470489501953, + "646": 2.201155662536621, + "647": 2.1892902851104736 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 1.63146, + 1.52513, + 1.03614, + 0.766387, + 0.781612, + 0.696315, + 0.928168, + 1.0472, + 1.15303, + 1.26236, + 1.30438, + 1.60858, + 1.74717, + 1.56651, + 1.45205, + 1.34385, + 1.13437, + 1.09332, + 0.998907, + 0.968068, + 0.883388, + 0.879354, + 1.12206, + 0.776012, + 1.09687, + 0.766969, + 0.893917, + 0.838465, + 0.906657, + 1.03121, + 1.06949, + 0.955208, + 1.09806, + 1.37963, + 1.35146, + 1.07465, + 0.8704, + 0.899316, + 1.15592, + 1.2655, + 1.01565, + 0.825152, + 0.703202, + 0.851405, + 1.04331, + 0.997643, + 0.888919, + 0.955906, + 1.21327, + 1.2506, + 1.24858, + 1.44783, + 1.45236, + 1.4813 + ], + "train_epoch_time": 5.062178134918213, + "train_loss": 2.188803861130943, + "train_score": 0.3473177456017717, + "val_loss": 2.2665565969202195, + "val_score": 0.3304750283946947 + }, + { + "epoch": 12, + "grad_norm": 1.1594904661178589, + "learning_rate": 0.1, + "model_norm": 87.6198959350586, + "step_logs": { + "grad_norm": { + "648": 1.1954418420791626, + "649": 1.1974676847457886, + "650": 1.2672324180603027, + "651": 1.452003002166748, + "652": 1.528839349746704, + "653": 1.503833293914795, + "654": 1.5454744100570679, + "655": 1.5231748819351196, + "656": 1.417120337486267, + "657": 1.2715767621994019, + "658": 1.2945997714996338, + "659": 1.2057448625564575, + "660": 1.1349579095840454, + "661": 1.118137001991272, + "662": 1.064670205116272, + "663": 1.0222800970077515, + "664": 1.0659438371658325, + "665": 1.051184058189392, + "666": 1.158028244972229, + "667": 1.17327082157135, + "668": 1.1828001737594604, + "669": 1.123181939125061, + "670": 1.0461944341659546, + "671": 1.0050735473632812, + "672": 1.0115554332733154, + "673": 1.070014238357544, + "674": 1.1404701471328735, + "675": 1.2399085760116577, + "676": 1.3631823062896729, + "677": 1.2542368173599243, + "678": 1.2008265256881714, + "679": 1.0959028005599976, + "680": 1.0375603437423706, + "681": 1.1151578426361084, + "682": 1.1901311874389648, + "683": 1.1519356966018677, + "684": 1.092047095298767, + "685": 1.0978142023086548, + "686": 0.99715656042099, + "687": 0.9034143090248108, + "688": 0.8573057651519775, + "689": 0.8090256452560425, + "690": 0.8250359892845154, + "691": 0.796980619430542, + "692": 0.7854346036911011, + "693": 0.815854549407959, + "694": 0.8617132306098938, + "695": 0.9406847357749939, + "696": 0.93160080909729, + "697": 0.9361287355422974, + "698": 1.0740296840667725, + "699": 1.254116415977478, + "700": 1.2265839576721191, + "701": 1.1594904661178589 + }, + "loss": { + "648": 2.1869421005249023, + "649": 2.1919777393341064, + "650": 2.1624321937561035, + "651": 2.1949124336242676, + "652": 2.224238634109497, + "653": 2.1967856884002686, + "654": 2.2175872325897217, + "655": 2.2128512859344482, + "656": 2.214641571044922, + "657": 2.1855058670043945, + "658": 2.190960645675659, + "659": 2.1876306533813477, + "660": 2.177907943725586, + "661": 2.151695966720581, + "662": 2.190953254699707, + "663": 2.1881825923919678, + "664": 2.1756930351257324, + "665": 2.18461012840271, + "666": 2.154822587966919, + "667": 2.1482014656066895, + "668": 2.170921564102173, + "669": 2.170084238052368, + "670": 2.162811040878296, + "671": 2.172851800918579, + "672": 2.166306257247925, + "673": 2.1446595191955566, + "674": 2.167536497116089, + "675": 2.1664373874664307, + "676": 2.1408629417419434, + "677": 2.1656198501586914, + "678": 2.1529040336608887, + "679": 2.173119068145752, + "680": 2.1585640907287598, + "681": 2.1659741401672363, + "682": 2.1546504497528076, + "683": 2.1651697158813477, + "684": 2.1620402336120605, + "685": 2.1537861824035645, + "686": 2.170220375061035, + "687": 2.1818923950195312, + "688": 2.1448683738708496, + "689": 2.1345367431640625, + "690": 2.1399693489074707, + "691": 2.1359193325042725, + "692": 2.115656852722168, + "693": 2.1095523834228516, + "694": 2.1351966857910156, + "695": 2.152808904647827, + "696": 2.1223111152648926, + "697": 2.1618027687072754, + "698": 2.1327104568481445, + "699": 2.1687190532684326, + "700": 2.1600100994110107, + "701": 2.135938882827759 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 1.53031, + 1.52865, + 1.34657, + 1.04108, + 0.951607, + 0.971378, + 0.928447, + 0.95379, + 1.10278, + 1.35166, + 1.30726, + 1.50475, + 1.69075, + 1.72104, + 1.93287, + 2.09384, + 1.91482, + 1.97704, + 1.60684, + 1.56055, + 1.55175, + 1.72019, + 1.97603, + 2.15097, + 2.1171, + 1.87318, + 1.66647, + 1.40918, + 1.15208, + 1.37665, + 1.49301, + 1.80942, + 2.00511, + 1.74173, + 1.5212, + 1.63168, + 1.81293, + 1.78708, + 2.18262, + 2.67337, + 2.91829, + 3.26121, + 3.14385, + 3.36271, + 3.42946, + 3.16931, + 2.87549, + 2.43286, + 2.4454, + 2.46686, + 1.84884, + 1.37888, + 1.43569, + 1.58875 + ], + "train_epoch_time": 5.051527261734009, + "train_loss": 2.1430730223826733, + "train_score": 0.36499618906714826, + "val_loss": 2.2215975239684744, + "val_score": 0.3424852897940219 + }, + { + "epoch": 13, + "grad_norm": 0.7096759080886841, + "learning_rate": 0.06666666666666668, + "model_norm": 87.62943267822266, + "step_logs": { + "grad_norm": { + "702": 1.2094677686691284, + "703": 1.1364346742630005, + "704": 1.054935336112976, + "705": 0.9208025336265564, + "706": 0.8918678760528564, + "707": 0.8889166712760925, + "708": 0.8920423984527588, + "709": 0.8592656254768372, + "710": 0.7808614373207092, + "711": 0.7903451323509216, + "712": 0.8282138705253601, + "713": 0.8917585611343384, + "714": 0.9138843417167664, + "715": 0.898725688457489, + "716": 0.8300848603248596, + "717": 0.7455387711524963, + "718": 0.7375110983848572, + "719": 0.7463124394416809, + "720": 0.7465780973434448, + "721": 0.7408370971679688, + "722": 0.7487491965293884, + "723": 0.7014108896255493, + "724": 0.6839889287948608, + "725": 0.7828773856163025, + "726": 0.8136268854141235, + "727": 0.8110736012458801, + "728": 0.8075604438781738, + "729": 0.731393039226532, + "730": 0.6665154695510864, + "731": 0.7339186072349548, + "732": 0.7479880452156067, + "733": 0.7132483720779419, + "734": 0.6741827130317688, + "735": 0.6816652417182922, + "736": 0.7633786797523499, + "737": 0.766368567943573, + "738": 0.7119523882865906, + "739": 0.7511018514633179, + "740": 0.693392813205719, + "741": 0.7348403930664062, + "742": 0.690436601638794, + "743": 0.7857136726379395, + "744": 0.767687201499939, + "745": 0.7049455046653748, + "746": 0.6908838152885437, + "747": 0.6771897673606873, + "748": 0.6792553067207336, + "749": 0.7423776984214783, + "750": 0.7747229337692261, + "751": 0.6995111703872681, + "752": 0.7536569833755493, + "753": 0.687478244304657, + "754": 0.7189297080039978, + "755": 0.7096759080886841 + }, + "loss": { + "702": 2.13319993019104, + "703": 2.1362335681915283, + "704": 2.120169162750244, + "705": 2.117563009262085, + "706": 2.1312122344970703, + "707": 2.130305767059326, + "708": 2.1443395614624023, + "709": 2.1255943775177, + "710": 2.127607822418213, + "711": 2.1300694942474365, + "712": 2.1319546699523926, + "713": 2.136672019958496, + "714": 2.121074676513672, + "715": 2.1448912620544434, + "716": 2.113769769668579, + "717": 2.138568878173828, + "718": 2.1325201988220215, + "719": 2.112884998321533, + "720": 2.1137192249298096, + "721": 2.1203198432922363, + "722": 2.1200571060180664, + "723": 2.1013803482055664, + "724": 2.113459587097168, + "725": 2.1090354919433594, + "726": 2.1204957962036133, + "727": 2.0943808555603027, + "728": 2.110152006149292, + "729": 2.123112678527832, + "730": 2.1332902908325195, + "731": 2.102656841278076, + "732": 2.1249969005584717, + "733": 2.1087183952331543, + "734": 2.1010513305664062, + "735": 2.106625556945801, + "736": 2.110645294189453, + "737": 2.0891237258911133, + "738": 2.1045916080474854, + "739": 2.1199705600738525, + "740": 2.1246423721313477, + "741": 2.1044230461120605, + "742": 2.1334028244018555, + "743": 2.130838632583618, + "744": 2.113445281982422, + "745": 2.128753900527954, + "746": 2.1046688556671143, + "747": 2.112100124359131, + "748": 2.101102590560913, + "749": 2.069340467453003, + "750": 2.1018152236938477, + "751": 2.093327522277832, + "752": 2.091395854949951, + "753": 2.0998992919921875, + "754": 2.0963640213012695, + "755": 2.092761754989624 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 1.45829, + 1.65409, + 1.9051, + 2.49749, + 2.67933, + 2.696, + 2.69478, + 2.87889, + 3.48934, + 3.41005, + 3.10809, + 2.68685, + 2.53965, + 2.65553, + 3.0677, + 3.84754, + 3.92063, + 3.79345, + 3.79225, + 3.86328, + 3.78159, + 4.2713, + 4.51748, + 3.44109, + 3.20322, + 3.18372, + 3.23567, + 3.96891, + 4.80208, + 3.90367, + 3.79812, + 4.14512, + 4.62255, + 4.53362, + 3.62189, + 3.55704, + 4.15208, + 3.75779, + 4.41903, + 3.89715, + 4.47533, + 3.45161, + 3.5861, + 4.28365, + 4.40934, + 4.60568, + 4.55388, + 3.75476, + 3.50188, + 4.27807, + 3.68204, + 4.44304, + 4.05596, + 4.15527 + ], + "train_epoch_time": 5.051677703857422, + "train_loss": 2.0992124385094884, + "train_score": 0.3778447811709586, + "val_loss": 2.1831022269690217, + "val_score": 0.35468839736811464 + }, + { + "epoch": 14, + "grad_norm": 0.6314717531204224, + "learning_rate": 0.03333333333333334, + "model_norm": 87.63260650634766, + "step_logs": { + "grad_norm": { + "756": 0.7444124221801758, + "757": 0.7044227123260498, + "758": 0.7111073136329651, + "759": 0.741810142993927, + "760": 0.7313619256019592, + "761": 0.6684687733650208, + "762": 0.6533973217010498, + "763": 0.6640459299087524, + "764": 0.6890078186988831, + "765": 0.6936907768249512, + "766": 0.7096317410469055, + "767": 0.6938508749008179, + "768": 0.6939355134963989, + "769": 0.6502628922462463, + "770": 0.6904476881027222, + "771": 0.6724308133125305, + "772": 0.652132511138916, + "773": 0.6908398270606995, + "774": 0.6512293815612793, + "775": 0.6589061617851257, + "776": 0.6848368048667908, + "777": 0.6390787363052368, + "778": 0.6640979051589966, + "779": 0.6511648297309875, + "780": 0.636833906173706, + "781": 0.6551579833030701, + "782": 0.7036217451095581, + "783": 0.7764425277709961, + "784": 0.6543644666671753, + "785": 0.6530728340148926, + "786": 0.670935332775116, + "787": 0.7032603025436401, + "788": 0.7079858183860779, + "789": 0.6747470498085022, + "790": 0.6547266244888306, + "791": 0.6820287108421326, + "792": 0.6060270667076111, + "793": 0.6199682354927063, + "794": 0.6568408608436584, + "795": 0.6298273801803589, + "796": 0.6749716401100159, + "797": 0.6366844773292542, + "798": 0.6207833290100098, + "799": 0.639532744884491, + "800": 0.6093708872795105, + "801": 0.6194463968276978, + "802": 0.662527859210968, + "803": 0.6145942211151123, + "804": 0.6294696927070618, + "805": 0.6803239583969116, + "806": 0.6261860728263855, + "807": 0.6561301946640015, + "808": 0.7099471688270569, + "809": 0.6314717531204224 + }, + "loss": { + "756": 2.1088690757751465, + "757": 2.098982810974121, + "758": 2.1028637886047363, + "759": 2.0999045372009277, + "760": 2.0710790157318115, + "761": 2.104261875152588, + "762": 2.098921775817871, + "763": 2.088793992996216, + "764": 2.090606451034546, + "765": 2.090880870819092, + "766": 2.114882469177246, + "767": 2.099087715148926, + "768": 2.1193289756774902, + "769": 2.0847418308258057, + "770": 2.095512628555298, + "771": 2.078007221221924, + "772": 2.1021549701690674, + "773": 2.0957417488098145, + "774": 2.113266706466675, + "775": 2.106503963470459, + "776": 2.082190990447998, + "777": 2.084826946258545, + "778": 2.110208034515381, + "779": 2.0971906185150146, + "780": 2.101323127746582, + "781": 2.0823447704315186, + "782": 2.0737085342407227, + "783": 2.098522663116455, + "784": 2.0923144817352295, + "785": 2.0959861278533936, + "786": 2.0784354209899902, + "787": 2.103172779083252, + "788": 2.088852882385254, + "789": 2.114295482635498, + "790": 2.075995922088623, + "791": 2.1067326068878174, + "792": 2.0907301902770996, + "793": 2.0980305671691895, + "794": 2.1044297218322754, + "795": 2.087343454360962, + "796": 2.0867247581481934, + "797": 2.097341537475586, + "798": 2.0897085666656494, + "799": 2.0791001319885254, + "800": 2.0794973373413086, + "801": 2.085522174835205, + "802": 2.0800888538360596, + "803": 2.0732412338256836, + "804": 2.072136163711548, + "805": 2.094399929046631, + "806": 2.0907092094421387, + "807": 2.0761852264404297, + "808": 2.116187334060669, + "809": 2.0969619750976562 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 3.80559, + 4.23002, + 4.15854, + 3.81605, + 3.87197, + 4.7091, + 4.91634, + 4.73696, + 4.40376, + 4.34508, + 4.19972, + 4.36012, + 4.40109, + 4.93031, + 4.39571, + 4.5957, + 4.94303, + 4.3912, + 4.98295, + 4.85194, + 4.43963, + 5.10459, + 4.78477, + 4.94602, + 5.18132, + 4.85133, + 4.1886, + 3.48093, + 4.88638, + 4.91434, + 4.61716, + 4.25248, + 4.16734, + 4.64391, + 4.84291, + 4.52902, + 5.69264, + 5.4585, + 4.87769, + 5.262, + 4.5803, + 5.17393, + 5.42258, + 5.08335, + 5.60009, + 5.4351, + 4.73886, + 5.48874, + 5.2296, + 4.5251, + 5.33196, + 4.82265, + 4.19858, + 5.25875 + ], + "train_epoch_time": 5.051954984664917, + "train_loss": 2.087881578844965, + "train_score": 0.3797267306740349, + "val_loss": 2.1743666903980836, + "val_score": 0.3561280135719702 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:18:26.655239", + "final_model_norm": 87.63260650634766, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:16:40.770252", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 3.3889477252960205, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.43351745605469, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 8.455106735229492, + "3": 5.430062770843506, + "4": 5.6468892097473145, + "5": 11.033295631408691, + "6": 19.403236389160156, + "7": 7.91170072555542, + "8": 4.659134864807129, + "9": 2.318803071975708, + "10": 2.1720690727233887, + "11": 2.752659559249878, + "12": 4.2507452964782715, + "13": 13.20004653930664, + "14": 4.406890869140625, + "15": 6.960628509521484, + "16": 6.359367370605469, + "17": 6.250153541564941, + "18": 3.660008192062378, + "19": 33.08381271362305, + "20": 2.868354320526123, + "21": 4.112018585205078, + "22": 4.233462333679199, + "23": 4.344981670379639, + "24": 3.540527820587158, + "25": 5.1248674392700195, + "26": 4.448975086212158, + "27": 3.6585779190063477, + "28": 3.3421261310577393, + "29": 3.0488529205322266, + "30": 4.774571418762207, + "31": 19.267662048339844, + "32": 3.362060546875, + "33": 3.615204095840454, + "34": 3.1254024505615234, + "35": 2.9841437339782715, + "36": 3.054182529449463, + "37": 4.397572994232178, + "38": 5.369297027587891, + "39": 3.1309306621551514, + "40": 5.369064807891846, + "41": 7.794357776641846, + "42": 19.277755737304688, + "43": 5.582124710083008, + "44": 5.446364879608154, + "45": 3.250910520553589, + "46": 8.72555923461914, + "47": 5.285445690155029, + "48": 8.959678649902344, + "49": 4.935024261474609, + "50": 24.596059799194336, + "51": 4.8176774978637695, + "52": 3.8231101036071777, + "53": 3.3889477252960205 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.944894313812256, + "3": 3.726104974746704, + "4": 3.616518020629883, + "5": 3.6710801124572754, + "6": 4.058044910430908, + "7": 4.158349990844727, + "8": 3.6257386207580566, + "9": 3.4210009574890137, + "10": 3.3945889472961426, + "11": 3.3422651290893555, + "12": 3.3065052032470703, + "13": 3.434197425842285, + "14": 3.3901124000549316, + "15": 3.5200142860412598, + "16": 3.3997273445129395, + "17": 3.6144275665283203, + "18": 3.1550843715667725, + "19": 3.332406520843506, + "20": 3.0696306228637695, + "21": 3.039764404296875, + "22": 3.055429458618164, + "23": 3.0484390258789062, + "24": 2.9547481536865234, + "25": 3.0867815017700195, + "26": 3.014432430267334, + "27": 2.9839138984680176, + "28": 2.8958330154418945, + "29": 2.840064287185669, + "30": 2.9247920513153076, + "31": 3.5178093910217285, + "32": 3.006455421447754, + "33": 2.860466480255127, + "34": 2.837097644805908, + "35": 2.829009532928467, + "36": 2.790703773498535, + "37": 2.9146840572357178, + "38": 3.127567768096924, + "39": 2.8455801010131836, + "40": 2.8222756385803223, + "41": 3.1355457305908203, + "42": 3.0576977729797363, + "43": 2.990561008453369, + "44": 3.3500051498413086, + "45": 2.903428077697754, + "46": 3.255727767944336, + "47": 3.073944330215454, + "48": 2.9995102882385254, + "49": 2.9204652309417725, + "50": 3.68436336517334, + "51": 3.002871036529541, + "52": 3.006086826324463, + "53": 2.839353561401367 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 0.00869338, + 0.00874607, + 0.055182, + 0.12637, + 0.113415, + 0.0301567, + 0.0107788, + 0.0664326, + 0.167027, + 0.636247, + 0.719515, + 0.441099, + 0.182995, + 0.0197094, + 0.174562, + 0.072652, + 0.0840652, + 0.0925248, + 0.23553, + 0.00304458, + 0.373096, + 0.179775, + 0.170483, + 0.161474, + 0.235713, + 0.117528, + 0.152295, + 0.222926, + 0.259255, + 0.305531, + 0.1283, + 0.00947576, + 0.265977, + 0.218862, + 0.290444, + 0.317684, + 0.299174, + 0.150718, + 0.108486, + 0.290285, + 0.0979044, + 0.0516122, + 0.00822776, + 0.095974, + 0.112936, + 0.274727, + 0.0427623, + 0.110035, + 0.037365, + 0.119915, + 0.0060902, + 0.129378, + 0.205669, + 0.247223 + ], + "train_epoch_time": 5.052546262741089, + "train_loss": 3.368078115099301, + "train_score": 0.15117131457523772, + "val_loss": 3.394641490809269, + "val_score": 0.14839265246615754 + }, + { + "epoch": 1, + "grad_norm": 1.6649152040481567, + "learning_rate": 0.1, + "model_norm": 87.44978332519531, + "step_logs": { + "grad_norm": { + "54": 11.882963180541992, + "55": 2.3635265827178955, + "56": 3.352724552154541, + "57": 2.6107516288757324, + "58": 3.1675150394439697, + "59": 3.0631000995635986, + "60": 2.564509391784668, + "61": 1.6012704372406006, + "62": 1.8905109167099, + "63": 2.189606189727783, + "64": 1.955898404121399, + "65": 1.822391152381897, + "66": 1.8813598155975342, + "67": 2.1241745948791504, + "68": 2.038508176803589, + "69": 1.5413856506347656, + "70": 1.5947394371032715, + "71": 2.029998540878296, + "72": 1.9531116485595703, + "73": 1.586624026298523, + "74": 1.6178563833236694, + "75": 1.7283015251159668, + "76": 1.7703524827957153, + "77": 1.7480460405349731, + "78": 1.7916656732559204, + "79": 1.9708962440490723, + "80": 1.925018310546875, + "81": 1.6343408823013306, + "82": 1.7145962715148926, + "83": 2.0812294483184814, + "84": 1.965393304824829, + "85": 1.6621005535125732, + "86": 1.767539620399475, + "87": 2.3451972007751465, + "88": 2.0563714504241943, + "89": 1.2139837741851807, + "90": 1.1802303791046143, + "91": 1.2762067317962646, + "92": 1.412876844406128, + "93": 1.8772350549697876, + "94": 1.8925275802612305, + "95": 1.7011109590530396, + "96": 1.6600311994552612, + "97": 1.7145100831985474, + "98": 1.7258946895599365, + "99": 1.5846753120422363, + "100": 1.5328458547592163, + "101": 1.586608648300171, + "102": 1.581928014755249, + "103": 1.6588969230651855, + "104": 1.6137290000915527, + "105": 1.4103758335113525, + "106": 1.4387884140014648, + "107": 1.6649152040481567 + }, + "loss": { + "54": 3.3600575923919678, + "55": 2.787196636199951, + "56": 2.8264527320861816, + "57": 2.7755112648010254, + "58": 2.874067544937134, + "59": 2.8506340980529785, + "60": 2.9195547103881836, + "61": 2.649099588394165, + "62": 2.6556055545806885, + "63": 2.7002651691436768, + "64": 2.716719150543213, + "65": 2.64172625541687, + "66": 2.6891369819641113, + "67": 2.6354784965515137, + "68": 2.7284579277038574, + "69": 2.621030569076538, + "70": 2.6266555786132812, + "71": 2.6157021522521973, + "72": 2.7143189907073975, + "73": 2.600590229034424, + "74": 2.6325554847717285, + "75": 2.605961799621582, + "76": 2.655390739440918, + "77": 2.609612464904785, + "78": 2.632941722869873, + "79": 2.6077609062194824, + "80": 2.647109031677246, + "81": 2.595229387283325, + "82": 2.611328363418579, + "83": 2.5804758071899414, + "84": 2.6764211654663086, + "85": 2.5902743339538574, + "86": 2.5982279777526855, + "87": 2.5912327766418457, + "88": 2.712203025817871, + "89": 2.575115203857422, + "90": 2.5741593837738037, + "91": 2.5489249229431152, + "92": 2.585357427597046, + "93": 2.562504529953003, + "94": 2.632828712463379, + "95": 2.5821690559387207, + "96": 2.612212657928467, + "97": 2.5670647621154785, + "98": 2.6118991374969482, + "99": 2.574159622192383, + "100": 2.596236228942871, + "101": 2.5502476692199707, + "102": 2.600957155227661, + "103": 2.537538528442383, + "104": 2.5901052951812744, + "105": 2.5361320972442627, + "106": 2.5423049926757812, + "107": 2.5615177154541016 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 0.0237956, + 0.498938, + 0.251447, + 0.407204, + 0.286457, + 0.303822, + 0.443923, + 1.03316, + 0.743028, + 0.563215, + 0.710153, + 0.795435, + 0.759747, + 0.584089, + 0.656587, + 1.10319, + 1.03282, + 0.634741, + 0.711552, + 1.03306, + 1.00577, + 0.872427, + 0.847245, + 0.854024, + 0.820214, + 0.671336, + 0.714335, + 0.971607, + 0.888255, + 0.595744, + 0.692876, + 0.937629, + 0.831647, + 0.471138, + 0.641385, + 1.74731, + 1.848, + 1.565, + 1.29513, + 0.727156, + 0.735086, + 0.892317, + 0.947929, + 0.873286, + 0.876855, + 1.02507, + 1.10496, + 1.01308, + 1.03935, + 0.922091, + 0.994618, + 1.27498, + 1.2281, + 0.924088 + ], + "train_epoch_time": 5.04816198348999, + "train_loss": 2.599024853165899, + "train_score": 0.23535464490674318, + "val_loss": 2.630504760621746, + "val_score": 0.23474275241215659 + }, + { + "epoch": 2, + "grad_norm": 1.493432641029358, + "learning_rate": 0.1, + "model_norm": 87.46326446533203, + "step_logs": { + "grad_norm": { + "108": 1.6845800876617432, + "109": 1.596843957901001, + "110": 1.5800604820251465, + "111": 1.4663677215576172, + "112": 1.5330389738082886, + "113": 1.560696005821228, + "114": 1.5556598901748657, + "115": 1.6265424489974976, + "116": 1.7033133506774902, + "117": 1.711039662361145, + "118": 1.5589275360107422, + "119": 1.459154486656189, + "120": 1.4164886474609375, + "121": 1.3624179363250732, + "122": 1.3997383117675781, + "123": 1.482944369316101, + "124": 1.611213207244873, + "125": 1.8654340505599976, + "126": 1.717869520187378, + "127": 1.3610656261444092, + "128": 1.362554669380188, + "129": 1.6764695644378662, + "130": 1.6157361268997192, + "131": 1.4396878480911255, + "132": 1.4864356517791748, + "133": 1.642707347869873, + "134": 1.6450635194778442, + "135": 1.452309250831604, + "136": 1.4403904676437378, + "137": 1.431042194366455, + "138": 1.5069602727890015, + "139": 1.6157925128936768, + "140": 1.6178302764892578, + "141": 1.5775846242904663, + "142": 1.5009082555770874, + "143": 1.485425591468811, + "144": 1.4340190887451172, + "145": 1.4824427366256714, + "146": 1.5407404899597168, + "147": 1.4523173570632935, + "148": 1.5248172283172607, + "149": 1.560773253440857, + "150": 1.471592664718628, + "151": 1.4706635475158691, + "152": 1.4568729400634766, + "153": 1.3535856008529663, + "154": 1.322677493095398, + "155": 1.349790334701538, + "156": 1.3448057174682617, + "157": 1.32919180393219, + "158": 1.3414663076400757, + "159": 1.343133568763733, + "160": 1.4307098388671875, + "161": 1.493432641029358 + }, + "loss": { + "108": 2.600191831588745, + "109": 2.528883934020996, + "110": 2.593515396118164, + "111": 2.5468149185180664, + "112": 2.5517120361328125, + "113": 2.543083667755127, + "114": 2.568270206451416, + "115": 2.5522754192352295, + "116": 2.6112096309661865, + "117": 2.55145263671875, + "118": 2.5874886512756348, + "119": 2.5259652137756348, + "120": 2.557525157928467, + "121": 2.518737316131592, + "122": 2.5307493209838867, + "123": 2.5311412811279297, + "124": 2.5617401599884033, + "125": 2.5553529262542725, + "126": 2.605602741241455, + "127": 2.537452459335327, + "128": 2.544696569442749, + "129": 2.5528926849365234, + "130": 2.570498466491699, + "131": 2.5278353691101074, + "132": 2.5573782920837402, + "133": 2.52195405960083, + "134": 2.5916879177093506, + "135": 2.496354579925537, + "136": 2.5397682189941406, + "137": 2.5125160217285156, + "138": 2.5369157791137695, + "139": 2.5224549770355225, + "140": 2.5586395263671875, + "141": 2.536864757537842, + "142": 2.5578291416168213, + "143": 2.523402214050293, + "144": 2.537787914276123, + "145": 2.5420727729797363, + "146": 2.55967378616333, + "147": 2.5217766761779785, + "148": 2.528733253479004, + "149": 2.524456024169922, + "150": 2.5312998294830322, + "151": 2.5099291801452637, + "152": 2.5288877487182617, + "153": 2.5043282508850098, + "154": 2.5190539360046387, + "155": 2.5095953941345215, + "156": 2.5225343704223633, + "157": 2.4985921382904053, + "158": 2.5087904930114746, + "159": 2.5019383430480957, + "160": 2.5126709938049316, + "161": 2.5212783813476562 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 0.916267, + 0.991754, + 1.03882, + 1.18444, + 1.08574, + 1.04406, + 1.06123, + 0.96471, + 0.900021, + 0.8715, + 1.0647, + 1.18638, + 1.27466, + 1.35694, + 1.29168, + 1.15098, + 0.9868, + 0.73433, + 0.882933, + 1.36975, + 1.37065, + 0.908325, + 0.984638, + 1.21959, + 1.15745, + 0.934581, + 0.957673, + 1.18355, + 1.22415, + 1.22689, + 1.11713, + 0.966167, + 0.977559, + 1.01932, + 1.13544, + 1.14363, + 1.23409, + 1.15673, + 1.07827, + 1.19559, + 1.0876, + 1.03631, + 1.16888, + 1.16047, + 1.19148, + 1.36685, + 1.43989, + 1.37743, + 1.39482, + 1.41423, + 1.39414, + 1.38688, + 1.22753, + 1.13045 + ], + "train_epoch_time": 5.050227165222168, + "train_loss": 2.5330812643042937, + "train_score": 0.2363858500546328, + "val_loss": 2.5737051487516456, + "val_score": 0.22898877029714573 + }, + { + "epoch": 3, + "grad_norm": 1.526887059211731, + "learning_rate": 0.1, + "model_norm": 87.47559356689453, + "step_logs": { + "grad_norm": { + "162": 1.4928721189498901, + "163": 1.6865094900131226, + "164": 1.6986087560653687, + "165": 1.6533052921295166, + "166": 1.6001272201538086, + "167": 1.5447994470596313, + "168": 1.5828136205673218, + "169": 1.5642145872116089, + "170": 1.431638240814209, + "171": 1.2382491827011108, + "172": 1.29545259475708, + "173": 1.488620400428772, + "174": 1.5724915266036987, + "175": 1.5411988496780396, + "176": 1.4260880947113037, + "177": 1.2863078117370605, + "178": 1.2801792621612549, + "179": 1.3829270601272583, + "180": 1.461946725845337, + "181": 1.4035640954971313, + "182": 1.366930365562439, + "183": 1.2010037899017334, + "184": 1.1213728189468384, + "185": 1.3109997510910034, + "186": 1.372214674949646, + "187": 1.362729787826538, + "188": 1.3166824579238892, + "189": 1.3974947929382324, + "190": 1.4236501455307007, + "191": 1.3842368125915527, + "192": 1.383528709411621, + "193": 1.5255012512207031, + "194": 1.6087610721588135, + "195": 1.5967577695846558, + "196": 1.585250735282898, + "197": 1.5104517936706543, + "198": 1.5963188409805298, + "199": 1.6635619401931763, + "200": 1.4449334144592285, + "201": 1.2055463790893555, + "202": 1.1725956201553345, + "203": 1.214180827140808, + "204": 1.2591506242752075, + "205": 1.374233365058899, + "206": 1.441994071006775, + "207": 1.5264184474945068, + "208": 1.4113317728042603, + "209": 1.140454888343811, + "210": 1.1167337894439697, + "211": 1.2823407649993896, + "212": 1.3605154752731323, + "213": 1.3126513957977295, + "214": 1.3711744546890259, + "215": 1.526887059211731 + }, + "loss": { + "162": 2.5369386672973633, + "163": 2.509779691696167, + "164": 2.5565848350524902, + "165": 2.538588047027588, + "166": 2.545473098754883, + "167": 2.494863510131836, + "168": 2.5543622970581055, + "169": 2.534714937210083, + "170": 2.5358211994171143, + "171": 2.4962592124938965, + "172": 2.494605779647827, + "173": 2.525576591491699, + "174": 2.5370254516601562, + "175": 2.520183563232422, + "176": 2.5178821086883545, + "177": 2.511479377746582, + "178": 2.5065903663635254, + "179": 2.4960975646972656, + "180": 2.5319511890411377, + "181": 2.5154550075531006, + "182": 2.518453359603882, + "183": 2.4957737922668457, + "184": 2.5026257038116455, + "185": 2.494650363922119, + "186": 2.50592041015625, + "187": 2.4931392669677734, + "188": 2.492976665496826, + "189": 2.5015175342559814, + "190": 2.510263442993164, + "191": 2.481764316558838, + "192": 2.512326240539551, + "193": 2.488492965698242, + "194": 2.530038356781006, + "195": 2.5199599266052246, + "196": 2.5205793380737305, + "197": 2.498992443084717, + "198": 2.5292038917541504, + "199": 2.522573947906494, + "200": 2.525585174560547, + "201": 2.487456798553467, + "202": 2.4869112968444824, + "203": 2.471940040588379, + "204": 2.478116512298584, + "205": 2.479897975921631, + "206": 2.490063190460205, + "207": 2.503345012664795, + "208": 2.514010429382324, + "209": 2.4574787616729736, + "210": 2.4901504516601562, + "211": 2.480341672897339, + "212": 2.493208169937134, + "213": 2.4661519527435303, + "214": 2.4971795082092285, + "215": 2.495734930038452 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 1.13832, + 0.882385, + 0.886081, + 0.928723, + 0.994167, + 1.04545, + 1.01958, + 1.03594, + 1.23723, + 1.62807, + 1.48648, + 1.13971, + 1.026, + 1.061, + 1.23806, + 1.51789, + 1.52947, + 1.30516, + 1.18466, + 1.27689, + 1.34785, + 1.73028, + 1.9902, + 1.45146, + 1.33083, + 1.34254, + 1.43799, + 1.28086, + 1.23855, + 1.29521, + 1.3125, + 1.06933, + 0.977561, + 0.988361, + 1.00301, + 1.09535, + 0.992532, + 0.911519, + 1.20967, + 1.71154, + 1.80869, + 1.67676, + 1.56303, + 1.31315, + 1.19752, + 1.07442, + 1.26214, + 1.88944, + 1.99676, + 1.50836, + 1.34695, + 1.43127, + 1.3282, + 1.07049 + ], + "train_epoch_time": 5.052087783813477, + "train_loss": 2.5195413763930157, + "train_score": 0.2547043131105869, + "val_loss": 2.5706858183294705, + "val_score": 0.24922413160825568 + }, + { + "epoch": 4, + "grad_norm": 1.2100497484207153, + "learning_rate": 0.1, + "model_norm": 87.48690032958984, + "step_logs": { + "grad_norm": { + "216": 1.5042321681976318, + "217": 1.2972551584243774, + "218": 1.3525184392929077, + "219": 1.4106248617172241, + "220": 1.4683302640914917, + "221": 1.4328974485397339, + "222": 1.5292645692825317, + "223": 1.516752004623413, + "224": 1.3745100498199463, + "225": 1.2629231214523315, + "226": 1.2997370958328247, + "227": 1.344958782196045, + "228": 1.2913954257965088, + "229": 1.1746699810028076, + "230": 1.083894968032837, + "231": 1.1090645790100098, + "232": 1.175661563873291, + "233": 1.2521255016326904, + "234": 1.3336749076843262, + "235": 1.327859878540039, + "236": 1.335525631904602, + "237": 1.4305250644683838, + "238": 1.5687737464904785, + "239": 1.4777119159698486, + "240": 1.6281108856201172, + "241": 1.5883123874664307, + "242": 1.418677568435669, + "243": 1.3770877122879028, + "244": 1.4308468103408813, + "245": 1.418418049812317, + "246": 1.4686918258666992, + "247": 1.4832983016967773, + "248": 1.4652273654937744, + "249": 1.502508282661438, + "250": 1.5151818990707397, + "251": 1.3689600229263306, + "252": 1.2781827449798584, + "253": 1.311661958694458, + "254": 1.373867392539978, + "255": 1.3660823106765747, + "256": 1.3258785009384155, + "257": 1.326951503753662, + "258": 1.3120416402816772, + "259": 1.2513842582702637, + "260": 1.1746522188186646, + "261": 1.2064586877822876, + "262": 1.2606614828109741, + "263": 1.3701711893081665, + "264": 1.5060062408447266, + "265": 1.5568857192993164, + "266": 1.4231266975402832, + "267": 1.240964651107788, + "268": 1.1753987073898315, + "269": 1.2100497484207153 + }, + "loss": { + "216": 2.5180835723876953, + "217": 2.473853588104248, + "218": 2.50290584564209, + "219": 2.495795726776123, + "220": 2.4973506927490234, + "221": 2.5057730674743652, + "222": 2.494767189025879, + "223": 2.513709545135498, + "224": 2.510648250579834, + "225": 2.4886891841888428, + "226": 2.497945547103882, + "227": 2.4738125801086426, + "228": 2.4851808547973633, + "229": 2.4756364822387695, + "230": 2.4734315872192383, + "231": 2.4554872512817383, + "232": 2.4761457443237305, + "233": 2.491023063659668, + "234": 2.4589333534240723, + "235": 2.485600233078003, + "236": 2.488257884979248, + "237": 2.487308979034424, + "238": 2.492017984390259, + "239": 2.5216212272644043, + "240": 2.491151809692383, + "241": 2.5172672271728516, + "242": 2.5080184936523438, + "243": 2.49701189994812, + "244": 2.490177631378174, + "245": 2.506624698638916, + "246": 2.466916561126709, + "247": 2.5057616233825684, + "248": 2.4754409790039062, + "249": 2.4973416328430176, + "250": 2.477790117263794, + "251": 2.482135772705078, + "252": 2.455598831176758, + "253": 2.4782023429870605, + "254": 2.465925693511963, + "255": 2.499709129333496, + "256": 2.4612679481506348, + "257": 2.4860377311706543, + "258": 2.4684667587280273, + "259": 2.4743924140930176, + "260": 2.4582266807556152, + "261": 2.480653762817383, + "262": 2.472898006439209, + "263": 2.4647326469421387, + "264": 2.477093458175659, + "265": 2.497713804244995, + "266": 2.4905996322631836, + "267": 2.4573888778686523, + "268": 2.4505364894866943, + "269": 2.465961217880249 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 1.11286, + 1.47002, + 1.36823, + 1.25426, + 1.15833, + 1.22043, + 1.06676, + 1.09266, + 1.32889, + 1.56033, + 1.47867, + 1.36757, + 1.49018, + 1.79414, + 2.10536, + 1.99629, + 1.79148, + 1.58885, + 1.38244, + 1.4097, + 1.39505, + 1.21545, + 1.01258, + 1.15478, + 0.939793, + 0.997832, + 1.24613, + 1.31673, + 1.21631, + 1.24589, + 1.14365, + 1.13889, + 1.15304, + 1.10623, + 1.07928, + 1.32448, + 1.50304, + 1.44043, + 1.30644, + 1.33948, + 1.40008, + 1.41188, + 1.43394, + 1.58011, + 1.78157, + 1.70428, + 1.556, + 1.31287, + 1.09217, + 1.03046, + 1.22975, + 1.59571, + 1.77374, + 1.68415 + ], + "train_epoch_time": 5.048539161682129, + "train_loss": 2.4629252667748602, + "train_score": 0.27318081959248275, + "val_loss": 2.5018012871287856, + "val_score": 0.26599723774721645 + }, + { + "epoch": 5, + "grad_norm": 1.275873064994812, + "learning_rate": 0.1, + "model_norm": 87.50006103515625, + "step_logs": { + "grad_norm": { + "270": 1.242241382598877, + "271": 1.3752840757369995, + "272": 1.509659767150879, + "273": 1.4997674226760864, + "274": 1.4664418697357178, + "275": 1.4549927711486816, + "276": 1.433301568031311, + "277": 1.4563589096069336, + "278": 1.4142285585403442, + "279": 1.3457815647125244, + "280": 1.1438502073287964, + "281": 1.0439789295196533, + "282": 1.152848243713379, + "283": 1.298545002937317, + "284": 1.3708072900772095, + "285": 1.3351082801818848, + "286": 1.4092931747436523, + "287": 1.5157380104064941, + "288": 1.655364990234375, + "289": 1.6383436918258667, + "290": 1.5117796659469604, + "291": 1.3852317333221436, + "292": 1.2005242109298706, + "293": 1.1524837017059326, + "294": 1.2188464403152466, + "295": 1.2545958757400513, + "296": 1.2708994150161743, + "297": 1.2899991273880005, + "298": 1.3007780313491821, + "299": 1.1948150396347046, + "300": 1.29708993434906, + "301": 1.3692450523376465, + "302": 1.3729547262191772, + "303": 1.4372953176498413, + "304": 1.5518672466278076, + "305": 1.8208012580871582, + "306": 1.7197481393814087, + "307": 1.5619542598724365, + "308": 1.5854787826538086, + "309": 1.5662997961044312, + "310": 1.4740266799926758, + "311": 1.2492971420288086, + "312": 0.9861946105957031, + "313": 1.0496667623519897, + "314": 1.251468539237976, + "315": 1.2535101175308228, + "316": 1.201544165611267, + "317": 1.3078744411468506, + "318": 1.4728519916534424, + "319": 1.5280863046646118, + "320": 1.4423651695251465, + "321": 1.2795406579971313, + "322": 1.2070748805999756, + "323": 1.275873064994812 + }, + "loss": { + "270": 2.451322555541992, + "271": 2.473684787750244, + "272": 2.4785008430480957, + "273": 2.5072813034057617, + "274": 2.485354423522949, + "275": 2.477545738220215, + "276": 2.471564769744873, + "277": 2.4904720783233643, + "278": 2.466644525527954, + "279": 2.4801747798919678, + "280": 2.4546728134155273, + "281": 2.459286689758301, + "282": 2.462512969970703, + "283": 2.4804182052612305, + "284": 2.4487881660461426, + "285": 2.471494197845459, + "286": 2.458620548248291, + "287": 2.4713315963745117, + "288": 2.4676902294158936, + "289": 2.4956986904144287, + "290": 2.4918088912963867, + "291": 2.4724583625793457, + "292": 2.4272124767303467, + "293": 2.4410529136657715, + "294": 2.443024158477783, + "295": 2.470226287841797, + "296": 2.4550609588623047, + "297": 2.455300807952881, + "298": 2.4781670570373535, + "299": 2.4526126384735107, + "300": 2.4411096572875977, + "301": 2.4632813930511475, + "302": 2.444310426712036, + "303": 2.4847426414489746, + "304": 2.4452319145202637, + "305": 2.4813785552978516, + "306": 2.465850830078125, + "307": 2.494922637939453, + "308": 2.47973370552063, + "309": 2.4533133506774902, + "310": 2.4604482650756836, + "311": 2.457676649093628, + "312": 2.421933650970459, + "313": 2.424926996231079, + "314": 2.4268150329589844, + "315": 2.4415690898895264, + "316": 2.435175895690918, + "317": 2.447998046875, + "318": 2.4521968364715576, + "319": 2.4638612270355225, + "320": 2.4494247436523438, + "321": 2.4374911785125732, + "322": 2.437119722366333, + "323": 2.4203295707702637 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 1.5885, + 1.30785, + 1.0875, + 1.11469, + 1.15574, + 1.17031, + 1.20309, + 1.17421, + 1.2333, + 1.36941, + 1.8761, + 2.25645, + 1.85282, + 1.47099, + 1.30316, + 1.38652, + 1.23791, + 1.07568, + 0.90054, + 0.929784, + 1.09028, + 1.2885, + 1.68409, + 1.83784, + 1.64448, + 1.56938, + 1.51999, + 1.47545, + 1.46462, + 1.71802, + 1.45093, + 1.31387, + 1.29671, + 1.20279, + 1.01534, + 0.748459, + 0.833752, + 1.02263, + 0.986471, + 1.00001, + 1.13241, + 1.57468, + 2.49022, + 2.20088, + 1.54952, + 1.55387, + 1.68675, + 1.43113, + 1.13041, + 1.05517, + 1.17737, + 1.48879, + 1.67266, + 1.48682 + ], + "train_epoch_time": 5.05258846282959, + "train_loss": 2.425375849068421, + "train_score": 0.2787907998650796, + "val_loss": 2.4786855774823615, + "val_score": 0.26686280158302406 + }, + { + "epoch": 6, + "grad_norm": 1.60768723487854, + "learning_rate": 0.1, + "model_norm": 87.51543426513672, + "step_logs": { + "grad_norm": { + "324": 1.4600036144256592, + "325": 1.5363696813583374, + "326": 1.5483111143112183, + "327": 1.5883113145828247, + "328": 1.6253679990768433, + "329": 1.6698472499847412, + "330": 1.6124595403671265, + "331": 1.4245474338531494, + "332": 1.2198418378829956, + "333": 1.1659995317459106, + "334": 1.1940242052078247, + "335": 1.3308666944503784, + "336": 1.2919392585754395, + "337": 1.1751962900161743, + "338": 1.243245005607605, + "339": 1.2223283052444458, + "340": 1.3887358903884888, + "341": 1.4822484254837036, + "342": 1.6142373085021973, + "343": 1.8940600156784058, + "344": 2.068099021911621, + "345": 1.7159664630889893, + "346": 1.5897043943405151, + "347": 1.515467882156372, + "348": 1.5037363767623901, + "349": 1.5573487281799316, + "350": 1.685118556022644, + "351": 1.511350154876709, + "352": 1.276816725730896, + "353": 1.2557131052017212, + "354": 1.4487239122390747, + "355": 1.4267280101776123, + "356": 1.3650164604187012, + "357": 1.3793189525604248, + "358": 1.3184151649475098, + "359": 1.4550317525863647, + "360": 1.5756475925445557, + "361": 1.5412311553955078, + "362": 1.458289384841919, + "363": 1.3948017358779907, + "364": 1.2699570655822754, + "365": 1.1540223360061646, + "366": 1.098054051399231, + "367": 1.1575748920440674, + "368": 1.4290499687194824, + "369": 1.671949028968811, + "370": 1.7307555675506592, + "371": 1.6433302164077759, + "372": 1.419509768486023, + "373": 1.2741972208023071, + "374": 1.2830681800842285, + "375": 1.5328922271728516, + "376": 1.5815271139144897, + "377": 1.60768723487854 + }, + "loss": { + "324": 2.423346996307373, + "325": 2.444814682006836, + "326": 2.4383111000061035, + "327": 2.457697868347168, + "328": 2.4347856044769287, + "329": 2.452908992767334, + "330": 2.4509775638580322, + "331": 2.4557056427001953, + "332": 2.405529737472534, + "333": 2.404137372970581, + "334": 2.4043030738830566, + "335": 2.3914694786071777, + "336": 2.403027296066284, + "337": 2.39839506149292, + "338": 2.395979404449463, + "339": 2.422151803970337, + "340": 2.4026143550872803, + "341": 2.438060760498047, + "342": 2.4172914028167725, + "343": 2.4507336616516113, + "344": 2.477540969848633, + "345": 2.4538772106170654, + "346": 2.4470860958099365, + "347": 2.412900447845459, + "348": 2.4140400886535645, + "349": 2.4156644344329834, + "350": 2.4223508834838867, + "351": 2.434015989303589, + "352": 2.38643741607666, + "353": 2.3736932277679443, + "354": 2.421555757522583, + "355": 2.3984429836273193, + "356": 2.381211757659912, + "357": 2.3999085426330566, + "358": 2.3930954933166504, + "359": 2.4029130935668945, + "360": 2.4074549674987793, + "361": 2.4009592533111572, + "362": 2.3978631496429443, + "363": 2.4047818183898926, + "364": 2.3720412254333496, + "365": 2.385714530944824, + "366": 2.3702468872070312, + "367": 2.3664233684539795, + "368": 2.3798351287841797, + "369": 2.4168667793273926, + "370": 2.4074058532714844, + "371": 2.407222270965576, + "372": 2.39224910736084, + "373": 2.3683271408081055, + "374": 2.352229595184326, + "375": 2.413966655731201, + "376": 2.403733968734741, + "377": 2.4112908840179443 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 1.13686, + 1.03575, + 1.01712, + 0.97422, + 0.921632, + 0.879687, + 0.942674, + 1.2101, + 1.6166, + 1.76833, + 1.68641, + 1.35019, + 1.43971, + 1.7366, + 1.55014, + 1.62116, + 1.24579, + 1.10969, + 0.927672, + 0.683138, + 0.579266, + 0.833365, + 0.968315, + 1.05062, + 1.06758, + 0.996013, + 0.853053, + 1.0656, + 1.46384, + 1.50537, + 1.15378, + 1.17828, + 1.27797, + 1.26144, + 1.37675, + 1.13499, + 0.969706, + 1.01076, + 1.12755, + 1.23609, + 1.47077, + 1.79139, + 1.96583, + 1.76601, + 1.16534, + 0.864583, + 0.80367, + 0.891387, + 1.18722, + 1.45871, + 1.42883, + 1.02733, + 0.961021, + 0.932925 + ], + "train_epoch_time": 5.051176071166992, + "train_loss": 2.406868515739824, + "train_score": 0.2787560528114844, + "val_loss": 2.466258319456186, + "val_score": 0.2689168343724668 + }, + { + "epoch": 7, + "grad_norm": 1.1687053442001343, + "learning_rate": 0.1, + "model_norm": 87.53131866455078, + "step_logs": { + "grad_norm": { + "378": 1.60160231590271, + "379": 1.6076996326446533, + "380": 1.6012232303619385, + "381": 1.4979603290557861, + "382": 1.4272422790527344, + "383": 1.2978098392486572, + "384": 1.2874720096588135, + "385": 1.3287444114685059, + "386": 1.381339192390442, + "387": 1.3700239658355713, + "388": 1.3750993013381958, + "389": 1.3893704414367676, + "390": 1.3712728023529053, + "391": 1.4219294786453247, + "392": 1.346638560295105, + "393": 1.288305401802063, + "394": 1.3512741327285767, + "395": 1.440542221069336, + "396": 1.5063265562057495, + "397": 1.649722933769226, + "398": 1.6636254787445068, + "399": 1.612720251083374, + "400": 1.5331531763076782, + "401": 1.2922505140304565, + "402": 1.2506208419799805, + "403": 1.429261565208435, + "404": 1.4418500661849976, + "405": 1.4607679843902588, + "406": 1.4172638654708862, + "407": 1.2512582540512085, + "408": 1.151175856590271, + "409": 1.2085741758346558, + "410": 1.3249741792678833, + "411": 1.4612982273101807, + "412": 1.584967017173767, + "413": 1.5188218355178833, + "414": 1.4100773334503174, + "415": 1.2858773469924927, + "416": 1.2695493698120117, + "417": 1.3973702192306519, + "418": 1.4315433502197266, + "419": 1.4916226863861084, + "420": 1.4846605062484741, + "421": 1.4671449661254883, + "422": 1.4627171754837036, + "423": 1.3842188119888306, + "424": 1.4306995868682861, + "425": 1.3572522401809692, + "426": 1.4067387580871582, + "427": 1.6193788051605225, + "428": 1.754942774772644, + "429": 1.572987675666809, + "430": 1.3550280332565308, + "431": 1.1687053442001343 + }, + "loss": { + "378": 2.411616563796997, + "379": 2.387085437774658, + "380": 2.4100770950317383, + "381": 2.393401622772217, + "382": 2.412733793258667, + "383": 2.3630266189575195, + "384": 2.376993417739868, + "385": 2.3643651008605957, + "386": 2.385039806365967, + "387": 2.3838796615600586, + "388": 2.37530255317688, + "389": 2.382737159729004, + "390": 2.3571596145629883, + "391": 2.377121925354004, + "392": 2.389420509338379, + "393": 2.354663372039795, + "394": 2.387359857559204, + "395": 2.3569040298461914, + "396": 2.397303342819214, + "397": 2.3658015727996826, + "398": 2.425973653793335, + "399": 2.39426851272583, + "400": 2.388237953186035, + "401": 2.3718059062957764, + "402": 2.3537840843200684, + "403": 2.337611675262451, + "404": 2.3659653663635254, + "405": 2.358236074447632, + "406": 2.3582913875579834, + "407": 2.34114933013916, + "408": 2.3399524688720703, + "409": 2.3467700481414795, + "410": 2.363800525665283, + "411": 2.3622684478759766, + "412": 2.374744415283203, + "413": 2.36867618560791, + "414": 2.374596118927002, + "415": 2.3485848903656006, + "416": 2.338998794555664, + "417": 2.3492870330810547, + "418": 2.3476479053497314, + "419": 2.353184938430786, + "420": 2.3702774047851562, + "421": 2.3458361625671387, + "422": 2.3727898597717285, + "423": 2.3481180667877197, + "424": 2.3533499240875244, + "425": 2.3392038345336914, + "426": 2.361246109008789, + "427": 2.372911214828491, + "428": 2.3703529834747314, + "429": 2.3798115253448486, + "430": 2.3412435054779053, + "431": 2.3429765701293945 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 0.940154, + 0.923545, + 0.939999, + 1.06663, + 1.18444, + 1.40296, + 1.43401, + 1.33916, + 1.24996, + 1.27007, + 1.25618, + 1.23435, + 1.25355, + 1.1757, + 1.31762, + 1.4187, + 1.30747, + 1.13577, + 1.05654, + 0.869273, + 0.876547, + 0.920566, + 1.01603, + 1.42032, + 1.50493, + 1.14432, + 1.13807, + 1.10516, + 1.17408, + 1.49532, + 1.76573, + 1.60666, + 1.34647, + 1.10625, + 0.945315, + 1.02681, + 1.19427, + 1.42039, + 1.45121, + 1.20313, + 1.14558, + 1.05764, + 1.07534, + 1.08981, + 1.10902, + 1.22549, + 1.14971, + 1.26983, + 1.1932, + 0.904867, + 0.769639, + 0.961816, + 1.27512, + 1.71537 + ], + "train_epoch_time": 5.059226751327515, + "train_loss": 2.335863566638065, + "train_score": 0.3121558914225618, + "val_loss": 2.3935280300582726, + "val_score": 0.29677633511760065 + }, + { + "epoch": 8, + "grad_norm": 1.1332361698150635, + "learning_rate": 0.1, + "model_norm": 87.5473861694336, + "step_logs": { + "grad_norm": { + "432": 1.137690544128418, + "433": 1.1460480690002441, + "434": 1.2171761989593506, + "435": 1.2076777219772339, + "436": 1.2529245615005493, + "437": 1.402597427368164, + "438": 1.3453058004379272, + "439": 1.212056279182434, + "440": 1.2251746654510498, + "441": 1.2845358848571777, + "442": 1.362449049949646, + "443": 1.6387406587600708, + "444": 1.5799988508224487, + "445": 1.5100924968719482, + "446": 1.6327098608016968, + "447": 1.7873666286468506, + "448": 1.6572623252868652, + "449": 1.3638532161712646, + "450": 1.3508301973342896, + "451": 1.4135549068450928, + "452": 1.4002708196640015, + "453": 1.3939937353134155, + "454": 1.3698304891586304, + "455": 1.2942852973937988, + "456": 1.2993489503860474, + "457": 1.3878233432769775, + "458": 1.3831275701522827, + "459": 1.3291853666305542, + "460": 1.3543205261230469, + "461": 1.428424596786499, + "462": 1.6272356510162354, + "463": 1.7715330123901367, + "464": 1.6866225004196167, + "465": 1.4478378295898438, + "466": 1.2568714618682861, + "467": 1.1030749082565308, + "468": 1.0275148153305054, + "469": 1.0894169807434082, + "470": 1.1351925134658813, + "471": 1.2910327911376953, + "472": 1.4991974830627441, + "473": 1.5773869752883911, + "474": 1.5351835489273071, + "475": 1.4473085403442383, + "476": 1.3482539653778076, + "477": 1.56797194480896, + "478": 1.9412206411361694, + "479": 1.932516098022461, + "480": 1.9513517618179321, + "481": 1.7118409872055054, + "482": 1.7004340887069702, + "483": 1.3866249322891235, + "484": 1.1696233749389648, + "485": 1.1332361698150635 + }, + "loss": { + "432": 2.340517520904541, + "433": 2.3275694847106934, + "434": 2.3364553451538086, + "435": 2.325854539871216, + "436": 2.3437535762786865, + "437": 2.327770233154297, + "438": 2.33870792388916, + "439": 2.305976390838623, + "440": 2.33780574798584, + "441": 2.3219358921051025, + "442": 2.3375911712646484, + "443": 2.3292489051818848, + "444": 2.3892416954040527, + "445": 2.310730457305908, + "446": 2.355114459991455, + "447": 2.349273920059204, + "448": 2.3818507194519043, + "449": 2.348038673400879, + "450": 2.35707426071167, + "451": 2.330489158630371, + "452": 2.3492016792297363, + "453": 2.3373923301696777, + "454": 2.3339121341705322, + "455": 2.3187499046325684, + "456": 2.3144209384918213, + "457": 2.306708335876465, + "458": 2.32818603515625, + "459": 2.320002555847168, + "460": 2.3211567401885986, + "461": 2.336489677429199, + "462": 2.349205493927002, + "463": 2.3457818031311035, + "464": 2.341017961502075, + "465": 2.3531289100646973, + "466": 2.312844753265381, + "467": 2.3081393241882324, + "468": 2.3177995681762695, + "469": 2.3095102310180664, + "470": 2.3065314292907715, + "471": 2.339859962463379, + "472": 2.336761951446533, + "473": 2.347179412841797, + "474": 2.3455986976623535, + "475": 2.3234448432922363, + "476": 2.316662073135376, + "477": 2.340973377227783, + "478": 2.368338108062744, + "479": 2.338229179382324, + "480": 2.3880410194396973, + "481": 2.3419253826141357, + "482": 2.35868763923645, + "483": 2.355233907699585, + "484": 2.3115785121917725, + "485": 2.320404052734375 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 1.80827, + 1.77214, + 1.57707, + 1.59471, + 1.49301, + 1.18324, + 1.29221, + 1.56967, + 1.55744, + 1.40721, + 1.2593, + 0.867352, + 0.957076, + 1.01331, + 0.883474, + 0.735371, + 0.867225, + 1.26232, + 1.29173, + 1.16633, + 1.19811, + 1.20285, + 1.2438, + 1.38418, + 1.37085, + 1.19763, + 1.21701, + 1.31316, + 1.2655, + 1.14512, + 0.887197, + 0.747462, + 0.822941, + 1.12255, + 1.46408, + 1.89693, + 2.19533, + 1.94595, + 1.78987, + 1.40383, + 1.03967, + 0.943343, + 0.995252, + 1.1092, + 1.27444, + 0.952182, + 0.628484, + 0.626096, + 0.627149, + 0.799183, + 0.815738, + 1.22494, + 1.68973, + 1.80685 + ], + "train_epoch_time": 5.050874948501587, + "train_loss": 2.2988176273991763, + "train_score": 0.3208314652251376, + "val_loss": 2.3519796161783, + "val_score": 0.3054813069604705 + }, + { + "epoch": 9, + "grad_norm": 1.2005212306976318, + "learning_rate": 0.1, + "model_norm": 87.5645980834961, + "step_logs": { + "grad_norm": { + "486": 1.1214721202850342, + "487": 1.1258738040924072, + "488": 1.1950767040252686, + "489": 1.3669465780258179, + "490": 1.3901455402374268, + "491": 1.4446160793304443, + "492": 1.5292778015136719, + "493": 1.4820761680603027, + "494": 1.525681972503662, + "495": 1.5819711685180664, + "496": 1.6606371402740479, + "497": 1.845616340637207, + "498": 1.8000943660736084, + "499": 1.6151983737945557, + "500": 1.476933479309082, + "501": 1.3950945138931274, + "502": 1.2935055494308472, + "503": 1.2673839330673218, + "504": 1.3930432796478271, + "505": 1.5287995338439941, + "506": 1.5310273170471191, + "507": 1.36247980594635, + "508": 1.43450927734375, + "509": 1.5258667469024658, + "510": 1.4105826616287231, + "511": 1.3734098672866821, + "512": 1.4327290058135986, + "513": 1.5748199224472046, + "514": 1.6368809938430786, + "515": 1.5906060934066772, + "516": 1.4321568012237549, + "517": 1.4684892892837524, + "518": 1.6119998693466187, + "519": 2.138305187225342, + "520": 3.083324432373047, + "521": 2.522343873977661, + "522": 1.7955716848373413, + "523": 1.698992371559143, + "524": 1.6268047094345093, + "525": 1.5548611879348755, + "526": 1.5203980207443237, + "527": 1.4332115650177002, + "528": 1.3108603954315186, + "529": 1.2389236688613892, + "530": 1.1775331497192383, + "531": 1.1402958631515503, + "532": 1.1709182262420654, + "533": 1.1948164701461792, + "534": 1.3235933780670166, + "535": 1.3287609815597534, + "536": 1.1896299123764038, + "537": 1.1644668579101562, + "538": 1.1485133171081543, + "539": 1.2005212306976318 + }, + "loss": { + "486": 2.306220531463623, + "487": 2.2960259914398193, + "488": 2.3082311153411865, + "489": 2.3108201026916504, + "490": 2.3137893676757812, + "491": 2.3080358505249023, + "492": 2.3295693397521973, + "493": 2.3241429328918457, + "494": 2.3161849975585938, + "495": 2.3040428161621094, + "496": 2.326857805252075, + "497": 2.3326635360717773, + "498": 2.339661121368408, + "499": 2.3283119201660156, + "500": 2.321657180786133, + "501": 2.2988147735595703, + "502": 2.280843734741211, + "503": 2.2978503704071045, + "504": 2.2929351329803467, + "505": 2.3016517162323, + "506": 2.3221685886383057, + "507": 2.281674861907959, + "508": 2.2965335845947266, + "509": 2.30495548248291, + "510": 2.286433696746826, + "511": 2.297111988067627, + "512": 2.2914953231811523, + "513": 2.3124561309814453, + "514": 2.3285834789276123, + "515": 2.30527400970459, + "516": 2.286728858947754, + "517": 2.2645764350891113, + "518": 2.301481246948242, + "519": 2.3149728775024414, + "520": 2.3718090057373047, + "521": 2.4531021118164062, + "522": 2.3675405979156494, + "523": 2.3290956020355225, + "524": 2.339332103729248, + "525": 2.3267135620117188, + "526": 2.2976150512695312, + "527": 2.2909488677978516, + "528": 2.300706386566162, + "529": 2.2797560691833496, + "530": 2.287539482116699, + "531": 2.2514655590057373, + "532": 2.250145435333252, + "533": 2.248020648956299, + "534": 2.2692179679870605, + "535": 2.2780089378356934, + "536": 2.278042793273926, + "537": 2.2823715209960938, + "538": 2.2705466747283936, + "539": 2.252415180206299 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 1.83368, + 1.81133, + 1.61617, + 1.2367, + 1.1973, + 1.10596, + 0.9961, + 1.05809, + 0.995051, + 0.920648, + 0.843763, + 0.684809, + 0.722042, + 0.892461, + 1.06433, + 1.18113, + 1.3632, + 1.43056, + 1.18158, + 0.984778, + 0.990667, + 1.22912, + 1.116, + 0.989987, + 1.14911, + 1.21782, + 1.11633, + 0.93242, + 0.869076, + 0.911166, + 1.11489, + 1.05014, + 0.885681, + 0.506298, + 0.249483, + 0.385573, + 0.734331, + 0.806872, + 0.883936, + 0.962409, + 0.993946, + 1.11531, + 1.3389, + 1.48525, + 1.64977, + 1.73153, + 1.64118, + 1.5747, + 1.29529, + 1.29021, + 1.60967, + 1.68319, + 1.72131, + 1.56282 + ], + "train_epoch_time": 5.052049875259399, + "train_loss": 2.265104422849767, + "train_score": 0.3293994351644256, + "val_loss": 2.322902155250961, + "val_score": 0.31362568141398556 + }, + { + "epoch": 10, + "grad_norm": 1.6595216989517212, + "learning_rate": 0.1, + "model_norm": 87.58207702636719, + "step_logs": { + "grad_norm": { + "540": 1.2803752422332764, + "541": 1.3663889169692993, + "542": 1.4194055795669556, + "543": 1.4473828077316284, + "544": 1.4625210762023926, + "545": 1.3707042932510376, + "546": 1.3922961950302124, + "547": 1.317380428314209, + "548": 1.3131691217422485, + "549": 1.1916605234146118, + "550": 1.1941823959350586, + "551": 1.1862330436706543, + "552": 1.2014741897583008, + "553": 1.2803758382797241, + "554": 1.159397840499878, + "555": 1.0783162117004395, + "556": 1.096177577972412, + "557": 1.158869743347168, + "558": 1.2966809272766113, + "559": 1.3969601392745972, + "560": 1.4888845682144165, + "561": 1.499085783958435, + "562": 1.4343029260635376, + "563": 1.3455581665039062, + "564": 1.2944754362106323, + "565": 1.2354768514633179, + "566": 1.2829288244247437, + "567": 1.3580394983291626, + "568": 1.3430910110473633, + "569": 1.4303832054138184, + "570": 1.8011568784713745, + "571": 1.9026520252227783, + "572": 1.9378623962402344, + "573": 1.789549469947815, + "574": 1.6741491556167603, + "575": 1.739890456199646, + "576": 1.5230759382247925, + "577": 1.397168517112732, + "578": 1.2681032419204712, + "579": 1.2249735593795776, + "580": 1.165692687034607, + "581": 1.1072674989700317, + "582": 1.1658586263656616, + "583": 1.1926738023757935, + "584": 1.202600121498108, + "585": 1.3199353218078613, + "586": 1.439717173576355, + "587": 1.5407600402832031, + "588": 1.529766321182251, + "589": 1.4613008499145508, + "590": 1.5500118732452393, + "591": 1.7532572746276855, + "592": 1.8600577116012573, + "593": 1.6595216989517212 + }, + "loss": { + "540": 2.270373582839966, + "541": 2.2558369636535645, + "542": 2.2684831619262695, + "543": 2.2699015140533447, + "544": 2.2554569244384766, + "545": 2.2837038040161133, + "546": 2.273789644241333, + "547": 2.267402172088623, + "548": 2.245145320892334, + "549": 2.261744737625122, + "550": 2.2455899715423584, + "551": 2.2805604934692383, + "552": 2.222210168838501, + "553": 2.269192695617676, + "554": 2.252525806427002, + "555": 2.2254936695098877, + "556": 2.2567338943481445, + "557": 2.2426486015319824, + "558": 2.2250969409942627, + "559": 2.248084783554077, + "560": 2.26503324508667, + "561": 2.269549608230591, + "562": 2.265213966369629, + "563": 2.2614564895629883, + "564": 2.245302677154541, + "565": 2.2372922897338867, + "566": 2.250276803970337, + "567": 2.260746479034424, + "568": 2.25730037689209, + "569": 2.250591516494751, + "570": 2.276700019836426, + "571": 2.3106374740600586, + "572": 2.295170307159424, + "573": 2.2932000160217285, + "574": 2.2557008266448975, + "575": 2.2844595909118652, + "576": 2.2920069694519043, + "577": 2.2681570053100586, + "578": 2.243772268295288, + "579": 2.2578420639038086, + "580": 2.2256407737731934, + "581": 2.266733169555664, + "582": 2.230048418045044, + "583": 2.222313165664673, + "584": 2.2402119636535645, + "585": 2.248361587524414, + "586": 2.268207550048828, + "587": 2.231321096420288, + "588": 2.2259740829467773, + "589": 2.27091121673584, + "590": 2.2529563903808594, + "591": 2.266594648361206, + "592": 2.276758909225464, + "593": 2.2648444175720215 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 1.38491, + 1.20826, + 1.12596, + 1.08353, + 1.05446, + 1.21549, + 1.17297, + 1.30649, + 1.30198, + 1.59272, + 1.57467, + 1.6207, + 1.53942, + 1.38419, + 1.67573, + 1.91397, + 1.8781, + 1.66991, + 1.32337, + 1.15198, + 1.02177, + 1.00992, + 1.1011, + 1.24906, + 1.33995, + 1.46573, + 1.3672, + 1.22582, + 1.25135, + 1.1, + 0.701783, + 0.638283, + 0.61118, + 0.716068, + 0.80481, + 0.75464, + 0.988036, + 1.16192, + 1.39531, + 1.50467, + 1.6379, + 1.84882, + 1.64067, + 1.56229, + 1.54898, + 1.29051, + 1.09428, + 0.939923, + 0.951196, + 1.06346, + 0.937741, + 0.737365, + 0.658058, + 0.82238 + ], + "train_epoch_time": 5.051379680633545, + "train_loss": 2.2414143720350443, + "train_score": 0.34328035323144374, + "val_loss": 2.300314697414534, + "val_score": 0.3277707022243745 + }, + { + "epoch": 11, + "grad_norm": 1.422560214996338, + "learning_rate": 0.1, + "model_norm": 87.59950256347656, + "step_logs": { + "grad_norm": { + "594": 1.4724085330963135, + "595": 1.5659208297729492, + "596": 1.5948623418807983, + "597": 1.5096204280853271, + "598": 1.476780652999878, + "599": 1.4445205926895142, + "600": 1.2030155658721924, + "601": 1.1663322448730469, + "602": 1.2035514116287231, + "603": 1.3046060800552368, + "604": 1.2823153734207153, + "605": 1.29511296749115, + "606": 1.437174916267395, + "607": 1.6487631797790527, + "608": 1.6355232000350952, + "609": 1.4788060188293457, + "610": 1.3755265474319458, + "611": 1.2401158809661865, + "612": 1.067068338394165, + "613": 1.0521490573883057, + "614": 1.1348017454147339, + "615": 1.1846015453338623, + "616": 1.2649641036987305, + "617": 1.2051887512207031, + "618": 1.0959563255310059, + "619": 1.1193002462387085, + "620": 1.0330448150634766, + "621": 0.9784910082817078, + "622": 0.9489515423774719, + "623": 0.9668910503387451, + "624": 1.2256289720535278, + "625": 1.2776364088058472, + "626": 1.3848919868469238, + "627": 1.5670509338378906, + "628": 1.620158076286316, + "629": 1.6570059061050415, + "630": 1.6332999467849731, + "631": 1.572061538696289, + "632": 1.6720561981201172, + "633": 1.7698657512664795, + "634": 1.7282272577285767, + "635": 1.7018442153930664, + "636": 1.6266013383865356, + "637": 1.6040335893630981, + "638": 1.6183538436889648, + "639": 1.5059492588043213, + "640": 1.2513744831085205, + "641": 1.1860790252685547, + "642": 1.1543378829956055, + "643": 1.2302004098892212, + "644": 1.3721766471862793, + "645": 1.4554718732833862, + "646": 1.433977484703064, + "647": 1.422560214996338 + }, + "loss": { + "594": 2.259866237640381, + "595": 2.2519421577453613, + "596": 2.2438693046569824, + "597": 2.2767887115478516, + "598": 2.218242645263672, + "599": 2.2393274307250977, + "600": 2.238276720046997, + "601": 2.220353603363037, + "602": 2.2341716289520264, + "603": 2.227184772491455, + "604": 2.2201545238494873, + "605": 2.226320266723633, + "606": 2.253817558288574, + "607": 2.2502970695495605, + "608": 2.259983539581299, + "609": 2.2395195960998535, + "610": 2.216625928878784, + "611": 2.2334721088409424, + "612": 2.201089382171631, + "613": 2.207909345626831, + "614": 2.2100014686584473, + "615": 2.1870317459106445, + "616": 2.2290401458740234, + "617": 2.194772243499756, + "618": 2.193455219268799, + "619": 2.2254507541656494, + "620": 2.201402187347412, + "621": 2.2043111324310303, + "622": 2.204188346862793, + "623": 2.2055840492248535, + "624": 2.2074508666992188, + "625": 2.2152113914489746, + "626": 2.2013015747070312, + "627": 2.248645305633545, + "628": 2.228558301925659, + "629": 2.235501766204834, + "630": 2.21940279006958, + "631": 2.2460076808929443, + "632": 2.223534107208252, + "633": 2.2548344135284424, + "634": 2.2394323348999023, + "635": 2.25168776512146, + "636": 2.2220373153686523, + "637": 2.2316551208496094, + "638": 2.232147216796875, + "639": 2.23519229888916, + "640": 2.220662832260132, + "641": 2.211493730545044, + "642": 2.212052822113037, + "643": 2.176553964614868, + "644": 2.201409339904785, + "645": 2.223477840423584, + "646": 2.2088513374328613, + "647": 2.213635206222534 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 1.04238, + 0.91837, + 0.882168, + 0.99905, + 1.01713, + 1.07317, + 1.54658, + 1.63222, + 1.54237, + 1.30857, + 1.35019, + 1.32731, + 1.09119, + 0.827796, + 0.844874, + 1.02408, + 1.17153, + 1.4523, + 1.9331, + 1.99447, + 1.71614, + 1.55851, + 1.39303, + 1.51105, + 1.82617, + 1.77633, + 2.06282, + 2.30229, + 2.44771, + 2.35922, + 1.46951, + 1.35706, + 1.14775, + 0.915703, + 0.849003, + 0.814192, + 0.831963, + 0.908808, + 0.79532, + 0.719837, + 0.749784, + 0.777443, + 0.839826, + 0.867362, + 0.852267, + 0.985585, + 1.4181, + 1.57202, + 1.66008, + 1.43819, + 1.16918, + 1.0496, + 1.07419, + 1.09387 + ], + "train_epoch_time": 5.051513433456421, + "train_loss": 2.2047455189046765, + "train_score": 0.35434899569583933, + "val_loss": 2.2707991964097136, + "val_score": 0.33801395748020174 + }, + { + "epoch": 12, + "grad_norm": 0.9680524468421936, + "learning_rate": 0.1, + "model_norm": 87.61489868164062, + "step_logs": { + "grad_norm": { + "648": 1.3333641290664673, + "649": 1.31859290599823, + "650": 1.3806748390197754, + "651": 1.347794532775879, + "652": 1.4184019565582275, + "653": 1.4079928398132324, + "654": 1.4346829652786255, + "655": 1.4215614795684814, + "656": 1.3713256120681763, + "657": 1.4001219272613525, + "658": 1.3395192623138428, + "659": 1.3574950695037842, + "660": 1.399519681930542, + "661": 1.3149547576904297, + "662": 1.1317532062530518, + "663": 0.9868398308753967, + "664": 0.9376863837242126, + "665": 0.9813666343688965, + "666": 1.069417119026184, + "667": 1.1566966772079468, + "668": 1.1301541328430176, + "669": 1.096773624420166, + "670": 1.13601553440094, + "671": 1.1344237327575684, + "672": 1.1862298250198364, + "673": 1.1706833839416504, + "674": 1.3028088808059692, + "675": 1.3327603340148926, + "676": 1.2247729301452637, + "677": 1.130468487739563, + "678": 1.0298511981964111, + "679": 1.0488574504852295, + "680": 1.07109797000885, + "681": 1.0068838596343994, + "682": 0.9685542583465576, + "683": 0.9551768898963928, + "684": 0.9691235423088074, + "685": 0.9821853637695312, + "686": 0.8839348554611206, + "687": 0.8862894773483276, + "688": 0.8559242486953735, + "689": 0.8531018495559692, + "690": 0.8814327120780945, + "691": 0.9617823958396912, + "692": 0.9558363556861877, + "693": 0.9620358943939209, + "694": 0.8950363993644714, + "695": 0.8203489780426025, + "696": 0.8070682287216187, + "697": 0.859437882900238, + "698": 0.9139648675918579, + "699": 0.855360746383667, + "700": 0.8960896730422974, + "701": 0.9680524468421936 + }, + "loss": { + "648": 2.224252462387085, + "649": 2.2232770919799805, + "650": 2.2095859050750732, + "651": 2.213632583618164, + "652": 2.1954970359802246, + "653": 2.1968834400177, + "654": 2.2026238441467285, + "655": 2.212841033935547, + "656": 2.196122884750366, + "657": 2.197126626968384, + "658": 2.2139625549316406, + "659": 2.180508852005005, + "660": 2.183412551879883, + "661": 2.1939024925231934, + "662": 2.178495407104492, + "663": 2.1660470962524414, + "664": 2.1710925102233887, + "665": 2.1895346641540527, + "666": 2.170623302459717, + "667": 2.1945128440856934, + "668": 2.1607513427734375, + "669": 2.1708621978759766, + "670": 2.1882219314575195, + "671": 2.169952869415283, + "672": 2.147392511367798, + "673": 2.1595332622528076, + "674": 2.1798410415649414, + "675": 2.178086042404175, + "676": 2.1641979217529297, + "677": 2.1711418628692627, + "678": 2.172391891479492, + "679": 2.133408546447754, + "680": 2.143892765045166, + "681": 2.1644768714904785, + "682": 2.1919074058532715, + "683": 2.1479907035827637, + "684": 2.1551153659820557, + "685": 2.15683650970459, + "686": 2.1568961143493652, + "687": 2.158215045928955, + "688": 2.151196241378784, + "689": 2.1430177688598633, + "690": 2.1212120056152344, + "691": 2.1417641639709473, + "692": 2.1535301208496094, + "693": 2.1516737937927246, + "694": 2.1559958457946777, + "695": 2.1714184284210205, + "696": 2.147042751312256, + "697": 2.1262757778167725, + "698": 2.1543636322021484, + "699": 2.1321985721588135, + "700": 2.1494712829589844, + "701": 2.149899959564209 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 1.25108, + 1.27871, + 1.15912, + 1.21859, + 1.09127, + 1.10817, + 1.07011, + 1.09501, + 1.16782, + 1.12079, + 1.23388, + 1.18326, + 1.11475, + 1.26881, + 1.7008, + 2.2242, + 2.46924, + 2.27347, + 1.89797, + 1.64021, + 1.69172, + 1.80467, + 1.6956, + 1.68616, + 1.52607, + 1.57573, + 1.28429, + 1.22623, + 1.44273, + 1.69891, + 2.04828, + 1.93928, + 1.86872, + 2.13498, + 2.33655, + 2.35432, + 2.29463, + 2.23579, + 2.76051, + 2.74754, + 2.93636, + 2.94458, + 2.73027, + 2.31536, + 2.35713, + 2.32484, + 2.69133, + 3.22661, + 3.29625, + 2.87866, + 2.57905, + 2.91426, + 2.67688, + 2.29414 + ], + "train_epoch_time": 5.059179306030273, + "train_loss": 2.141586204375564, + "train_score": 0.36812791427546626, + "val_loss": 2.216165391777468, + "val_score": 0.34919453190744404 + }, + { + "epoch": 13, + "grad_norm": 0.6713171005249023, + "learning_rate": 0.06666666666666668, + "model_norm": 87.6242904663086, + "step_logs": { + "grad_norm": { + "702": 0.9447621703147888, + "703": 0.8605120182037354, + "704": 0.8270865082740784, + "705": 0.8139187097549438, + "706": 0.9207656383514404, + "707": 0.9539266228675842, + "708": 0.9187578558921814, + "709": 0.8544454574584961, + "710": 0.8573761582374573, + "711": 0.8325833678245544, + "712": 0.7487674951553345, + "713": 0.8300611972808838, + "714": 0.9403583407402039, + "715": 0.9269595742225647, + "716": 0.8547358512878418, + "717": 0.8651770353317261, + "718": 0.9049345254898071, + "719": 0.8973135948181152, + "720": 0.9019340872764587, + "721": 0.8226966857910156, + "722": 0.7691552042961121, + "723": 0.6991496086120605, + "724": 0.6866421699523926, + "725": 0.7393158078193665, + "726": 0.79970782995224, + "727": 0.7658272981643677, + "728": 0.7023969888687134, + "729": 0.6750680804252625, + "730": 0.7801414728164673, + "731": 0.7106881737709045, + "732": 0.703877329826355, + "733": 0.6623641848564148, + "734": 0.6847352385520935, + "735": 0.7203131318092346, + "736": 0.6406399607658386, + "737": 0.6447098851203918, + "738": 0.6694806218147278, + "739": 0.6632204651832581, + "740": 0.7323748469352722, + "741": 0.8258577585220337, + "742": 0.7254605293273926, + "743": 0.6315357685089111, + "744": 0.7073687314987183, + "745": 0.7020779848098755, + "746": 0.7511247992515564, + "747": 0.7816352844238281, + "748": 0.7458645105361938, + "749": 0.7709537148475647, + "750": 0.7125895023345947, + "751": 0.6858197450637817, + "752": 0.747973620891571, + "753": 0.7492133378982544, + "754": 0.716821014881134, + "755": 0.6713171005249023 + }, + "loss": { + "702": 2.1394238471984863, + "703": 2.135296583175659, + "704": 2.1354780197143555, + "705": 2.1325507164001465, + "706": 2.143461227416992, + "707": 2.122426986694336, + "708": 2.127103805541992, + "709": 2.1159729957580566, + "710": 2.1261091232299805, + "711": 2.1214585304260254, + "712": 2.105083703994751, + "713": 2.128053903579712, + "714": 2.1445674896240234, + "715": 2.128133773803711, + "716": 2.1315219402313232, + "717": 2.137157678604126, + "718": 2.13249135017395, + "719": 2.1486611366271973, + "720": 2.133594512939453, + "721": 2.119948148727417, + "722": 2.126617431640625, + "723": 2.13726806640625, + "724": 2.1391525268554688, + "725": 2.1157310009002686, + "726": 2.1103405952453613, + "727": 2.1100518703460693, + "728": 2.140583038330078, + "729": 2.1129374504089355, + "730": 2.1272308826446533, + "731": 2.1331515312194824, + "732": 2.1278982162475586, + "733": 2.1136536598205566, + "734": 2.1283087730407715, + "735": 2.1350760459899902, + "736": 2.106642246246338, + "737": 2.11907958984375, + "738": 2.1068787574768066, + "739": 2.1340057849884033, + "740": 2.1070449352264404, + "741": 2.104229688644409, + "742": 2.123256206512451, + "743": 2.114095449447632, + "744": 2.12141752243042, + "745": 2.108335494995117, + "746": 2.1152632236480713, + "747": 2.104945182800293, + "748": 2.1202139854431152, + "749": 2.120616912841797, + "750": 2.1229805946350098, + "751": 2.098238229751587, + "752": 2.1216821670532227, + "753": 2.111076831817627, + "754": 2.0979056358337402, + "755": 2.1012048721313477 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 2.39691, + 2.88366, + 3.12171, + 3.21912, + 2.52823, + 2.3324, + 2.51992, + 2.89829, + 2.8923, + 3.06041, + 3.7547, + 3.08861, + 2.42523, + 2.47672, + 2.9176, + 2.85513, + 2.60407, + 2.66858, + 2.62278, + 3.13217, + 3.59469, + 4.37239, + 4.53713, + 3.8708, + 3.29982, + 3.59775, + 4.33877, + 4.63651, + 3.49517, + 4.22341, + 4.29494, + 4.8177, + 4.5393, + 4.11501, + 5.1329, + 5.09822, + 4.70071, + 4.85154, + 3.92832, + 3.0852, + 4.03436, + 5.30064, + 4.23969, + 4.27729, + 3.74921, + 3.44534, + 3.81118, + 3.56784, + 4.18088, + 4.46103, + 3.79234, + 3.76091, + 4.08286, + 4.66244 + ], + "train_epoch_time": 5.051976680755615, + "train_loss": 2.105823877928418, + "train_score": 0.37626434707402157, + "val_loss": 2.1842726755087467, + "val_score": 0.3553656000220543 + }, + { + "epoch": 14, + "grad_norm": 0.6120442152023315, + "learning_rate": 0.03333333333333334, + "model_norm": 87.62741088867188, + "step_logs": { + "grad_norm": { + "756": 0.6779073476791382, + "757": 0.6146882176399231, + "758": 0.7082776427268982, + "759": 0.6646072268486023, + "760": 0.6687437891960144, + "761": 0.6980213522911072, + "762": 0.6405911445617676, + "763": 0.6580644845962524, + "764": 0.666760265827179, + "765": 0.6627838611602783, + "766": 0.6748723387718201, + "767": 0.6789063215255737, + "768": 0.6050003170967102, + "769": 0.6512197256088257, + "770": 0.6254962086677551, + "771": 0.6485661268234253, + "772": 0.6217379570007324, + "773": 0.620040774345398, + "774": 0.6436275243759155, + "775": 0.6680901646614075, + "776": 0.6732286214828491, + "777": 0.6128937005996704, + "778": 0.6669917702674866, + "779": 0.6437169909477234, + "780": 0.6341109871864319, + "781": 0.6595199704170227, + "782": 0.6319490075111389, + "783": 0.6454424262046814, + "784": 0.6392427682876587, + "785": 0.6286539435386658, + "786": 0.7049348950386047, + "787": 0.572549045085907, + "788": 0.6379544734954834, + "789": 0.6582005620002747, + "790": 0.6561388373374939, + "791": 0.7041144967079163, + "792": 0.6080366373062134, + "793": 0.6152358055114746, + "794": 0.6456167101860046, + "795": 0.625890851020813, + "796": 0.6409896016120911, + "797": 0.6351111531257629, + "798": 0.5750425457954407, + "799": 0.5945930480957031, + "800": 0.6238871216773987, + "801": 0.6249281167984009, + "802": 0.6484242081642151, + "803": 0.5904596447944641, + "804": 0.6446259617805481, + "805": 0.6036942005157471, + "806": 0.600235104560852, + "807": 0.6659184098243713, + "808": 0.6049234867095947, + "809": 0.6120442152023315 + }, + "loss": { + "756": 2.091010332107544, + "757": 2.098677635192871, + "758": 2.1143531799316406, + "759": 2.0961642265319824, + "760": 2.0748157501220703, + "761": 2.1173954010009766, + "762": 2.1137185096740723, + "763": 2.102023124694824, + "764": 2.1141042709350586, + "765": 2.094511032104492, + "766": 2.1117911338806152, + "767": 2.1226460933685303, + "768": 2.097991466522217, + "769": 2.0794951915740967, + "770": 2.1216061115264893, + "771": 2.1137049198150635, + "772": 2.1160500049591064, + "773": 2.096975803375244, + "774": 2.1074235439300537, + "775": 2.1048173904418945, + "776": 2.102797746658325, + "777": 2.1090340614318848, + "778": 2.083104133605957, + "779": 2.1024649143218994, + "780": 2.0781493186950684, + "781": 2.08461332321167, + "782": 2.1110751628875732, + "783": 2.1057887077331543, + "784": 2.119384765625, + "785": 2.0936455726623535, + "786": 2.097327709197998, + "787": 2.0956673622131348, + "788": 2.131375789642334, + "789": 2.0970778465270996, + "790": 2.095752716064453, + "791": 2.110517978668213, + "792": 2.1039462089538574, + "793": 2.096714496612549, + "794": 2.0943994522094727, + "795": 2.1116466522216797, + "796": 2.0844802856445312, + "797": 2.1108970642089844, + "798": 2.096395492553711, + "799": 2.086451768875122, + "800": 2.106167793273926, + "801": 2.1052422523498535, + "802": 2.077186107635498, + "803": 2.081002712249756, + "804": 2.0980730056762695, + "805": 2.093629837036133, + "806": 2.09326171875, + "807": 2.1027448177337646, + "808": 2.077929735183716, + "809": 2.0949196815490723 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 4.55004, + 5.55438, + 4.21474, + 4.74564, + 4.63938, + 4.34575, + 5.15093, + 4.85401, + 4.7554, + 4.76803, + 4.63669, + 4.6053, + 5.73182, + 4.90346, + 5.4227, + 5.025, + 5.47408, + 5.45447, + 5.08724, + 4.71568, + 4.63951, + 5.61453, + 4.68242, + 5.07386, + 5.16828, + 4.79258, + 5.28615, + 5.05475, + 5.18654, + 5.29761, + 4.22054, + 6.39289, + 5.23698, + 4.84059, + 4.86798, + 4.25699, + 5.69082, + 5.53932, + 5.0247, + 5.39044, + 5.07336, + 5.2332, + 6.33977, + 5.90159, + 5.41104, + 5.39066, + 4.94034, + 5.96887, + 5.049, + 5.74468, + 5.81006, + 4.74181, + 5.67845, + 5.59245 + ], + "train_epoch_time": 5.0515358448028564, + "train_loss": 2.0949813161382034, + "train_score": 0.3791775018361583, + "val_loss": 2.1772965982505017, + "val_score": 0.35670206624796413 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:20:11.537013", + "final_model_norm": 87.62741088867188, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:18:26.809531", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 5.927224159240723, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.43559265136719, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 8.492406845092773, + "3": 5.133657455444336, + "4": 4.355436325073242, + "5": 8.350513458251953, + "6": 20.943735122680664, + "7": 8.886734962463379, + "8": 4.939404487609863, + "9": 2.7346272468566895, + "10": 2.2990474700927734, + "11": 2.869755983352661, + "12": 5.0551557540893555, + "13": 6.891150951385498, + "14": 39.990028381347656, + "15": 6.289729595184326, + "16": 7.449644088745117, + "17": 5.954482555389404, + "18": 6.2183146476745605, + "19": 11.386857032775879, + "20": 6.120622158050537, + "21": 46.00760269165039, + "22": 5.337105751037598, + "23": 4.709524631500244, + "24": 4.025487899780273, + "25": 6.538298606872559, + "26": 3.3507795333862305, + "27": 36.01987075805664, + "28": 2.523289680480957, + "29": 2.9157509803771973, + "30": 4.439497947692871, + "31": 6.468553066253662, + "32": 4.940361022949219, + "33": 3.9475128650665283, + "34": 3.7959046363830566, + "35": 2.856990098953247, + "36": 2.2356903553009033, + "37": 3.854083299636841, + "38": 2.8693065643310547, + "39": 4.858524322509766, + "40": 3.169224500656128, + "41": 13.64751148223877, + "42": 2.795562267303467, + "43": 6.008693218231201, + "44": 3.218686819076538, + "45": 5.200477123260498, + "46": 4.667453765869141, + "47": 3.983552932739258, + "48": 7.1761393547058105, + "49": 4.028877258300781, + "50": 4.500582695007324, + "51": 4.383876800537109, + "52": 4.444519996643066, + "53": 5.927224159240723 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.9338831901550293, + "3": 3.7350575923919678, + "4": 3.591062068939209, + "5": 3.5876054763793945, + "6": 4.030679702758789, + "7": 4.174951553344727, + "8": 3.606139898300171, + "9": 3.4460549354553223, + "10": 3.3512208461761475, + "11": 3.330306053161621, + "12": 3.2731595039367676, + "13": 3.400935649871826, + "14": 4.000199317932129, + "15": 3.4472057819366455, + "16": 3.329242706298828, + "17": 3.1616313457489014, + "18": 3.22605037689209, + "19": 3.235077142715454, + "20": 3.2256669998168945, + "21": 4.876416206359863, + "22": 3.360476016998291, + "23": 3.0198841094970703, + "24": 2.9926400184631348, + "25": 3.08091402053833, + "26": 2.98525333404541, + "27": 3.5717737674713135, + "28": 2.849923849105835, + "29": 2.8644518852233887, + "30": 2.9232075214385986, + "31": 3.096769332885742, + "32": 3.514759063720703, + "33": 3.031528949737549, + "34": 2.908228874206543, + "35": 2.935027599334717, + "36": 2.764258861541748, + "37": 2.795379638671875, + "38": 2.8013205528259277, + "39": 2.8269448280334473, + "40": 2.781714916229248, + "41": 3.175449848175049, + "42": 2.7635140419006348, + "43": 3.0239179134368896, + "44": 2.9823057651519775, + "45": 2.937479019165039, + "46": 3.2030391693115234, + "47": 2.9475035667419434, + "48": 3.015583038330078, + "49": 2.857910394668579, + "50": 2.9124820232391357, + "51": 2.893673896789551, + "52": 3.0640993118286133, + "53": 2.9861316680908203 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 0.00874362, + 0.00824185, + 0.0545456, + 0.141724, + 0.189304, + 0.0514492, + 0.00918904, + 0.0528648, + 0.147806, + 0.460814, + 0.634026, + 0.404384, + 0.128085, + 0.0716168, + 0.00250137, + 0.0871371, + 0.0599894, + 0.0891709, + 0.0834307, + 0.0249504, + 0.086105, + 0.00230378, + 0.117975, + 0.136156, + 0.184679, + 0.0720693, + 0.265882, + 0.00275296, + 0.447609, + 0.336931, + 0.148317, + 0.0740107, + 0.144005, + 0.194543, + 0.201836, + 0.359579, + 0.553039, + 0.188191, + 0.340258, + 0.119759, + 0.276953, + 0.017049, + 0.353609, + 0.0837548, + 0.287869, + 0.108615, + 0.147029, + 0.185743, + 0.0585584, + 0.176068, + 0.143789, + 0.150568, + 0.155115, + 0.0849975 + ], + "train_epoch_time": 5.055506944656372, + "train_loss": 2.971780118709657, + "train_score": 0.15369776722091855, + "val_loss": 2.9890767000572827, + "val_score": 0.15274289626752194 + }, + { + "epoch": 1, + "grad_norm": 1.6106148958206177, + "learning_rate": 0.1, + "model_norm": 87.45330047607422, + "step_logs": { + "grad_norm": { + "54": 5.978774070739746, + "55": 8.950082778930664, + "56": 6.119593143463135, + "57": 31.461462020874023, + "58": 3.1882057189941406, + "59": 10.551898956298828, + "60": 5.144927978515625, + "61": 2.8061091899871826, + "62": 3.760117530822754, + "63": 2.740030527114868, + "64": 4.894859790802002, + "65": 2.3066086769104004, + "66": 2.3835325241088867, + "67": 3.767838478088379, + "68": 2.745457649230957, + "69": 1.503357172012329, + "70": 1.1944791078567505, + "71": 1.1905282735824585, + "72": 1.7554534673690796, + "73": 2.110347032546997, + "74": 2.3435840606689453, + "75": 1.9726920127868652, + "76": 1.3262838125228882, + "77": 1.469195008277893, + "78": 2.046281337738037, + "79": 1.8338699340820312, + "80": 1.2676805257797241, + "81": 1.3730958700180054, + "82": 1.7932828664779663, + "83": 1.8086676597595215, + "84": 1.7238720655441284, + "85": 1.694278359413147, + "86": 1.600956916809082, + "87": 1.5541541576385498, + "88": 1.527895212173462, + "89": 1.6551250219345093, + "90": 1.901789903640747, + "91": 1.7149564027786255, + "92": 1.185032844543457, + "93": 1.265830636024475, + "94": 1.6216756105422974, + "95": 1.6631293296813965, + "96": 1.5494980812072754, + "97": 1.5495703220367432, + "98": 1.5657968521118164, + "99": 1.5824859142303467, + "100": 1.6045254468917847, + "101": 1.5318145751953125, + "102": 1.490466833114624, + "103": 1.5522249937057495, + "104": 1.713820219039917, + "105": 1.7040716409683228, + "106": 1.5766328573226929, + "107": 1.6106148958206177 + }, + "loss": { + "54": 2.9796619415283203, + "55": 2.897850275039673, + "56": 2.892820358276367, + "57": 4.205759048461914, + "58": 2.8588154315948486, + "59": 3.3858137130737305, + "60": 3.0331780910491943, + "61": 2.7692489624023438, + "62": 2.8786139488220215, + "63": 2.830838203430176, + "64": 2.910327196121216, + "65": 2.757439613342285, + "66": 2.7358312606811523, + "67": 2.7778244018554688, + "68": 3.032517433166504, + "69": 2.68621563911438, + "70": 2.6283602714538574, + "71": 2.619724750518799, + "72": 2.624297857284546, + "73": 2.730600118637085, + "74": 2.6886730194091797, + "75": 2.7313616275787354, + "76": 2.595308303833008, + "77": 2.627058744430542, + "78": 2.6370882987976074, + "79": 2.7046689987182617, + "80": 2.594330310821533, + "81": 2.586346387863159, + "82": 2.5819172859191895, + "83": 2.667757511138916, + "84": 2.5907230377197266, + "85": 2.648146629333496, + "86": 2.579831600189209, + "87": 2.5950562953948975, + "88": 2.568941116333008, + "89": 2.617931842803955, + "90": 2.585576295852661, + "91": 2.656446933746338, + "92": 2.5517587661743164, + "93": 2.565455436706543, + "94": 2.5689311027526855, + "95": 2.629897117614746, + "96": 2.5709469318389893, + "97": 2.6209893226623535, + "98": 2.556056261062622, + "99": 2.613426923751831, + "100": 2.5697927474975586, + "101": 2.60725474357605, + "102": 2.5579676628112793, + "103": 2.5875186920166016, + "104": 2.575045108795166, + "105": 2.615037441253662, + "106": 2.555068016052246, + "107": 2.5922436714172363 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 0.0833571, + 0.0361761, + 0.0772461, + 0.004249, + 0.281251, + 0.030409, + 0.114588, + 0.351684, + 0.203601, + 0.377055, + 0.121468, + 0.518273, + 0.481556, + 0.195668, + 0.402322, + 1.18855, + 1.84216, + 1.84832, + 0.851598, + 0.613127, + 0.489527, + 0.701876, + 1.47542, + 1.21706, + 0.629787, + 0.804224, + 1.61438, + 1.37178, + 0.802869, + 0.815509, + 0.871788, + 0.922513, + 1.00654, + 1.07438, + 1.10044, + 0.955645, + 0.714879, + 0.903223, + 1.8171, + 1.60108, + 0.976842, + 0.950795, + 1.07081, + 1.09155, + 1.04256, + 1.04359, + 0.998171, + 1.11115, + 1.15146, + 1.07393, + 0.876707, + 0.900538, + 1.02788, + 0.999292 + ], + "train_epoch_time": 5.0519633293151855, + "train_loss": 2.555980904988271, + "train_score": 0.2495438038404908, + "val_loss": 2.5955280745754834, + "val_score": 0.24849759639335967 + }, + { + "epoch": 2, + "grad_norm": 1.3384978771209717, + "learning_rate": 0.1, + "model_norm": 87.46625518798828, + "step_logs": { + "grad_norm": { + "108": 1.5474275350570679, + "109": 1.5940730571746826, + "110": 1.7816921472549438, + "111": 1.648228645324707, + "112": 1.3583422899246216, + "113": 1.363898515701294, + "114": 1.4790979623794556, + "115": 1.4925323724746704, + "116": 1.4969450235366821, + "117": 1.5330485105514526, + "118": 1.5277963876724243, + "119": 1.5661312341690063, + "120": 1.6411337852478027, + "121": 1.6089191436767578, + "122": 1.3647021055221558, + "123": 1.322136402130127, + "124": 1.4278115034103394, + "125": 1.4608854055404663, + "126": 1.5177714824676514, + "127": 1.5881707668304443, + "128": 1.5485810041427612, + "129": 1.5805672407150269, + "130": 1.6098755598068237, + "131": 1.533129096031189, + "132": 1.513983130455017, + "133": 1.4128550291061401, + "134": 1.3621541261672974, + "135": 1.3466095924377441, + "136": 1.4730033874511719, + "137": 1.386151909828186, + "138": 1.2447234392166138, + "139": 1.310735821723938, + "140": 1.3264137506484985, + "141": 1.370408296585083, + "142": 1.5405223369598389, + "143": 1.518472671508789, + "144": 1.3180512189865112, + "145": 1.2883540391921997, + "146": 1.3945871591567993, + "147": 1.470998764038086, + "148": 1.3696165084838867, + "149": 1.3303577899932861, + "150": 1.3642216920852661, + "151": 1.3836241960525513, + "152": 1.44769287109375, + "153": 1.5620477199554443, + "154": 1.7564959526062012, + "155": 1.6865432262420654, + "156": 1.4339799880981445, + "157": 1.3522312641143799, + "158": 1.279317021369934, + "159": 1.318213939666748, + "160": 1.3564062118530273, + "161": 1.3384978771209717 + }, + "loss": { + "108": 2.5644514560699463, + "109": 2.57401704788208, + "110": 2.565141201019287, + "111": 2.620950698852539, + "112": 2.5296711921691895, + "113": 2.5376832485198975, + "114": 2.560171604156494, + "115": 2.5790293216705322, + "116": 2.5469746589660645, + "117": 2.5625815391540527, + "118": 2.5406293869018555, + "119": 2.569234848022461, + "120": 2.5673959255218506, + "121": 2.5761876106262207, + "122": 2.5377085208892822, + "123": 2.5510940551757812, + "124": 2.55973482131958, + "125": 2.5625646114349365, + "126": 2.5147266387939453, + "127": 2.5600316524505615, + "128": 2.5553152561187744, + "129": 2.5767405033111572, + "130": 2.54809308052063, + "131": 2.5585923194885254, + "132": 2.5383718013763428, + "133": 2.5417728424072266, + "134": 2.5236687660217285, + "135": 2.540006160736084, + "136": 2.521879196166992, + "137": 2.5519471168518066, + "138": 2.5191805362701416, + "139": 2.539604663848877, + "140": 2.506265640258789, + "141": 2.5389111042022705, + "142": 2.5203967094421387, + "143": 2.5624523162841797, + "144": 2.5144500732421875, + "145": 2.5246286392211914, + "146": 2.5040059089660645, + "147": 2.523301839828491, + "148": 2.515552043914795, + "149": 2.525379180908203, + "150": 2.5191526412963867, + "151": 2.538222551345825, + "152": 2.502185583114624, + "153": 2.5259652137756348, + "154": 2.5255136489868164, + "155": 2.5799806118011475, + "156": 2.5219311714172363, + "157": 2.5506556034088135, + "158": 2.503695011138916, + "159": 2.5317506790161133, + "160": 2.5217232704162598, + "161": 2.523049831390381 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 1.07096, + 1.01297, + 0.808064, + 0.96477, + 1.37103, + 1.36419, + 1.17024, + 1.15773, + 1.13661, + 1.09035, + 1.08845, + 1.04748, + 0.953246, + 0.995197, + 1.36259, + 1.4594, + 1.25561, + 1.20072, + 1.09164, + 1.01496, + 1.06556, + 1.03144, + 0.983175, + 1.08854, + 1.10742, + 1.27333, + 1.36013, + 1.40072, + 1.1623, + 1.32816, + 1.62597, + 1.47821, + 1.42452, + 1.35191, + 1.06202, + 1.11133, + 1.44737, + 1.52099, + 1.28749, + 1.16612, + 1.34102, + 1.42689, + 1.35358, + 1.32585, + 1.1939, + 1.03523, + 0.818569, + 0.90703, + 1.22644, + 1.39492, + 1.52977, + 1.45697, + 1.37062, + 1.40828 + ], + "train_epoch_time": 5.0541157722473145, + "train_loss": 2.5136015885871656, + "train_score": 0.2566680864333422, + "val_loss": 2.5533794010000306, + "val_score": 0.25084313989780527 + }, + { + "epoch": 3, + "grad_norm": 1.318820595741272, + "learning_rate": 0.1, + "model_norm": 87.47798919677734, + "step_logs": { + "grad_norm": { + "162": 1.4114265441894531, + "163": 1.431295394897461, + "164": 1.4903470277786255, + "165": 1.4201840162277222, + "166": 1.419381022453308, + "167": 1.39986252784729, + "168": 1.2418795824050903, + "169": 1.1801707744598389, + "170": 1.2034727334976196, + "171": 1.2583473920822144, + "172": 1.3439244031906128, + "173": 1.396019697189331, + "174": 1.415901780128479, + "175": 1.3842941522598267, + "176": 1.349034070968628, + "177": 1.3424577713012695, + "178": 1.1667481660842896, + "179": 1.13761305809021, + "180": 1.2138445377349854, + "181": 1.2609201669692993, + "182": 1.2920323610305786, + "183": 1.2840451002120972, + "184": 1.3944884538650513, + "185": 1.389801263809204, + "186": 1.3639730215072632, + "187": 1.3643674850463867, + "188": 1.5542376041412354, + "189": 1.5981364250183105, + "190": 1.5491358041763306, + "191": 1.4451212882995605, + "192": 1.3491425514221191, + "193": 1.3006073236465454, + "194": 1.2778058052062988, + "195": 1.2574074268341064, + "196": 1.152113914489746, + "197": 1.2072381973266602, + "198": 1.4360460042953491, + "199": 1.5577796697616577, + "200": 1.5604770183563232, + "201": 1.447811245918274, + "202": 1.309539794921875, + "203": 1.2651509046554565, + "204": 1.1666126251220703, + "205": 1.3112585544586182, + "206": 1.5309338569641113, + "207": 1.518886923789978, + "208": 1.3946483135223389, + "209": 1.3189250230789185, + "210": 1.3046302795410156, + "211": 1.2808794975280762, + "212": 1.4370882511138916, + "213": 1.337239384651184, + "214": 1.225730538368225, + "215": 1.318820595741272 + }, + "loss": { + "162": 2.5070159435272217, + "163": 2.5475645065307617, + "164": 2.5105767250061035, + "165": 2.527508020401001, + "166": 2.5137524604797363, + "167": 2.559257984161377, + "168": 2.4882638454437256, + "169": 2.4969942569732666, + "170": 2.4993886947631836, + "171": 2.506685495376587, + "172": 2.500074863433838, + "173": 2.499678134918213, + "174": 2.511322021484375, + "175": 2.5015060901641846, + "176": 2.4881248474121094, + "177": 2.5125815868377686, + "178": 2.5039944648742676, + "179": 2.4961695671081543, + "180": 2.4842424392700195, + "181": 2.5197134017944336, + "182": 2.491075038909912, + "183": 2.5007705688476562, + "184": 2.501986503601074, + "185": 2.518996000289917, + "186": 2.5041511058807373, + "187": 2.5261926651000977, + "188": 2.5192923545837402, + "189": 2.541409492492676, + "190": 2.513819456100464, + "191": 2.5296738147735596, + "192": 2.4996180534362793, + "193": 2.5108487606048584, + "194": 2.475506067276001, + "195": 2.5060291290283203, + "196": 2.459573745727539, + "197": 2.503289222717285, + "198": 2.4924795627593994, + "199": 2.5321359634399414, + "200": 2.499049663543701, + "201": 2.5179004669189453, + "202": 2.4973392486572266, + "203": 2.5076847076416016, + "204": 2.4686331748962402, + "205": 2.4993233680725098, + "206": 2.507267475128174, + "207": 2.5081958770751953, + "208": 2.492157459259033, + "209": 2.5198802947998047, + "210": 2.4981138706207275, + "211": 2.4846696853637695, + "212": 2.4921669960021973, + "213": 2.51159930229187, + "214": 2.4895572662353516, + "215": 2.504476547241211 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 1.25846, + 1.24356, + 1.13031, + 1.25315, + 1.24774, + 1.306, + 1.61338, + 1.79278, + 1.72568, + 1.58307, + 1.38421, + 1.28263, + 1.25267, + 1.3054, + 1.36718, + 1.39418, + 1.83941, + 1.92879, + 1.68604, + 1.58481, + 1.49224, + 1.51675, + 1.28663, + 1.30413, + 1.34601, + 1.35707, + 1.0429, + 0.995055, + 1.0475, + 1.21131, + 1.37328, + 1.48432, + 1.51612, + 1.58502, + 1.85297, + 1.71761, + 1.20863, + 1.04346, + 1.02627, + 1.2012, + 1.45626, + 1.56671, + 1.81386, + 1.4536, + 1.06976, + 1.0872, + 1.28129, + 1.44857, + 1.4677, + 1.51444, + 1.20673, + 1.40453, + 1.65704, + 1.43994 + ], + "train_epoch_time": 5.051695108413696, + "train_loss": 2.4899268717471634, + "train_score": 0.27132801283551766, + "val_loss": 2.526738029670496, + "val_score": 0.2624049220580595 + }, + { + "epoch": 4, + "grad_norm": 1.3548485040664673, + "learning_rate": 0.1, + "model_norm": 87.48912048339844, + "step_logs": { + "grad_norm": { + "216": 1.348342776298523, + "217": 1.3550546169281006, + "218": 1.3296412229537964, + "219": 1.3579684495925903, + "220": 1.3660697937011719, + "221": 1.3053972721099854, + "222": 1.208400845527649, + "223": 1.1640582084655762, + "224": 1.1284453868865967, + "225": 1.1163370609283447, + "226": 1.2030351161956787, + "227": 1.3001850843429565, + "228": 1.265457034111023, + "229": 1.3263202905654907, + "230": 1.6208581924438477, + "231": 1.526879072189331, + "232": 1.2974154949188232, + "233": 1.3816595077514648, + "234": 1.4321775436401367, + "235": 1.4069297313690186, + "236": 1.4668422937393188, + "237": 1.3747762441635132, + "238": 1.439998984336853, + "239": 1.4145033359527588, + "240": 1.3636399507522583, + "241": 1.2812285423278809, + "242": 1.1984456777572632, + "243": 1.2026156187057495, + "244": 1.1779249906539917, + "245": 1.1976126432418823, + "246": 1.271930456161499, + "247": 1.385982871055603, + "248": 1.379332423210144, + "249": 1.3337682485580444, + "250": 1.4559179544448853, + "251": 1.5996768474578857, + "252": 1.3911381959915161, + "253": 1.236518383026123, + "254": 1.2514238357543945, + "255": 1.2472258806228638, + "256": 1.1549932956695557, + "257": 1.204041838645935, + "258": 1.369576096534729, + "259": 1.5055357217788696, + "260": 1.5250625610351562, + "261": 1.3742485046386719, + "262": 1.2223076820373535, + "263": 1.14608633518219, + "264": 1.0943989753723145, + "265": 1.1048413515090942, + "266": 1.308950662612915, + "267": 1.4192947149276733, + "268": 1.4301488399505615, + "269": 1.3548485040664673 + }, + "loss": { + "216": 2.4909420013427734, + "217": 2.4906444549560547, + "218": 2.466233253479004, + "219": 2.5231754779815674, + "220": 2.504706382751465, + "221": 2.4931962490081787, + "222": 2.495638370513916, + "223": 2.4765968322753906, + "224": 2.489243984222412, + "225": 2.465637683868408, + "226": 2.4790308475494385, + "227": 2.4879531860351562, + "228": 2.487992525100708, + "229": 2.4812231063842773, + "230": 2.496302604675293, + "231": 2.5248308181762695, + "232": 2.4637451171875, + "233": 2.4943437576293945, + "234": 2.497357130050659, + "235": 2.4934983253479004, + "236": 2.506168842315674, + "237": 2.507537603378296, + "238": 2.4810452461242676, + "239": 2.504035472869873, + "240": 2.488614082336426, + "241": 2.5019989013671875, + "242": 2.461118698120117, + "243": 2.4807281494140625, + "244": 2.4645473957061768, + "245": 2.4701766967773438, + "246": 2.4564895629882812, + "247": 2.483874559402466, + "248": 2.5018720626831055, + "249": 2.481210470199585, + "250": 2.483489513397217, + "251": 2.5111019611358643, + "252": 2.515650987625122, + "253": 2.4815587997436523, + "254": 2.4624993801116943, + "255": 2.4724130630493164, + "256": 2.4616270065307617, + "257": 2.4670495986938477, + "258": 2.475165605545044, + "259": 2.4850661754608154, + "260": 2.49428391456604, + "261": 2.486996650695801, + "262": 2.4579572677612305, + "263": 2.4820985794067383, + "264": 2.478109359741211, + "265": 2.466869592666626, + "266": 2.4649291038513184, + "267": 2.488013744354248, + "268": 2.4809932708740234, + "269": 2.484416961669922 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 1.37013, + 1.35643, + 1.39497, + 1.36826, + 1.34218, + 1.46309, + 1.70907, + 1.8277, + 1.95482, + 1.97851, + 1.71287, + 1.47174, + 1.55365, + 1.41049, + 0.950183, + 1.08299, + 1.46365, + 1.30663, + 1.21755, + 1.25969, + 1.16478, + 1.32673, + 1.19649, + 1.2515, + 1.33831, + 1.52417, + 1.71355, + 1.71524, + 1.77624, + 1.72225, + 1.51841, + 1.29305, + 1.315, + 1.39477, + 1.17162, + 0.981296, + 1.2999, + 1.62302, + 1.57242, + 1.58939, + 1.84528, + 1.70175, + 1.31957, + 1.09637, + 1.07243, + 1.31688, + 1.64518, + 1.88966, + 2.06904, + 2.02091, + 1.43866, + 1.23512, + 1.21301, + 1.35345 + ], + "train_epoch_time": 5.052002429962158, + "train_loss": 2.471717495555686, + "train_score": 0.2667839848071636, + "val_loss": 2.5183303096127427, + "val_score": 0.26122990770673915 + }, + { + "epoch": 5, + "grad_norm": 1.5109161138534546, + "learning_rate": 0.1, + "model_norm": 87.50033569335938, + "step_logs": { + "grad_norm": { + "270": 1.4047431945800781, + "271": 1.421698808670044, + "272": 1.3978400230407715, + "273": 1.3299293518066406, + "274": 1.2401758432388306, + "275": 1.2176589965820312, + "276": 1.1978715658187866, + "277": 1.1949207782745361, + "278": 1.2177835702896118, + "279": 1.224126935005188, + "280": 1.2096213102340698, + "281": 1.2558765411376953, + "282": 1.443474531173706, + "283": 1.596191167831421, + "284": 1.6108492612838745, + "285": 1.6240370273590088, + "286": 1.6148786544799805, + "287": 1.5551954507827759, + "288": 1.4496687650680542, + "289": 1.3272839784622192, + "290": 1.3802237510681152, + "291": 1.3543483018875122, + "292": 1.245140790939331, + "293": 1.219671607017517, + "294": 1.2365336418151855, + "295": 1.2203431129455566, + "296": 1.2545716762542725, + "297": 1.3500263690948486, + "298": 1.4065240621566772, + "299": 1.3695147037506104, + "300": 1.2958126068115234, + "301": 1.3573617935180664, + "302": 1.5311113595962524, + "303": 1.4070990085601807, + "304": 1.1375900506973267, + "305": 1.159070372581482, + "306": 1.2499953508377075, + "307": 1.332653522491455, + "308": 1.3756132125854492, + "309": 1.2655593156814575, + "310": 1.1920593976974487, + "311": 1.2005290985107422, + "312": 1.196609616279602, + "313": 1.2147624492645264, + "314": 1.1494425535202026, + "315": 1.172121524810791, + "316": 1.1625635623931885, + "317": 1.2386637926101685, + "318": 1.3741620779037476, + "319": 1.376841425895691, + "320": 1.3388792276382446, + "321": 1.3464971780776978, + "322": 1.405653715133667, + "323": 1.5109161138534546 + }, + "loss": { + "270": 2.478456974029541, + "271": 2.4920506477355957, + "272": 2.472493886947632, + "273": 2.4852397441864014, + "274": 2.4534354209899902, + "275": 2.4587340354919434, + "276": 2.4528369903564453, + "277": 2.4688453674316406, + "278": 2.445171356201172, + "279": 2.4824700355529785, + "280": 2.448643684387207, + "281": 2.4714701175689697, + "282": 2.4722397327423096, + "283": 2.516361713409424, + "284": 2.478762149810791, + "285": 2.497800827026367, + "286": 2.5022873878479004, + "287": 2.509681463241577, + "288": 2.484309673309326, + "289": 2.4764108657836914, + "290": 2.4673147201538086, + "291": 2.472403049468994, + "292": 2.444923162460327, + "293": 2.458810329437256, + "294": 2.4640769958496094, + "295": 2.4554147720336914, + "296": 2.461606740951538, + "297": 2.4729299545288086, + "298": 2.4781930446624756, + "299": 2.473634719848633, + "300": 2.457024335861206, + "301": 2.4785900115966797, + "302": 2.4860496520996094, + "303": 2.472198724746704, + "304": 2.4644036293029785, + "305": 2.466243267059326, + "306": 2.452704429626465, + "307": 2.4463019371032715, + "308": 2.481018543243408, + "309": 2.4876208305358887, + "310": 2.466614007949829, + "311": 2.4680709838867188, + "312": 2.450718641281128, + "313": 2.460909843444824, + "314": 2.448789596557617, + "315": 2.443652629852295, + "316": 2.418773651123047, + "317": 2.4618258476257324, + "318": 2.452504873275757, + "319": 2.447181224822998, + "320": 2.476144313812256, + "321": 2.4466404914855957, + "322": 2.4622063636779785, + "323": 2.4617767333984375 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 1.25599, + 1.23294, + 1.26538, + 1.40511, + 1.59517, + 1.65829, + 1.70942, + 1.72908, + 1.6488, + 1.65665, + 1.6735, + 1.56697, + 1.18651, + 0.98765, + 0.955268, + 0.947035, + 0.959527, + 1.03764, + 1.18214, + 1.40571, + 1.29517, + 1.3479, + 1.57699, + 1.65287, + 1.61154, + 1.64877, + 1.56397, + 1.35684, + 1.25268, + 1.31887, + 1.46327, + 1.34528, + 1.06046, + 1.24863, + 1.90432, + 1.83576, + 1.56974, + 1.37745, + 1.3111, + 1.55317, + 1.73582, + 1.71243, + 1.71155, + 1.66768, + 1.85343, + 1.77867, + 1.78962, + 1.60454, + 1.29878, + 1.29092, + 1.38132, + 1.34946, + 1.24614, + 1.07837 + ], + "train_epoch_time": 5.051944971084595, + "train_loss": 2.4745563034347686, + "train_score": 0.26685347926496944, + "val_loss": 2.5326947599273053, + "val_score": 0.25766001770061137 + }, + { + "epoch": 6, + "grad_norm": 1.759065866470337, + "learning_rate": 0.1, + "model_norm": 87.5138931274414, + "step_logs": { + "grad_norm": { + "324": 1.623439908027649, + "325": 1.6806143522262573, + "326": 1.5055813789367676, + "327": 1.2876757383346558, + "328": 1.254889726638794, + "329": 1.409358024597168, + "330": 1.5430922508239746, + "331": 1.5045151710510254, + "332": 1.4431153535842896, + "333": 1.3577274084091187, + "334": 1.2942644357681274, + "335": 1.1906518936157227, + "336": 1.1726138591766357, + "337": 1.2196074724197388, + "338": 1.188008427619934, + "339": 1.0564650297164917, + "340": 1.066809058189392, + "341": 1.1904737949371338, + "342": 1.3305294513702393, + "343": 1.4903757572174072, + "344": 1.6333520412445068, + "345": 1.7529473304748535, + "346": 1.5258636474609375, + "347": 1.3201848268508911, + "348": 1.2253371477127075, + "349": 1.1936882734298706, + "350": 1.2745351791381836, + "351": 1.310713291168213, + "352": 1.3653395175933838, + "353": 1.4823520183563232, + "354": 1.567481279373169, + "355": 1.689394474029541, + "356": 1.49756920337677, + "357": 1.1672707796096802, + "358": 1.0936285257339478, + "359": 1.1354784965515137, + "360": 1.26737380027771, + "361": 1.3135985136032104, + "362": 1.3856745958328247, + "363": 1.3981680870056152, + "364": 1.369124174118042, + "365": 1.3923665285110474, + "366": 1.3907634019851685, + "367": 1.3212506771087646, + "368": 1.3154053688049316, + "369": 1.2992113828659058, + "370": 1.2521928548812866, + "371": 1.3133469820022583, + "372": 1.432778239250183, + "373": 1.5911734104156494, + "374": 1.5572890043258667, + "375": 1.640194296836853, + "376": 1.7942968606948853, + "377": 1.759065866470337 + }, + "loss": { + "324": 2.4846949577331543, + "325": 2.508121967315674, + "326": 2.4766106605529785, + "327": 2.4612317085266113, + "328": 2.45953369140625, + "329": 2.449069023132324, + "330": 2.4828591346740723, + "331": 2.4601805210113525, + "332": 2.455899238586426, + "333": 2.4449198246002197, + "334": 2.4369330406188965, + "335": 2.459473133087158, + "336": 2.4395594596862793, + "337": 2.4353997707366943, + "338": 2.478641986846924, + "339": 2.430203914642334, + "340": 2.434865951538086, + "341": 2.433072566986084, + "342": 2.4332549571990967, + "343": 2.4297852516174316, + "344": 2.465857982635498, + "345": 2.45650577545166, + "346": 2.4539458751678467, + "347": 2.442265033721924, + "348": 2.4369921684265137, + "349": 2.4326696395874023, + "350": 2.422942638397217, + "351": 2.448277235031128, + "352": 2.436768054962158, + "353": 2.440791606903076, + "354": 2.457829236984253, + "355": 2.453620433807373, + "356": 2.4883389472961426, + "357": 2.4340109825134277, + "358": 2.428116798400879, + "359": 2.4015393257141113, + "360": 2.41034197807312, + "361": 2.417464256286621, + "362": 2.421893358230591, + "363": 2.431382656097412, + "364": 2.4245219230651855, + "365": 2.4390602111816406, + "366": 2.4091711044311523, + "367": 2.437689781188965, + "368": 2.424776315689087, + "369": 2.430070638656616, + "370": 2.4288299083709717, + "371": 2.420199155807495, + "372": 2.4276294708251953, + "373": 2.4442105293273926, + "374": 2.4462759494781494, + "375": 2.421290636062622, + "376": 2.4667458534240723, + "377": 2.4498980045318604 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 0.942759, + 0.887999, + 1.09257, + 1.48436, + 1.56186, + 1.23299, + 1.04272, + 1.08686, + 1.17926, + 1.32629, + 1.45478, + 1.73489, + 1.7742, + 1.63731, + 1.7562, + 2.17737, + 2.13945, + 1.71678, + 1.37448, + 1.0939, + 0.92429, + 0.799429, + 1.05398, + 1.40127, + 1.62309, + 1.70727, + 1.49156, + 1.4251, + 1.30717, + 1.11078, + 1.00034, + 0.859697, + 1.10952, + 1.7864, + 2.03016, + 1.86265, + 1.50061, + 1.40099, + 1.26134, + 1.24375, + 1.29342, + 1.2581, + 1.24555, + 1.39639, + 1.40137, + 1.43966, + 1.54901, + 1.40311, + 1.18256, + 0.965392, + 1.00871, + 0.900029, + 0.766189, + 0.791742 + ], + "train_epoch_time": 5.054595947265625, + "train_loss": 2.4428781078398827, + "train_score": 0.2725889974293784, + "val_loss": 2.4942171108298408, + "val_score": 0.2624138918385577 + }, + { + "epoch": 7, + "grad_norm": 1.4262630939483643, + "learning_rate": 0.1, + "model_norm": 87.52951049804688, + "step_logs": { + "grad_norm": { + "378": 1.617846965789795, + "379": 1.411563754081726, + "380": 1.2483372688293457, + "381": 1.1806268692016602, + "382": 1.2389273643493652, + "383": 1.4030169248580933, + "384": 1.276199221611023, + "385": 1.108875036239624, + "386": 1.008466124534607, + "387": 0.9886185526847839, + "388": 1.0921992063522339, + "389": 1.3389133214950562, + "390": 1.4035688638687134, + "391": 1.41665518283844, + "392": 1.41960871219635, + "393": 1.592949628829956, + "394": 1.620247721672058, + "395": 1.6470298767089844, + "396": 1.545442819595337, + "397": 1.4301609992980957, + "398": 1.3720855712890625, + "399": 1.3858622312545776, + "400": 1.49479341506958, + "401": 1.562925100326538, + "402": 1.5144386291503906, + "403": 1.4692139625549316, + "404": 1.5100115537643433, + "405": 1.483263373374939, + "406": 1.3693817853927612, + "407": 1.4439547061920166, + "408": 1.4968265295028687, + "409": 1.340765357017517, + "410": 1.3486063480377197, + "411": 1.4067001342773438, + "412": 1.4785293340682983, + "413": 1.5283474922180176, + "414": 1.4990612268447876, + "415": 1.5192127227783203, + "416": 1.5972009897232056, + "417": 1.4781601428985596, + "418": 1.340295433998108, + "419": 1.2414122819900513, + "420": 1.1782641410827637, + "421": 1.3022778034210205, + "422": 1.4148863554000854, + "423": 1.7196805477142334, + "424": 1.660214900970459, + "425": 1.3662163019180298, + "426": 1.265149474143982, + "427": 1.2317677736282349, + "428": 1.3024029731750488, + "429": 1.4698814153671265, + "430": 1.4755594730377197, + "431": 1.4262630939483643 + }, + "loss": { + "378": 2.464127540588379, + "379": 2.4174060821533203, + "380": 2.411942481994629, + "381": 2.3847360610961914, + "382": 2.396725654602051, + "383": 2.4089713096618652, + "384": 2.408296585083008, + "385": 2.385756015777588, + "386": 2.380687713623047, + "387": 2.3640313148498535, + "388": 2.349022388458252, + "389": 2.386237621307373, + "390": 2.4364547729492188, + "391": 2.394516944885254, + "392": 2.3872575759887695, + "393": 2.3922128677368164, + "394": 2.432094097137451, + "395": 2.408723831176758, + "396": 2.4120514392852783, + "397": 2.4008212089538574, + "398": 2.387998104095459, + "399": 2.394864082336426, + "400": 2.39601731300354, + "401": 2.406705379486084, + "402": 2.424264907836914, + "403": 2.4021058082580566, + "404": 2.4218873977661133, + "405": 2.378183364868164, + "406": 2.3882908821105957, + "407": 2.386151075363159, + "408": 2.4186501502990723, + "409": 2.37593936920166, + "410": 2.3756043910980225, + "411": 2.3814640045166016, + "412": 2.3972043991088867, + "413": 2.3847219944000244, + "414": 2.3922367095947266, + "415": 2.3874189853668213, + "416": 2.4014251232147217, + "417": 2.4035773277282715, + "418": 2.3629751205444336, + "419": 2.3751280307769775, + "420": 2.365384101867676, + "421": 2.3484015464782715, + "422": 2.389936923980713, + "423": 2.3830745220184326, + "424": 2.41225266456604, + "425": 2.3652896881103516, + "426": 2.353670597076416, + "427": 2.3570289611816406, + "428": 2.361449718475342, + "429": 2.3687450885772705, + "430": 2.3891549110412598, + "431": 2.366312026977539 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 0.941431, + 1.21325, + 1.54776, + 1.71086, + 1.56144, + 1.22379, + 1.47868, + 1.94026, + 2.34088, + 2.41878, + 1.96917, + 1.33109, + 1.23678, + 1.19314, + 1.18457, + 0.942748, + 0.926441, + 0.887941, + 1.00991, + 1.17379, + 1.26844, + 1.24693, + 1.07233, + 0.98525, + 1.057, + 1.11281, + 1.06217, + 1.08096, + 1.27361, + 1.14443, + 1.07952, + 1.32169, + 1.30618, + 1.20349, + 1.09659, + 1.02092, + 1.06455, + 1.03441, + 0.941347, + 1.10006, + 1.3154, + 1.54119, + 1.70379, + 1.38473, + 1.19383, + 0.805827, + 0.875174, + 1.2672, + 1.47049, + 1.55349, + 1.39216, + 1.09636, + 1.09731, + 1.16325 + ], + "train_epoch_time": 5.0514843463897705, + "train_loss": 2.3739958438845927, + "train_score": 0.295296807815902, + "val_loss": 2.4253303528379493, + "val_score": 0.283129126415187 + }, + { + "epoch": 8, + "grad_norm": 1.444602131843567, + "learning_rate": 0.1, + "model_norm": 87.54541778564453, + "step_logs": { + "grad_norm": { + "432": 1.3425726890563965, + "433": 1.2621179819107056, + "434": 1.2779802083969116, + "435": 1.4188487529754639, + "436": 1.4042917490005493, + "437": 1.2528634071350098, + "438": 1.282992959022522, + "439": 1.380332112312317, + "440": 1.3788363933563232, + "441": 1.6068919897079468, + "442": 1.6531224250793457, + "443": 1.3426464796066284, + "444": 1.1646243333816528, + "445": 1.2227520942687988, + "446": 1.4188110828399658, + "447": 1.4509479999542236, + "448": 1.4236797094345093, + "449": 1.4222859144210815, + "450": 1.3198795318603516, + "451": 1.278914213180542, + "452": 1.3036612272262573, + "453": 1.3619812726974487, + "454": 1.386054515838623, + "455": 1.497849464416504, + "456": 1.5307101011276245, + "457": 1.4602429866790771, + "458": 1.4081178903579712, + "459": 1.3239916563034058, + "460": 1.3485093116760254, + "461": 1.3465546369552612, + "462": 1.2619314193725586, + "463": 1.2228964567184448, + "464": 1.3545159101486206, + "465": 1.57125985622406, + "466": 1.5788429975509644, + "467": 1.5804986953735352, + "468": 1.5144660472869873, + "469": 1.399890661239624, + "470": 1.3252006769180298, + "471": 1.2314919233322144, + "472": 1.1383312940597534, + "473": 1.0897200107574463, + "474": 1.1071062088012695, + "475": 1.2052793502807617, + "476": 1.3018338680267334, + "477": 1.5188758373260498, + "478": 1.4975824356079102, + "479": 1.4089040756225586, + "480": 1.4930580854415894, + "481": 1.7426979541778564, + "482": 1.6179229021072388, + "483": 1.3499640226364136, + "484": 1.4126147031784058, + "485": 1.444602131843567 + }, + "loss": { + "432": 2.396427631378174, + "433": 2.344536304473877, + "434": 2.370551109313965, + "435": 2.37554931640625, + "436": 2.382157325744629, + "437": 2.3646914958953857, + "438": 2.3248276710510254, + "439": 2.357404947280884, + "440": 2.358269691467285, + "441": 2.356196403503418, + "442": 2.4061853885650635, + "443": 2.3650639057159424, + "444": 2.370314598083496, + "445": 2.356516122817993, + "446": 2.3476247787475586, + "447": 2.379183769226074, + "448": 2.3693859577178955, + "449": 2.377377986907959, + "450": 2.3442137241363525, + "451": 2.3414998054504395, + "452": 2.34651517868042, + "453": 2.3363609313964844, + "454": 2.358280658721924, + "455": 2.3616933822631836, + "456": 2.363029956817627, + "457": 2.364698886871338, + "458": 2.3509631156921387, + "459": 2.349639415740967, + "460": 2.338320732116699, + "461": 2.3632969856262207, + "462": 2.3475143909454346, + "463": 2.334425926208496, + "464": 2.3368706703186035, + "465": 2.352728843688965, + "466": 2.3565022945404053, + "467": 2.3745017051696777, + "468": 2.3789525032043457, + "469": 2.3568365573883057, + "470": 2.3726563453674316, + "471": 2.3423638343811035, + "472": 2.332307815551758, + "473": 2.3284988403320312, + "474": 2.323655128479004, + "475": 2.3422038555145264, + "476": 2.3303956985473633, + "477": 2.3193583488464355, + "478": 2.379096269607544, + "479": 2.3467745780944824, + "480": 2.3588194847106934, + "481": 2.3436622619628906, + "482": 2.3912572860717773, + "483": 2.3228673934936523, + "484": 2.329345703125, + "485": 2.345519542694092 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 1.3295, + 1.47183, + 1.45145, + 1.18003, + 1.20797, + 1.50649, + 1.41235, + 1.23728, + 1.24042, + 0.912511, + 0.880479, + 1.31196, + 1.74757, + 1.57614, + 1.16622, + 1.13012, + 1.16899, + 1.17523, + 1.34564, + 1.43157, + 1.38068, + 1.2595, + 1.22754, + 1.05266, + 1.00852, + 1.10898, + 1.18568, + 1.34039, + 1.28587, + 1.30338, + 1.47413, + 1.56099, + 1.2737, + 0.952963, + 0.945344, + 0.95057, + 1.03721, + 1.20266, + 1.35105, + 1.54451, + 1.7999, + 1.96086, + 1.8958, + 1.61231, + 1.37505, + 1.00536, + 1.06079, + 1.18225, + 1.05814, + 0.771704, + 0.913504, + 1.27462, + 1.16731, + 1.12394 + ], + "train_epoch_time": 5.051590204238892, + "train_loss": 2.346574435582975, + "train_score": 0.302081465216586, + "val_loss": 2.3984540513407624, + "val_score": 0.2889907437405548 + }, + { + "epoch": 9, + "grad_norm": 1.709283709526062, + "learning_rate": 0.1, + "model_norm": 87.56116485595703, + "step_logs": { + "grad_norm": { + "486": 1.426444411277771, + "487": 1.2690880298614502, + "488": 1.1729875802993774, + "489": 1.2084312438964844, + "490": 1.231128215789795, + "491": 1.265526533126831, + "492": 1.3779784440994263, + "493": 1.3759522438049316, + "494": 1.260408878326416, + "495": 1.2346863746643066, + "496": 1.2666555643081665, + "497": 1.2679500579833984, + "498": 1.2630565166473389, + "499": 1.2052359580993652, + "500": 1.1893833875656128, + "501": 1.1256600618362427, + "502": 1.1359037160873413, + "503": 1.1839313507080078, + "504": 1.2609279155731201, + "505": 1.2821588516235352, + "506": 1.2694697380065918, + "507": 1.177640438079834, + "508": 1.1328892707824707, + "509": 1.3132929801940918, + "510": 1.3829059600830078, + "511": 1.4194427728652954, + "512": 1.440608263015747, + "513": 1.5464714765548706, + "514": 1.6855614185333252, + "515": 1.8420305252075195, + "516": 1.544111967086792, + "517": 1.2531142234802246, + "518": 1.4684197902679443, + "519": 1.57754647731781, + "520": 1.4499608278274536, + "521": 1.3234126567840576, + "522": 1.2215524911880493, + "523": 1.2147200107574463, + "524": 1.2052631378173828, + "525": 1.2438725233078003, + "526": 1.429188847541809, + "527": 1.4128731489181519, + "528": 1.2569087743759155, + "529": 1.1902834177017212, + "530": 1.2827911376953125, + "531": 1.4841653108596802, + "532": 1.6421515941619873, + "533": 1.5563585758209229, + "534": 1.5640913248062134, + "535": 1.899158239364624, + "536": 1.9190765619277954, + "537": 1.7355573177337646, + "538": 1.6624763011932373, + "539": 1.709283709526062 + }, + "loss": { + "486": 2.3604815006256104, + "487": 2.344442844390869, + "488": 2.3220529556274414, + "489": 2.3139679431915283, + "490": 2.3063836097717285, + "491": 2.3233680725097656, + "492": 2.3442485332489014, + "493": 2.352097272872925, + "494": 2.30673885345459, + "495": 2.3149304389953613, + "496": 2.334404230117798, + "497": 2.3126087188720703, + "498": 2.3284311294555664, + "499": 2.310431957244873, + "500": 2.315993309020996, + "501": 2.3191850185394287, + "502": 2.3066604137420654, + "503": 2.294793128967285, + "504": 2.2924513816833496, + "505": 2.3190152645111084, + "506": 2.326279878616333, + "507": 2.2896616458892822, + "508": 2.322035551071167, + "509": 2.326859951019287, + "510": 2.309098958969116, + "511": 2.3234498500823975, + "512": 2.341808319091797, + "513": 2.332016944885254, + "514": 2.341855525970459, + "515": 2.336174964904785, + "516": 2.363861560821533, + "517": 2.3061585426330566, + "518": 2.3379969596862793, + "519": 2.3325400352478027, + "520": 2.3183236122131348, + "521": 2.3377583026885986, + "522": 2.3155860900878906, + "523": 2.3229575157165527, + "524": 2.3258862495422363, + "525": 2.301849365234375, + "526": 2.313551425933838, + "527": 2.3046035766601562, + "528": 2.290283203125, + "529": 2.287106990814209, + "530": 2.295423984527588, + "531": 2.2950873374938965, + "532": 2.3275697231292725, + "533": 2.335520029067993, + "534": 2.331029176712036, + "535": 2.3500795364379883, + "536": 2.379878282546997, + "537": 2.3246142864227295, + "538": 2.3307132720947266, + "539": 2.3478667736053467 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 1.16009, + 1.45565, + 1.68766, + 1.58458, + 1.52169, + 1.45069, + 1.23458, + 1.24236, + 1.45203, + 1.51853, + 1.45499, + 1.43846, + 1.45955, + 1.59056, + 1.63717, + 1.83029, + 1.78772, + 1.63716, + 1.44185, + 1.41065, + 1.4435, + 1.651, + 1.80923, + 1.34911, + 1.20742, + 1.15318, + 1.12839, + 0.975097, + 0.824273, + 0.688512, + 0.991435, + 1.46861, + 1.08429, + 0.93727, + 1.10271, + 1.33478, + 1.5518, + 1.57431, + 1.60112, + 1.48773, + 1.13266, + 1.15449, + 1.44971, + 1.61431, + 1.39493, + 1.04192, + 0.86313, + 0.964194, + 0.952848, + 0.651569, + 0.646205, + 0.771744, + 0.843292, + 0.80361 + ], + "train_epoch_time": 5.052663326263428, + "train_loss": 2.3349281706461094, + "train_score": 0.3076578190969771, + "val_loss": 2.3837451266913954, + "val_score": 0.2937984352612742 + }, + { + "epoch": 10, + "grad_norm": 1.2902566194534302, + "learning_rate": 0.1, + "model_norm": 87.578369140625, + "step_logs": { + "grad_norm": { + "540": 1.5759263038635254, + "541": 1.3194650411605835, + "542": 1.3201873302459717, + "543": 1.3581231832504272, + "544": 1.4158029556274414, + "545": 1.5097893476486206, + "546": 1.4278348684310913, + "547": 1.3444151878356934, + "548": 1.2905628681182861, + "549": 1.1409507989883423, + "550": 0.976250171661377, + "551": 0.8401185274124146, + "552": 0.8717550039291382, + "553": 0.9516346454620361, + "554": 1.0399898290634155, + "555": 1.0360325574874878, + "556": 1.1502712965011597, + "557": 1.2456767559051514, + "558": 1.3800832033157349, + "559": 1.5062284469604492, + "560": 1.6548281908035278, + "561": 1.8384106159210205, + "562": 1.6748757362365723, + "563": 1.4818562269210815, + "564": 1.5310593843460083, + "565": 1.5373516082763672, + "566": 1.4384281635284424, + "567": 1.3966439962387085, + "568": 1.2711124420166016, + "569": 1.3061379194259644, + "570": 1.3145487308502197, + "571": 1.4513746500015259, + "572": 1.6005983352661133, + "573": 1.7970792055130005, + "574": 1.8261210918426514, + "575": 1.5491801500320435, + "576": 1.4420125484466553, + "577": 1.3286991119384766, + "578": 1.2842791080474854, + "579": 1.2154967784881592, + "580": 1.1259206533432007, + "581": 1.1142082214355469, + "582": 1.096204161643982, + "583": 1.1447943449020386, + "584": 1.3225001096725464, + "585": 1.3663084506988525, + "586": 1.3333994150161743, + "587": 1.2913836240768433, + "588": 1.3958765268325806, + "589": 1.5368345975875854, + "590": 1.6467427015304565, + "591": 1.6730985641479492, + "592": 1.4967221021652222, + "593": 1.2902566194534302 + }, + "loss": { + "540": 2.341536521911621, + "541": 2.2961456775665283, + "542": 2.2922070026397705, + "543": 2.2935428619384766, + "544": 2.302182197570801, + "545": 2.318861722946167, + "546": 2.330303907394409, + "547": 2.285210609436035, + "548": 2.2862589359283447, + "549": 2.274963855743408, + "550": 2.2744057178497314, + "551": 2.2525784969329834, + "552": 2.2742624282836914, + "553": 2.2582478523254395, + "554": 2.26719069480896, + "555": 2.2740767002105713, + "556": 2.2779016494750977, + "557": 2.2638401985168457, + "558": 2.2591631412506104, + "559": 2.285280704498291, + "560": 2.302722692489624, + "561": 2.30159330368042, + "562": 2.3355891704559326, + "563": 2.320845603942871, + "564": 2.2858409881591797, + "565": 2.2855262756347656, + "566": 2.3017635345458984, + "567": 2.2737414836883545, + "568": 2.267089366912842, + "569": 2.2763099670410156, + "570": 2.3022842407226562, + "571": 2.274648666381836, + "572": 2.296595811843872, + "573": 2.279085874557495, + "574": 2.305741310119629, + "575": 2.307661533355713, + "576": 2.26253342628479, + "577": 2.267441987991333, + "578": 2.2679781913757324, + "579": 2.26358699798584, + "580": 2.25803804397583, + "581": 2.251507520675659, + "582": 2.234433889389038, + "583": 2.239211320877075, + "584": 2.2679219245910645, + "585": 2.2594470977783203, + "586": 2.2744503021240234, + "587": 2.2897608280181885, + "588": 2.266309976577759, + "589": 2.2815120220184326, + "590": 2.282259941101074, + "591": 2.298517942428589, + "592": 2.275049924850464, + "593": 2.2507901191711426 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 0.942821, + 1.31888, + 1.31517, + 1.24345, + 1.14851, + 1.01728, + 1.14303, + 1.26433, + 1.37267, + 1.74759, + 2.38641, + 3.19153, + 2.99262, + 2.49362, + 2.09619, + 2.11865, + 1.72161, + 1.45893, + 1.18614, + 1.0073, + 0.840883, + 0.680994, + 0.83259, + 1.0569, + 0.975129, + 0.96703, + 1.11246, + 1.16565, + 1.40314, + 1.3343, + 1.33231, + 1.07983, + 0.896437, + 0.70571, + 0.691435, + 0.961542, + 1.08807, + 1.28435, + 1.37505, + 1.53211, + 1.78121, + 1.8136, + 1.85945, + 1.7086, + 1.29669, + 1.21033, + 1.27925, + 1.37303, + 1.16312, + 0.965981, + 0.841615, + 0.821117, + 1.01557, + 1.35202 + ], + "train_epoch_time": 5.051812648773193, + "train_loss": 2.255965086308921, + "train_score": 0.33647439928786826, + "val_loss": 2.3217272350626614, + "val_score": 0.3190343356365178 + }, + { + "epoch": 11, + "grad_norm": 1.4152730703353882, + "learning_rate": 0.1, + "model_norm": 87.59550476074219, + "step_logs": { + "grad_norm": { + "594": 1.258890151977539, + "595": 1.203203558921814, + "596": 1.2106739282608032, + "597": 1.246432900428772, + "598": 1.46476149559021, + "599": 1.6713200807571411, + "600": 1.7261934280395508, + "601": 1.5266212224960327, + "602": 1.257462501525879, + "603": 1.2569507360458374, + "604": 1.2369118928909302, + "605": 1.3587913513183594, + "606": 1.450439453125, + "607": 1.4521106481552124, + "608": 1.488783597946167, + "609": 1.5188524723052979, + "610": 1.4980356693267822, + "611": 1.445404291152954, + "612": 1.469069242477417, + "613": 1.4227392673492432, + "614": 1.3973402976989746, + "615": 1.3478527069091797, + "616": 1.3153905868530273, + "617": 1.339142084121704, + "618": 1.514383316040039, + "619": 1.6853901147842407, + "620": 1.7181789875030518, + "621": 1.6907334327697754, + "622": 1.5309399366378784, + "623": 1.5674694776535034, + "624": 1.5582903623580933, + "625": 1.5204417705535889, + "626": 1.369624137878418, + "627": 1.2383073568344116, + "628": 1.224563717842102, + "629": 1.2779327630996704, + "630": 1.2952293157577515, + "631": 1.2530248165130615, + "632": 1.203320026397705, + "633": 1.2130340337753296, + "634": 1.1765729188919067, + "635": 1.1974188089370728, + "636": 1.2775593996047974, + "637": 1.4810636043548584, + "638": 1.3493518829345703, + "639": 1.246085286140442, + "640": 1.19171941280365, + "641": 1.2672860622406006, + "642": 1.3846321105957031, + "643": 1.3609858751296997, + "644": 1.3878403902053833, + "645": 1.3903461694717407, + "646": 1.3772350549697876, + "647": 1.4152730703353882 + }, + "loss": { + "594": 2.2567625045776367, + "595": 2.241260290145874, + "596": 2.2644906044006348, + "597": 2.241572141647339, + "598": 2.259786605834961, + "599": 2.2448043823242188, + "600": 2.2831976413726807, + "601": 2.290914535522461, + "602": 2.240591049194336, + "603": 2.258387327194214, + "604": 2.2383036613464355, + "605": 2.2649953365325928, + "606": 2.269930362701416, + "607": 2.2779407501220703, + "608": 2.2678422927856445, + "609": 2.2533488273620605, + "610": 2.2468838691711426, + "611": 2.275527000427246, + "612": 2.243919849395752, + "613": 2.278785228729248, + "614": 2.2456555366516113, + "615": 2.26540470123291, + "616": 2.272749662399292, + "617": 2.2570319175720215, + "618": 2.2563540935516357, + "619": 2.2770133018493652, + "620": 2.2670493125915527, + "621": 2.273940086364746, + "622": 2.2614378929138184, + "623": 2.248924970626831, + "624": 2.2827892303466797, + "625": 2.2635655403137207, + "626": 2.250562906265259, + "627": 2.216752052307129, + "628": 2.231747627258301, + "629": 2.206434965133667, + "630": 2.2379536628723145, + "631": 2.2190659046173096, + "632": 2.2118284702301025, + "633": 2.243088722229004, + "634": 2.2341508865356445, + "635": 2.221170425415039, + "636": 2.2227730751037598, + "637": 2.257899284362793, + "638": 2.2596077919006348, + "639": 2.2156600952148438, + "640": 2.2326769828796387, + "641": 2.2194743156433105, + "642": 2.229914426803589, + "643": 2.258718252182007, + "644": 2.2309961318969727, + "645": 2.2576956748962402, + "646": 2.252530097961426, + "647": 2.257633686065674 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 1.424, + 1.54815, + 1.54496, + 1.44283, + 1.05326, + 0.803636, + 0.76624, + 0.982984, + 1.41701, + 1.42943, + 1.46299, + 1.22677, + 1.07898, + 1.0803, + 1.02317, + 0.976781, + 1.00124, + 1.08919, + 1.03974, + 1.12578, + 1.15011, + 1.24698, + 1.31354, + 1.25859, + 0.983865, + 0.801613, + 0.767934, + 0.795479, + 0.964869, + 0.915328, + 0.940089, + 0.97916, + 1.19974, + 1.44564, + 1.48827, + 1.35106, + 1.33401, + 1.41335, + 1.52753, + 1.52441, + 1.61389, + 1.54914, + 1.36186, + 1.02934, + 1.24103, + 1.42695, + 1.57209, + 1.38198, + 1.16311, + 1.21942, + 1.1583, + 1.16794, + 1.18756, + 1.12713 + ], + "train_epoch_time": 5.056878089904785, + "train_loss": 2.237460089277161, + "train_score": 0.346287661465887, + "val_loss": 2.306067126216078, + "val_score": 0.3297574627892158 + }, + { + "epoch": 12, + "grad_norm": 0.8387596011161804, + "learning_rate": 0.1, + "model_norm": 87.61042022705078, + "step_logs": { + "grad_norm": { + "648": 1.581982135772705, + "649": 1.5737553834915161, + "650": 1.4004969596862793, + "651": 1.3306586742401123, + "652": 1.2554517984390259, + "653": 1.1243354082107544, + "654": 1.0491749048233032, + "655": 1.0814530849456787, + "656": 1.0577411651611328, + "657": 1.0769085884094238, + "658": 1.0773382186889648, + "659": 1.1758944988250732, + "660": 1.4077743291854858, + "661": 1.3541282415390015, + "662": 1.2772701978683472, + "663": 1.184023380279541, + "664": 1.1352832317352295, + "665": 1.0736087560653687, + "666": 1.0102338790893555, + "667": 1.0370293855667114, + "668": 1.0756092071533203, + "669": 1.0453011989593506, + "670": 1.0390774011611938, + "671": 1.0482290983200073, + "672": 1.048433542251587, + "673": 1.1108558177947998, + "674": 1.127617359161377, + "675": 1.1093788146972656, + "676": 0.9312753081321716, + "677": 0.7296909689903259, + "678": 0.7033728957176208, + "679": 0.7739483118057251, + "680": 0.938654363155365, + "681": 1.0277092456817627, + "682": 1.0152990818023682, + "683": 0.9473673701286316, + "684": 0.9405576586723328, + "685": 0.9931362271308899, + "686": 0.9151434898376465, + "687": 0.7597059607505798, + "688": 0.7866283059120178, + "689": 0.7895268201828003, + "690": 0.7335109710693359, + "691": 0.7413969039916992, + "692": 0.8307865262031555, + "693": 0.7810856699943542, + "694": 0.7339589595794678, + "695": 0.7777649760246277, + "696": 0.7652184367179871, + "697": 0.7264647483825684, + "698": 0.6602302193641663, + "699": 0.6880760192871094, + "700": 0.7712909579277039, + "701": 0.8387596011161804 + }, + "loss": { + "648": 2.224674701690674, + "649": 2.2643589973449707, + "650": 2.22965669631958, + "651": 2.229290008544922, + "652": 2.2342844009399414, + "653": 2.2144880294799805, + "654": 2.189312696456909, + "655": 2.1986241340637207, + "656": 2.2125964164733887, + "657": 2.2047319412231445, + "658": 2.2047762870788574, + "659": 2.1904592514038086, + "660": 2.2051358222961426, + "661": 2.230994701385498, + "662": 2.231522798538208, + "663": 2.214048385620117, + "664": 2.206422805786133, + "665": 2.213144302368164, + "666": 2.2061305046081543, + "667": 2.1752383708953857, + "668": 2.1865456104278564, + "669": 2.209840774536133, + "670": 2.196215867996216, + "671": 2.179856300354004, + "672": 2.183173179626465, + "673": 2.215100049972534, + "674": 2.1977384090423584, + "675": 2.187796115875244, + "676": 2.2159652709960938, + "677": 2.1777749061584473, + "678": 2.1774747371673584, + "679": 2.195223331451416, + "680": 2.183835983276367, + "681": 2.1674954891204834, + "682": 2.1961796283721924, + "683": 2.195509433746338, + "684": 2.180155038833618, + "685": 2.1860227584838867, + "686": 2.175570249557495, + "687": 2.1664438247680664, + "688": 2.173919200897217, + "689": 2.1759843826293945, + "690": 2.1583423614501953, + "691": 2.178779125213623, + "692": 2.1496191024780273, + "693": 2.1481401920318604, + "694": 2.159451961517334, + "695": 2.157120704650879, + "696": 2.177654504776001, + "697": 2.1564247608184814, + "698": 2.150519847869873, + "699": 2.1538474559783936, + "700": 2.16737699508667, + "701": 2.1726338863372803 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 0.888921, + 0.914262, + 1.13677, + 1.25902, + 1.41755, + 1.75179, + 1.9889, + 1.8799, + 1.97762, + 1.90107, + 1.89959, + 1.58416, + 1.11268, + 1.21669, + 1.36784, + 1.57931, + 1.71191, + 1.92007, + 2.16166, + 2.02267, + 1.88995, + 2.02245, + 2.03413, + 1.98388, + 1.98612, + 1.79506, + 1.72843, + 1.77765, + 2.55509, + 4.09011, + 4.40131, + 3.66484, + 2.47861, + 2.05219, + 2.13049, + 2.44624, + 2.46443, + 2.21634, + 2.59773, + 3.75367, + 3.51321, + 3.49078, + 4.0115, + 3.9638, + 3.11446, + 3.52099, + 4.00867, + 3.56597, + 3.71893, + 4.08606, + 4.93347, + 4.54927, + 3.64333, + 3.08824 + ], + "train_epoch_time": 5.052704334259033, + "train_loss": 2.1614000637185797, + "train_score": 0.3634807657904741, + "val_loss": 2.233464566766189, + "val_score": 0.34346745810744134 + }, + { + "epoch": 13, + "grad_norm": 0.6592527627944946, + "learning_rate": 0.06666666666666668, + "model_norm": 87.6195297241211, + "step_logs": { + "grad_norm": { + "702": 0.8362524509429932, + "703": 0.8091814517974854, + "704": 0.845625638961792, + "705": 0.9983937740325928, + "706": 1.0044894218444824, + "707": 0.962888777256012, + "708": 0.8864151835441589, + "709": 0.8935837745666504, + "710": 0.8574743866920471, + "711": 0.7297285199165344, + "712": 0.7358474731445312, + "713": 0.7165845632553101, + "714": 0.6686339378356934, + "715": 0.7225404977798462, + "716": 0.7110039591789246, + "717": 0.662558913230896, + "718": 0.603072464466095, + "719": 0.6447945237159729, + "720": 0.6536538004875183, + "721": 0.6368194818496704, + "722": 0.673835039138794, + "723": 0.6417527794837952, + "724": 0.585327684879303, + "725": 0.5983076691627502, + "726": 0.6275795102119446, + "727": 0.5941815972328186, + "728": 0.6566662192344666, + "729": 0.6971524357795715, + "730": 0.6727516651153564, + "731": 0.6684824824333191, + "732": 0.6768526434898376, + "733": 0.6906571388244629, + "734": 0.6527054309844971, + "735": 0.5872622132301331, + "736": 0.5852527618408203, + "737": 0.6615782380104065, + "738": 0.6764004826545715, + "739": 0.576840877532959, + "740": 0.6007465720176697, + "741": 0.677241861820221, + "742": 0.6685288548469543, + "743": 0.693906307220459, + "744": 0.6582739353179932, + "745": 0.650596022605896, + "746": 0.6399224996566772, + "747": 0.6072272062301636, + "748": 0.6378557085990906, + "749": 0.632337749004364, + "750": 0.6046590209007263, + "751": 0.6512715220451355, + "752": 0.6133934855461121, + "753": 0.627324640750885, + "754": 0.5898172855377197, + "755": 0.6592527627944946 + }, + "loss": { + "702": 2.160598039627075, + "703": 2.153968095779419, + "704": 2.157777786254883, + "705": 2.164325475692749, + "706": 2.156890869140625, + "707": 2.1611809730529785, + "708": 2.157391309738159, + "709": 2.173656463623047, + "710": 2.168610095977783, + "711": 2.1615982055664062, + "712": 2.131659507751465, + "713": 2.159134864807129, + "714": 2.1437277793884277, + "715": 2.1801514625549316, + "716": 2.139573574066162, + "717": 2.1167964935302734, + "718": 2.137956380844116, + "719": 2.163022041320801, + "720": 2.131282091140747, + "721": 2.134119987487793, + "722": 2.155618667602539, + "723": 2.1259775161743164, + "724": 2.146799087524414, + "725": 2.1739699840545654, + "726": 2.1365103721618652, + "727": 2.1435837745666504, + "728": 2.1390767097473145, + "729": 2.1439733505249023, + "730": 2.1333580017089844, + "731": 2.1631131172180176, + "732": 2.1334164142608643, + "733": 2.1504671573638916, + "734": 2.1357102394104004, + "735": 2.1633048057556152, + "736": 2.124472141265869, + "737": 2.126359462738037, + "738": 2.1509666442871094, + "739": 2.1523540019989014, + "740": 2.1348721981048584, + "741": 2.1738030910491943, + "742": 2.12825345993042, + "743": 2.129455089569092, + "744": 2.1404051780700684, + "745": 2.123248338699341, + "746": 2.1565322875976562, + "747": 2.141040802001953, + "748": 2.133934497833252, + "749": 2.1187424659729004, + "750": 2.149569272994995, + "751": 2.1229147911071777, + "752": 2.15488338470459, + "753": 2.1460983753204346, + "754": 2.1396210193634033, + "755": 2.1444015502929688 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 3.08958, + 3.28963, + 3.01752, + 2.1713, + 2.13765, + 2.33098, + 2.74571, + 2.7222, + 2.94944, + 4.05931, + 3.93679, + 4.2048, + 4.79505, + 4.17602, + 4.23237, + 4.82203, + 5.87841, + 5.20257, + 4.98822, + 5.26243, + 4.7475, + 5.16206, + 6.26604, + 6.07302, + 5.4246, + 6.07158, + 4.96063, + 4.41127, + 4.71362, + 4.8406, + 4.6568, + 4.50825, + 5.01312, + 6.27269, + 6.20246, + 4.85819, + 4.70139, + 6.46847, + 5.91547, + 4.7395, + 4.76193, + 4.42249, + 4.9395, + 5.01624, + 5.26625, + 5.80661, + 5.24489, + 5.29883, + 5.87936, + 5.00505, + 5.72724, + 5.45337, + 6.15038, + 4.93403 + ], + "train_epoch_time": 5.0530359745025635, + "train_loss": 2.1319318702265386, + "train_score": 0.36925215209744755, + "val_loss": 2.207699832631581, + "val_score": 0.34922592566016075 + }, + { + "epoch": 14, + "grad_norm": 0.5784660577774048, + "learning_rate": 0.03333333333333334, + "model_norm": 87.62258911132812, + "step_logs": { + "grad_norm": { + "756": 0.612333357334137, + "757": 0.657143771648407, + "758": 0.6825922727584839, + "759": 0.6265087127685547, + "760": 0.6389917135238647, + "761": 0.6159690022468567, + "762": 0.6025940775871277, + "763": 0.590510904788971, + "764": 0.6645591259002686, + "765": 0.5695677399635315, + "766": 0.5927686095237732, + "767": 0.5843535661697388, + "768": 0.5996710658073425, + "769": 0.6344755291938782, + "770": 0.6641318202018738, + "771": 0.5539579391479492, + "772": 0.5904717445373535, + "773": 0.59966641664505, + "774": 0.5793558359146118, + "775": 0.6131924390792847, + "776": 0.5643621683120728, + "777": 0.5599797368049622, + "778": 0.5634403228759766, + "779": 0.6696975827217102, + "780": 0.5814907550811768, + "781": 0.6196321249008179, + "782": 0.5625707507133484, + "783": 0.5729119181632996, + "784": 0.5670679807662964, + "785": 0.6024376749992371, + "786": 0.6088864207267761, + "787": 0.5948326587677002, + "788": 0.5956305861473083, + "789": 0.5724408626556396, + "790": 0.6002993583679199, + "791": 0.5960835814476013, + "792": 0.5096903443336487, + "793": 0.5664844512939453, + "794": 0.546478271484375, + "795": 0.5342928767204285, + "796": 0.54475337266922, + "797": 0.5758211612701416, + "798": 0.6035395264625549, + "799": 0.5516563057899475, + "800": 0.6098818778991699, + "801": 0.6136682629585266, + "802": 0.5548878908157349, + "803": 0.5874367952346802, + "804": 0.5641118884086609, + "805": 0.5488502383232117, + "806": 0.5530485510826111, + "807": 0.6005003452301025, + "808": 0.5693800449371338, + "809": 0.5784660577774048 + }, + "loss": { + "756": 2.140475034713745, + "757": 2.1435508728027344, + "758": 2.1449623107910156, + "759": 2.144589424133301, + "760": 2.1265616416931152, + "761": 2.1372361183166504, + "762": 2.135319232940674, + "763": 2.1409971714019775, + "764": 2.0976028442382812, + "765": 2.1189403533935547, + "766": 2.1375911235809326, + "767": 2.132354259490967, + "768": 2.149562358856201, + "769": 2.1116445064544678, + "770": 2.162325859069824, + "771": 2.1087965965270996, + "772": 2.1294798851013184, + "773": 2.1239724159240723, + "774": 2.1256752014160156, + "775": 2.11171555519104, + "776": 2.1371350288391113, + "777": 2.126445770263672, + "778": 2.12386417388916, + "779": 2.1403026580810547, + "780": 2.1253581047058105, + "781": 2.0939249992370605, + "782": 2.143845558166504, + "783": 2.096829414367676, + "784": 2.138974189758301, + "785": 2.1372528076171875, + "786": 2.1119861602783203, + "787": 2.139653444290161, + "788": 2.1490094661712646, + "789": 2.0913941860198975, + "790": 2.1084046363830566, + "791": 2.141140937805176, + "792": 2.1391408443450928, + "793": 2.111428737640381, + "794": 2.128871440887451, + "795": 2.121690034866333, + "796": 2.128572463989258, + "797": 2.0958313941955566, + "798": 2.1182193756103516, + "799": 2.1381726264953613, + "800": 2.1112403869628906, + "801": 2.103242874145508, + "802": 2.134166717529297, + "803": 2.1283695697784424, + "804": 2.142695903778076, + "805": 2.1347200870513916, + "806": 2.1347808837890625, + "807": 2.128366231918335, + "808": 2.121126890182495, + "809": 2.113149881362915 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 5.70866, + 4.96379, + 4.60359, + 5.46374, + 5.2082, + 5.63294, + 5.88048, + 6.13989, + 4.74959, + 6.53173, + 6.08351, + 6.24465, + 5.97756, + 5.24555, + 4.90244, + 6.87197, + 6.10767, + 5.90649, + 6.33295, + 5.61619, + 6.7099, + 6.78125, + 6.69007, + 4.77219, + 6.2856, + 5.45373, + 6.77391, + 6.38833, + 6.65174, + 5.88887, + 5.69664, + 6.04719, + 6.05737, + 6.38226, + 5.85084, + 6.02603, + 8.2343, + 6.57961, + 7.12859, + 7.4323, + 7.1728, + 6.32093, + 5.81513, + 7.02596, + 5.67605, + 5.58499, + 6.93135, + 6.16772, + 6.73333, + 7.08652, + 6.97954, + 5.90228, + 6.54278, + 6.31502 + ], + "train_epoch_time": 5.052657604217529, + "train_loss": 2.122466077284628, + "train_score": 0.3722011747541523, + "val_loss": 2.2005189133018908, + "val_score": 0.35092117616010726 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:21:56.476870", + "final_model_norm": 87.62258911132812, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:20:11.667143", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 3.2457480430603027, + "learning_rate": 2.15e-11, + "model_norm": 87.43665313720703, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.512977123260498, + "3": 7.69253396987915, + "4": 21.81309700012207, + "5": 6.899621963500977, + "6": 5.383406639099121, + "7": 4.05136775970459, + "8": 3.837218999862671, + "9": 7.806838512420654, + "10": 6.003067970275879, + "11": 63.25626754760742, + "12": 5.074599742889404, + "13": 46.29304504394531, + "14": 5.295746326446533, + "15": 18.24626922607422, + "16": 6.997523307800293, + "17": 29.57970428466797, + "18": 7.914642333984375, + "19": 12.852014541625977, + "20": 9.042000770568848, + "21": 8.264215469360352, + "22": 34.86244201660156, + "23": 6.628152847290039, + "24": 18.288515090942383, + "25": 7.109195709228516, + "26": 5.042996406555176, + "27": 12.910846710205078, + "28": 3.199608325958252, + "29": 4.209935188293457, + "30": 8.058609008789062, + "31": 3.721379280090332, + "32": 52.77738571166992, + "33": 14.339056968688965, + "34": 21.678544998168945, + "35": 5.035406589508057, + "36": 20.3298282623291, + "37": 7.667484283447266, + "38": 3.5563859939575195, + "39": 4.208524703979492, + "40": 3.5099568367004395, + "41": 15.399702072143555, + "42": 11.075334548950195, + "43": 6.654026508331299, + "44": 8.310967445373535, + "45": 2.8949220180511475, + "46": 4.301228046417236, + "47": 3.8653478622436523, + "48": 3.875664472579956, + "49": 3.565122604370117, + "50": 6.90683650970459, + "51": 7.323732852935791, + "52": 7.216414928436279, + "53": 3.2457480430603027 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.8393378257751465, + "3": 3.6987524032592773, + "4": 4.2365193367004395, + "5": 4.024836540222168, + "6": 3.5493674278259277, + "7": 3.623638391494751, + "8": 3.4474129676818848, + "9": 3.550882339477539, + "10": 3.9463655948638916, + "11": 3.8574094772338867, + "12": 3.5057315826416016, + "13": 3.7508187294006348, + "14": 3.35552978515625, + "15": 3.5992021560668945, + "16": 3.3698222637176514, + "17": 4.331902027130127, + "18": 3.7917962074279785, + "19": 3.724433422088623, + "20": 3.7172322273254395, + "21": 3.4425954818725586, + "22": 3.969120979309082, + "23": 3.6345341205596924, + "24": 3.5270655155181885, + "25": 3.467682361602783, + "26": 3.8323540687561035, + "27": 3.489938735961914, + "28": 3.0839614868164062, + "29": 3.272839069366455, + "30": 3.579218626022339, + "31": 3.0954370498657227, + "32": 4.643240451812744, + "33": 3.8729076385498047, + "34": 5.958535671234131, + "35": 3.183952808380127, + "36": 3.470449209213257, + "37": 3.502824068069458, + "38": 3.414090156555176, + "39": 3.2204222679138184, + "40": 3.0980968475341797, + "41": 4.255000114440918, + "42": 3.625798463821411, + "43": 3.717097759246826, + "44": 3.2471938133239746, + "45": 2.9165167808532715, + "46": 3.624490261077881, + "47": 3.419923782348633, + "48": 3.5652120113372803, + "49": 3.2851014137268066, + "50": 3.773500919342041, + "51": 3.671895980834961, + "52": 3.500760555267334, + "53": 3.1673593521118164 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 0.00839976, + 0.00857642, + 0.0905101, + 0.0625052, + 0.00890378, + 0.0845469, + 0.122472, + 0.220771, + 0.234132, + 0.0582621, + 0.109509, + 0.000964026, + 0.136137, + 0.00175023, + 0.119648, + 0.0108108, + 0.0688206, + 0.00495098, + 0.0605316, + 0.0225485, + 0.0454664, + 0.0504061, + 0.00326572, + 0.0827301, + 0.0105452, + 0.0686117, + 0.150691, + 0.0209367, + 0.301242, + 0.18466, + 0.0551148, + 0.223519, + 0.00166696, + 0.0188363, + 0.0126788, + 0.125573, + 0.00839689, + 0.0595817, + 0.269934, + 0.181825, + 0.251473, + 0.0179422, + 0.029559, + 0.0839527, + 0.0470116, + 0.348009, + 0.195912, + 0.228897, + 0.237352, + 0.258464, + 0.0791017, + 0.0684582, + 0.0672232, + 0.300655 + ], + "train_epoch_time": 5.0551862716674805, + "train_loss": 3.249413633004494, + "train_score": 0.21943821738149377, + "val_loss": 3.2759033399115474, + "val_score": 0.21235020802571222 + }, + { + "epoch": 1, + "grad_norm": 1.557374358177185, + "learning_rate": 0.215, + "model_norm": 87.44507598876953, + "step_logs": { + "grad_norm": { + "54": 4.774287700653076, + "55": 20.154672622680664, + "56": 4.404318332672119, + "57": 4.288097381591797, + "58": 8.36601734161377, + "59": 8.29132080078125, + "60": 3.161160945892334, + "61": 2.579345703125, + "62": 5.378228187561035, + "63": 3.1769394874572754, + "64": 6.62891149520874, + "65": 2.2578682899475098, + "66": 2.9576480388641357, + "67": 3.805307149887085, + "68": 2.2968363761901855, + "69": 3.2989535331726074, + "70": 3.6977450847625732, + "71": 3.1557819843292236, + "72": 2.3498356342315674, + "73": 8.389657974243164, + "74": 3.4586596488952637, + "75": 4.117329120635986, + "76": 3.046645164489746, + "77": 2.420107364654541, + "78": 2.6633551120758057, + "79": 3.2983312606811523, + "80": 2.5718305110931396, + "81": 2.168947458267212, + "82": 3.0560479164123535, + "83": 2.806969165802002, + "84": 2.8554811477661133, + "85": 2.9730701446533203, + "86": 2.311844825744629, + "87": 2.8837015628814697, + "88": 1.9802645444869995, + "89": 1.5054583549499512, + "90": 1.8459925651550293, + "91": 2.2450551986694336, + "92": 1.8281776905059814, + "93": 1.7590476274490356, + "94": 2.589425563812256, + "95": 1.7091599702835083, + "96": 1.3318382501602173, + "97": 1.1193625926971436, + "98": 1.177375078201294, + "99": 1.3803173303604126, + "100": 1.3559755086898804, + "101": 1.373702049255371, + "102": 1.2458759546279907, + "103": 1.2519652843475342, + "104": 1.7411977052688599, + "105": 1.732823371887207, + "106": 1.6153154373168945, + "107": 1.557374358177185 + }, + "loss": { + "54": 3.2355923652648926, + "55": 5.297096252441406, + "56": 3.3338818550109863, + "57": 3.4436700344085693, + "58": 3.8406684398651123, + "59": 3.746638298034668, + "60": 3.1932473182678223, + "61": 3.080146074295044, + "62": 3.348526954650879, + "63": 3.0301764011383057, + "64": 3.911581039428711, + "65": 3.2012410163879395, + "66": 2.9062182903289795, + "67": 3.354140281677246, + "68": 2.872352123260498, + "69": 3.0757651329040527, + "70": 3.455598831176758, + "71": 3.1716394424438477, + "72": 3.1585805416107178, + "73": 3.5887584686279297, + "74": 3.1803665161132812, + "75": 3.290294647216797, + "76": 3.4018030166625977, + "77": 3.062267303466797, + "78": 2.9873971939086914, + "79": 3.2536568641662598, + "80": 3.013258695602417, + "81": 3.065859794616699, + "82": 2.8529186248779297, + "83": 3.2957162857055664, + "84": 3.068861484527588, + "85": 3.077514171600342, + "86": 3.0496582984924316, + "87": 2.9805450439453125, + "88": 3.0729541778564453, + "89": 2.779258966445923, + "90": 2.7471539974212646, + "91": 2.861774444580078, + "92": 2.887636661529541, + "93": 2.722166061401367, + "94": 2.826554298400879, + "95": 3.0241246223449707, + "96": 2.711867332458496, + "97": 2.606844902038574, + "98": 2.619281530380249, + "99": 2.630624771118164, + "100": 2.679307699203491, + "101": 2.624044895172119, + "102": 2.66232967376709, + "103": 2.615865468978882, + "104": 2.6691906452178955, + "105": 2.7768869400024414, + "106": 2.653564453125, + "107": 2.751941680908203 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.14195, + 0.0130403, + 0.171867, + 0.18728, + 0.0548743, + 0.0544997, + 0.31955, + 0.462969, + 0.115764, + 0.300227, + 0.089016, + 0.627944, + 0.332227, + 0.231634, + 0.544475, + 0.282619, + 0.252726, + 0.318471, + 0.572028, + 0.0509866, + 0.265865, + 0.19409, + 0.366493, + 0.522846, + 0.421148, + 0.299077, + 0.455566, + 0.65171, + 0.30547, + 0.418287, + 0.376373, + 0.348169, + 0.570603, + 0.358422, + 0.783627, + 1.22629, + 0.806163, + 0.567782, + 0.863984, + 0.87975, + 0.421551, + 1.03522, + 1.52885, + 2.08053, + 1.88952, + 1.38071, + 1.4572, + 1.39055, + 1.71519, + 1.6689, + 0.880407, + 0.924804, + 1.01699, + 1.13463 + ], + "train_epoch_time": 5.058413505554199, + "train_loss": 2.6439669382623485, + "train_score": 0.24040194584358712, + "val_loss": 2.6650987561069317, + "val_score": 0.23652321324351186 + }, + { + "epoch": 2, + "grad_norm": 1.361799955368042, + "learning_rate": 0.215, + "model_norm": 87.4599609375, + "step_logs": { + "grad_norm": { + "108": 1.4028363227844238, + "109": 1.4082996845245361, + "110": 1.49027681350708, + "111": 1.4508966207504272, + "112": 1.2799209356307983, + "113": 1.2841819524765015, + "114": 1.5262126922607422, + "115": 1.4296855926513672, + "116": 1.2931698560714722, + "117": 1.2932544946670532, + "118": 1.374250888824463, + "119": 1.2298253774642944, + "120": 0.9848251342773438, + "121": 1.1569411754608154, + "122": 1.4206010103225708, + "123": 1.1878057718276978, + "124": 1.0276018381118774, + "125": 1.283652663230896, + "126": 1.8712903261184692, + "127": 1.6439473628997803, + "128": 1.2326799631118774, + "129": 1.4321972131729126, + "130": 1.450684666633606, + "131": 1.2546913623809814, + "132": 1.3548060655593872, + "133": 1.4761539697647095, + "134": 1.6513665914535522, + "135": 1.5099892616271973, + "136": 1.3036129474639893, + "137": 1.2944056987762451, + "138": 1.615399718284607, + "139": 1.40032160282135, + "140": 1.0077502727508545, + "141": 1.0127054452896118, + "142": 1.1501387357711792, + "143": 1.2182928323745728, + "144": 1.3234952688217163, + "145": 1.3381359577178955, + "146": 1.3107799291610718, + "147": 1.3745554685592651, + "148": 1.2872231006622314, + "149": 1.3383318185806274, + "150": 1.4619154930114746, + "151": 1.2690492868423462, + "152": 0.9869495034217834, + "153": 0.9818185567855835, + "154": 1.2708150148391724, + "155": 1.3768160343170166, + "156": 1.358048677444458, + "157": 1.2519779205322266, + "158": 1.058868646621704, + "159": 1.1156221628189087, + "160": 1.2423121929168701, + "161": 1.361799955368042 + }, + "loss": { + "108": 2.631319761276245, + "109": 2.6504969596862793, + "110": 2.640850067138672, + "111": 2.696254253387451, + "112": 2.6220450401306152, + "113": 2.638132095336914, + "114": 2.621736526489258, + "115": 2.6706607341766357, + "116": 2.60536527633667, + "117": 2.6401000022888184, + "118": 2.6053626537323, + "119": 2.6542954444885254, + "120": 2.5550832748413086, + "121": 2.586343765258789, + "122": 2.6107537746429443, + "123": 2.6542739868164062, + "124": 2.560403347015381, + "125": 2.5869429111480713, + "126": 2.6324198246002197, + "127": 2.7599895000457764, + "128": 2.6008591651916504, + "129": 2.5921430587768555, + "130": 2.6323347091674805, + "131": 2.60345196723938, + "132": 2.617171049118042, + "133": 2.6182680130004883, + "134": 2.653480052947998, + "135": 2.6878232955932617, + "136": 2.606743812561035, + "137": 2.6013011932373047, + "138": 2.5990309715270996, + "139": 2.6867997646331787, + "140": 2.553896903991699, + "141": 2.5521392822265625, + "142": 2.542072296142578, + "143": 2.5950050354003906, + "144": 2.5921449661254883, + "145": 2.622699737548828, + "146": 2.5679845809936523, + "147": 2.619826555252075, + "148": 2.598999261856079, + "149": 2.586848735809326, + "150": 2.611640214920044, + "151": 2.6322391033172607, + "152": 2.550750255584717, + "153": 2.548252582550049, + "154": 2.563251495361328, + "155": 2.6212615966796875, + "156": 2.5728607177734375, + "157": 2.613525867462158, + "158": 2.5438265800476074, + "159": 2.5615530014038086, + "160": 2.552793502807617, + "161": 2.572293519973755 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 1.33709, + 1.3364, + 1.18908, + 1.28082, + 1.60057, + 1.59972, + 1.12553, + 1.30658, + 1.55796, + 1.57853, + 1.37955, + 1.75494, + 2.63443, + 1.93225, + 1.29366, + 1.88129, + 2.4247, + 1.56997, + 0.751749, + 1.02125, + 1.71165, + 1.26373, + 1.25082, + 1.65377, + 1.42586, + 1.20157, + 0.973036, + 1.17883, + 1.53391, + 1.55257, + 0.995982, + 1.37019, + 2.51477, + 2.4885, + 1.92171, + 1.74838, + 1.47984, + 1.4647, + 1.49463, + 1.38659, + 1.56855, + 1.44425, + 1.22199, + 1.63444, + 2.61865, + 2.6435, + 1.58718, + 1.3828, + 1.39504, + 1.66738, + 2.26884, + 2.05811, + 1.65407, + 1.38706 + ], + "train_epoch_time": 5.05213475227356, + "train_loss": 2.589251991118728, + "train_score": 0.23802456956682794, + "val_loss": 2.6198141522850973, + "val_score": 0.2345768152880477 + }, + { + "epoch": 3, + "grad_norm": 1.0490036010742188, + "learning_rate": 0.215, + "model_norm": 87.47447967529297, + "step_logs": { + "grad_norm": { + "162": 1.2861301898956299, + "163": 1.1588457822799683, + "164": 1.1387637853622437, + "165": 1.2015197277069092, + "166": 1.1771337985992432, + "167": 1.2637208700180054, + "168": 1.3964570760726929, + "169": 1.3009259700775146, + "170": 1.4832748174667358, + "171": 1.3391103744506836, + "172": 1.001443862915039, + "173": 1.0641924142837524, + "174": 1.1780095100402832, + "175": 1.2123091220855713, + "176": 1.2728246450424194, + "177": 1.2441760301589966, + "178": 1.1102662086486816, + "179": 1.0653257369995117, + "180": 1.213426947593689, + "181": 1.2454233169555664, + "182": 1.1902133226394653, + "183": 1.2155243158340454, + "184": 1.310206413269043, + "185": 1.2149938344955444, + "186": 1.1201399564743042, + "187": 1.1830081939697266, + "188": 1.166102647781372, + "189": 1.0935567617416382, + "190": 1.0420727729797363, + "191": 1.0049830675125122, + "192": 1.1084294319152832, + "193": 1.3101873397827148, + "194": 1.3110649585723877, + "195": 1.2940549850463867, + "196": 1.141141414642334, + "197": 1.0688573122024536, + "198": 1.2241190671920776, + "199": 1.7137930393218994, + "200": 1.4287818670272827, + "201": 0.9249826669692993, + "202": 0.9427591562271118, + "203": 0.9777793884277344, + "204": 0.9809828400611877, + "205": 1.0389865636825562, + "206": 1.056341528892517, + "207": 1.0056283473968506, + "208": 1.1745526790618896, + "209": 1.5735182762145996, + "210": 1.4854751825332642, + "211": 1.260992407798767, + "212": 1.1626397371292114, + "213": 1.4322115182876587, + "214": 1.373690128326416, + "215": 1.0490036010742188 + }, + "loss": { + "162": 2.5961270332336426, + "163": 2.56341814994812, + "164": 2.5469627380371094, + "165": 2.552170753479004, + "166": 2.5810842514038086, + "167": 2.5551209449768066, + "168": 2.5750069618225098, + "169": 2.5918307304382324, + "170": 2.56986927986145, + "171": 2.6384692192077637, + "172": 2.5417380332946777, + "173": 2.5497114658355713, + "174": 2.5267906188964844, + "175": 2.552063465118408, + "176": 2.5574936866760254, + "177": 2.5750246047973633, + "178": 2.520806312561035, + "179": 2.559762716293335, + "180": 2.5456981658935547, + "181": 2.5894641876220703, + "182": 2.5442113876342773, + "183": 2.567554473876953, + "184": 2.5674996376037598, + "185": 2.588540554046631, + "186": 2.5444016456604004, + "187": 2.5603115558624268, + "188": 2.5432286262512207, + "189": 2.568527936935425, + "190": 2.5365238189697266, + "191": 2.5240440368652344, + "192": 2.5167489051818848, + "193": 2.5622100830078125, + "194": 2.5690016746520996, + "195": 2.552128553390503, + "196": 2.561570167541504, + "197": 2.5302505493164062, + "198": 2.534597396850586, + "199": 2.5753073692321777, + "200": 2.669279098510742, + "201": 2.5109434127807617, + "202": 2.5188565254211426, + "203": 2.513155937194824, + "204": 2.5260703563690186, + "205": 2.5210156440734863, + "206": 2.541933536529541, + "207": 2.4990439414978027, + "208": 2.523283004760742, + "209": 2.563011646270752, + "210": 2.61623477935791, + "211": 2.5540199279785156, + "212": 2.53739333152771, + "213": 2.5462939739227295, + "214": 2.6274044513702393, + "215": 2.526057720184326 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 1.56948, + 1.90883, + 1.96406, + 1.76786, + 1.86273, + 1.59996, + 1.32045, + 1.53145, + 1.16807, + 1.47136, + 2.53441, + 2.25139, + 1.82084, + 1.73646, + 1.57862, + 1.66348, + 2.04496, + 2.25546, + 1.72894, + 1.66946, + 1.79599, + 1.73777, + 1.49565, + 1.7535, + 2.02787, + 1.82944, + 1.8703, + 2.14784, + 2.33584, + 2.49908, + 2.04844, + 1.49262, + 1.49457, + 1.52404, + 1.9671, + 2.21475, + 1.69146, + 0.876824, + 1.30756, + 2.93474, + 2.83401, + 2.62868, + 2.62496, + 2.33537, + 2.27801, + 2.47115, + 1.82903, + 1.03516, + 1.18562, + 1.6062, + 1.87714, + 1.24135, + 1.39235, + 2.29556 + ], + "train_epoch_time": 5.0522496700286865, + "train_loss": 2.523870642455442, + "train_score": 0.2451835993458243, + "val_loss": 2.5673934510052545, + "val_score": 0.2400034083188196 + }, + { + "epoch": 4, + "grad_norm": 1.0854158401489258, + "learning_rate": 0.215, + "model_norm": 87.49336242675781, + "step_logs": { + "grad_norm": { + "216": 1.0241482257843018, + "217": 1.1809654235839844, + "218": 1.2362719774246216, + "219": 1.1973521709442139, + "220": 1.2557106018066406, + "221": 1.207959532737732, + "222": 1.2223831415176392, + "223": 1.1609492301940918, + "224": 1.0549728870391846, + "225": 1.0758628845214844, + "226": 1.0378057956695557, + "227": 1.0285675525665283, + "228": 1.0697710514068604, + "229": 1.1172983646392822, + "230": 1.1721439361572266, + "231": 1.2012693881988525, + "232": 1.0827316045761108, + "233": 0.9628183245658875, + "234": 1.0146420001983643, + "235": 1.0098919868469238, + "236": 1.1025352478027344, + "237": 1.236091136932373, + "238": 1.222111701965332, + "239": 1.1935385465621948, + "240": 1.2283692359924316, + "241": 1.181260347366333, + "242": 1.0883049964904785, + "243": 0.9462724924087524, + "244": 0.841920793056488, + "245": 0.904147744178772, + "246": 1.028922200202942, + "247": 1.0499484539031982, + "248": 1.019058346748352, + "249": 1.0206499099731445, + "250": 1.0636106729507446, + "251": 1.0745441913604736, + "252": 1.3279489278793335, + "253": 1.3285070657730103, + "254": 1.3298733234405518, + "255": 1.305577039718628, + "256": 1.0363807678222656, + "257": 0.9024780988693237, + "258": 0.8831479549407959, + "259": 0.944024920463562, + "260": 1.327848196029663, + "261": 1.2221624851226807, + "262": 0.867016077041626, + "263": 0.9218744039535522, + "264": 1.0184452533721924, + "265": 1.108479380607605, + "266": 1.3029100894927979, + "267": 1.315808653831482, + "268": 1.173572301864624, + "269": 1.0854158401489258 + }, + "loss": { + "216": 2.5376625061035156, + "217": 2.528329372406006, + "218": 2.559483051300049, + "219": 2.5134034156799316, + "220": 2.5575060844421387, + "221": 2.5357069969177246, + "222": 2.5385208129882812, + "223": 2.5506324768066406, + "224": 2.52734375, + "225": 2.5073888301849365, + "226": 2.5314648151397705, + "227": 2.5137887001037598, + "228": 2.533418893814087, + "229": 2.5058951377868652, + "230": 2.5301146507263184, + "231": 2.533447504043579, + "232": 2.5572028160095215, + "233": 2.4895966053009033, + "234": 2.5353453159332275, + "235": 2.5028529167175293, + "236": 2.497239112854004, + "237": 2.5329959392547607, + "238": 2.548107624053955, + "239": 2.512467384338379, + "240": 2.532466411590576, + "241": 2.5352303981781006, + "242": 2.5191922187805176, + "243": 2.500959873199463, + "244": 2.4881105422973633, + "245": 2.4924659729003906, + "246": 2.4836041927337646, + "247": 2.508016586303711, + "248": 2.5005147457122803, + "249": 2.4982798099517822, + "250": 2.5091617107391357, + "251": 2.5055460929870605, + "252": 2.5079002380371094, + "253": 2.5764472484588623, + "254": 2.517523765563965, + "255": 2.566769599914551, + "256": 2.513474702835083, + "257": 2.5024774074554443, + "258": 2.462148427963257, + "259": 2.482999086380005, + "260": 2.5005099773406982, + "261": 2.5734333992004395, + "262": 2.5019009113311768, + "263": 2.47052264213562, + "264": 2.4790942668914795, + "265": 2.500074625015259, + "266": 2.5009069442749023, + "267": 2.520803928375244, + "268": 2.5006628036499023, + "269": 2.5019078254699707 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 2.4194, + 1.81284, + 1.67465, + 1.75315, + 1.62195, + 1.73778, + 1.69889, + 1.89244, + 2.27081, + 2.16625, + 2.35039, + 2.37609, + 2.21373, + 2.00736, + 1.84153, + 1.75562, + 2.18134, + 2.68559, + 2.4627, + 2.45406, + 2.05435, + 1.65781, + 1.70607, + 1.76371, + 1.67836, + 1.81688, + 2.12696, + 2.79302, + 3.51016, + 3.04895, + 2.34594, + 2.27507, + 2.40786, + 2.39821, + 2.21801, + 2.16997, + 1.42216, + 1.4598, + 1.42349, + 1.50585, + 2.34011, + 3.07253, + 3.1568, + 2.78618, + 1.41818, + 1.72288, + 3.32825, + 2.907, + 2.39011, + 2.03469, + 1.47322, + 1.45597, + 1.81566, + 2.12363 + ], + "train_epoch_time": 5.051345109939575, + "train_loss": 2.4881013529543554, + "train_score": 0.25934585722823394, + "val_loss": 2.529016125489595, + "val_score": 0.2525518438709316 + }, + { + "epoch": 5, + "grad_norm": 1.00590980052948, + "learning_rate": 0.215, + "model_norm": 87.51631164550781, + "step_logs": { + "grad_norm": { + "270": 1.13013756275177, + "271": 1.2563263177871704, + "272": 1.244115948677063, + "273": 1.1930584907531738, + "274": 1.1821926832199097, + "275": 1.1659003496170044, + "276": 1.1019318103790283, + "277": 1.0566940307617188, + "278": 0.9704861044883728, + "279": 1.014012098312378, + "280": 1.0404714345932007, + "281": 0.9590476751327515, + "282": 0.8862022757530212, + "283": 0.9800989627838135, + "284": 1.1799497604370117, + "285": 1.3598498106002808, + "286": 1.5365018844604492, + "287": 1.4131407737731934, + "288": 1.1602160930633545, + "289": 1.1545852422714233, + "290": 1.2624964714050293, + "291": 1.133628487586975, + "292": 0.9308536052703857, + "293": 0.8660498857498169, + "294": 0.9804956912994385, + "295": 1.0610535144805908, + "296": 1.3071521520614624, + "297": 1.44125497341156, + "298": 1.4970768690109253, + "299": 1.519706130027771, + "300": 1.5792064666748047, + "301": 1.2901794910430908, + "302": 1.0978492498397827, + "303": 0.98650723695755, + "304": 0.9940528869628906, + "305": 1.1404259204864502, + "306": 1.3931688070297241, + "307": 1.7322280406951904, + "308": 1.398901104927063, + "309": 1.0341744422912598, + "310": 1.0753517150878906, + "311": 1.1951384544372559, + "312": 1.3540966510772705, + "313": 1.1803466081619263, + "314": 1.1124998331069946, + "315": 1.1645481586456299, + "316": 1.2335587739944458, + "317": 1.1928004026412964, + "318": 1.1615335941314697, + "319": 1.267061471939087, + "320": 1.4724963903427124, + "321": 1.3068675994873047, + "322": 1.0462076663970947, + "323": 1.00590980052948 + }, + "loss": { + "270": 2.47450852394104, + "271": 2.505202531814575, + "272": 2.5308775901794434, + "273": 2.5177090167999268, + "274": 2.5039329528808594, + "275": 2.5061721801757812, + "276": 2.50260853767395, + "277": 2.501378059387207, + "278": 2.4689221382141113, + "279": 2.486182689666748, + "280": 2.483917713165283, + "281": 2.4691946506500244, + "282": 2.45881986618042, + "283": 2.4496452808380127, + "284": 2.4789481163024902, + "285": 2.5068202018737793, + "286": 2.5256478786468506, + "287": 2.5311498641967773, + "288": 2.484822988510132, + "289": 2.46293044090271, + "290": 2.475250482559204, + "291": 2.4920730590820312, + "292": 2.433931350708008, + "293": 2.450202465057373, + "294": 2.4047436714172363, + "295": 2.4686248302459717, + "296": 2.4366941452026367, + "297": 2.517975330352783, + "298": 2.492640972137451, + "299": 2.5122952461242676, + "300": 2.523442506790161, + "301": 2.5308361053466797, + "302": 2.457784414291382, + "303": 2.4264230728149414, + "304": 2.4332144260406494, + "305": 2.4344351291656494, + "306": 2.460434913635254, + "307": 2.5175065994262695, + "308": 2.560126543045044, + "309": 2.42850399017334, + "310": 2.4256277084350586, + "311": 2.461831569671631, + "312": 2.4378867149353027, + "313": 2.476686716079712, + "314": 2.4298629760742188, + "315": 2.433743476867676, + "316": 2.451724052429199, + "317": 2.4429852962493896, + "318": 2.4287307262420654, + "319": 2.4491324424743652, + "320": 2.4364917278289795, + "321": 2.4973602294921875, + "322": 2.4239325523376465, + "323": 2.405879020690918 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 1.93743, + 1.58722, + 1.63512, + 1.76881, + 1.79162, + 1.84369, + 2.06103, + 2.24017, + 2.62137, + 2.41795, + 2.29444, + 2.68457, + 3.13084, + 2.55014, + 1.78049, + 1.35563, + 1.06981, + 1.2675, + 1.84594, + 1.84757, + 1.55295, + 1.93919, + 2.80896, + 3.26675, + 2.50137, + 2.19271, + 1.4261, + 1.21219, + 1.11217, + 1.08781, + 1.01185, + 1.52042, + 2.03919, + 2.49325, + 2.46242, + 1.87182, + 1.26766, + 0.838997, + 1.30824, + 2.27066, + 2.0976, + 1.72354, + 1.32958, + 1.77767, + 1.96328, + 1.79457, + 1.61121, + 1.71706, + 1.80018, + 1.52552, + 1.12372, + 1.46224, + 2.21455, + 2.37769 + ], + "train_epoch_time": 5.054481506347656, + "train_loss": 2.410666897170342, + "train_score": 0.28630850969291316, + "val_loss": 2.458363356738085, + "val_score": 0.2764198838911702 + }, + { + "epoch": 6, + "grad_norm": 1.3920936584472656, + "learning_rate": 0.215, + "model_norm": 87.53954315185547, + "step_logs": { + "grad_norm": { + "324": 1.2398954629898071, + "325": 1.2559360265731812, + "326": 1.1005347967147827, + "327": 1.0315883159637451, + "328": 1.0457839965820312, + "329": 1.1575443744659424, + "330": 1.3922940492630005, + "331": 1.421283483505249, + "332": 1.2612704038619995, + "333": 1.0700067281723022, + "334": 0.9830129742622375, + "335": 1.0199302434921265, + "336": 1.105617880821228, + "337": 1.1271659135818481, + "338": 1.1486330032348633, + "339": 1.158549427986145, + "340": 1.0926823616027832, + "341": 1.081673502922058, + "342": 1.0944740772247314, + "343": 1.0738455057144165, + "344": 1.1464033126831055, + "345": 1.2084888219833374, + "346": 1.1741127967834473, + "347": 1.0600250959396362, + "348": 0.9916175007820129, + "349": 1.1096489429473877, + "350": 1.3233437538146973, + "351": 1.6562645435333252, + "352": 1.6098417043685913, + "353": 1.4056298732757568, + "354": 1.2175393104553223, + "355": 1.1655157804489136, + "356": 1.0950380563735962, + "357": 1.1121106147766113, + "358": 0.9945564270019531, + "359": 0.8062542676925659, + "360": 0.8896239995956421, + "361": 1.1539795398712158, + "362": 1.1678218841552734, + "363": 0.99281245470047, + "364": 0.9655885696411133, + "365": 1.053301453590393, + "366": 1.167069435119629, + "367": 1.2617477178573608, + "368": 1.1912317276000977, + "369": 1.0710958242416382, + "370": 1.0876836776733398, + "371": 1.2153470516204834, + "372": 1.4546935558319092, + "373": 1.1815884113311768, + "374": 1.0157623291015625, + "375": 1.1425656080245972, + "376": 1.2669028043746948, + "377": 1.3920936584472656 + }, + "loss": { + "324": 2.394548177719116, + "325": 2.4889769554138184, + "326": 2.4241433143615723, + "327": 2.405470371246338, + "328": 2.4188551902770996, + "329": 2.391901969909668, + "330": 2.427004337310791, + "331": 2.4808945655822754, + "332": 2.434130907058716, + "333": 2.42537260055542, + "334": 2.3684873580932617, + "335": 2.387106418609619, + "336": 2.416452407836914, + "337": 2.4211487770080566, + "338": 2.4016990661621094, + "339": 2.4311635494232178, + "340": 2.3922266960144043, + "341": 2.4100000858306885, + "342": 2.389313220977783, + "343": 2.383903980255127, + "344": 2.370375871658325, + "345": 2.4168379306793213, + "346": 2.3979382514953613, + "347": 2.4214749336242676, + "348": 2.3990001678466797, + "349": 2.402042865753174, + "350": 2.4201416969299316, + "351": 2.473811626434326, + "352": 2.5203981399536133, + "353": 2.4384689331054688, + "354": 2.440727710723877, + "355": 2.3802945613861084, + "356": 2.4108998775482178, + "357": 2.3781023025512695, + "358": 2.4347705841064453, + "359": 2.354454278945923, + "360": 2.3518245220184326, + "361": 2.3720993995666504, + "362": 2.4115350246429443, + "363": 2.3968918323516846, + "364": 2.3538870811462402, + "365": 2.389626979827881, + "366": 2.4068222045898438, + "367": 2.4024605751037598, + "368": 2.4263787269592285, + "369": 2.370750904083252, + "370": 2.3828773498535156, + "371": 2.4061851501464844, + "372": 2.4232969284057617, + "373": 2.425050735473633, + "374": 2.394852638244629, + "375": 2.3632655143737793, + "376": 2.4071457386016846, + "377": 2.393009662628174 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 1.55759, + 1.57792, + 2.00148, + 2.26041, + 2.2117, + 1.78512, + 1.25201, + 1.22814, + 1.53013, + 2.11839, + 2.45105, + 2.29473, + 1.97682, + 1.90566, + 1.82035, + 1.81128, + 2.00362, + 2.0598, + 1.99463, + 2.06731, + 1.80361, + 1.65486, + 1.73948, + 2.155, + 2.43973, + 1.95079, + 1.38196, + 0.901794, + 0.97253, + 1.23417, + 1.64647, + 1.75224, + 2.01058, + 1.9228, + 2.4615, + 3.62198, + 2.97161, + 1.7813, + 1.76824, + 2.43172, + 2.52465, + 2.1539, + 1.76706, + 1.50908, + 1.70988, + 2.06647, + 2.01417, + 1.62903, + 1.14515, + 1.73695, + 2.3211, + 1.8103, + 1.49974, + 1.23483 + ], + "train_epoch_time": 5.051822185516357, + "train_loss": 2.4235573906125434, + "train_score": 0.2781384503909813, + "val_loss": 2.4600568704572257, + "val_score": 0.26916798193063307 + }, + { + "epoch": 7, + "grad_norm": 1.2349190711975098, + "learning_rate": 0.215, + "model_norm": 87.56900787353516, + "step_logs": { + "grad_norm": { + "378": 1.250223159790039, + "379": 1.0774519443511963, + "380": 1.0756529569625854, + "381": 1.0746705532073975, + "382": 1.1278197765350342, + "383": 1.2192370891571045, + "384": 1.288550615310669, + "385": 1.2426871061325073, + "386": 1.175428032875061, + "387": 1.085137128829956, + "388": 1.0985182523727417, + "389": 1.1738382577896118, + "390": 1.182012677192688, + "391": 1.2103580236434937, + "392": 1.3914525508880615, + "393": 1.3073880672454834, + "394": 1.245774507522583, + "395": 1.229509949684143, + "396": 1.2988204956054688, + "397": 1.1870174407958984, + "398": 0.9405555129051208, + "399": 0.9783185720443726, + "400": 1.0132989883422852, + "401": 1.0678632259368896, + "402": 1.0937392711639404, + "403": 1.0450265407562256, + "404": 1.0873031616210938, + "405": 1.1243746280670166, + "406": 1.1756823062896729, + "407": 1.173513650894165, + "408": 1.160428524017334, + "409": 1.2300890684127808, + "410": 1.353251576423645, + "411": 1.381003975868225, + "412": 1.3498440980911255, + "413": 1.1837456226348877, + "414": 1.0914051532745361, + "415": 1.0649654865264893, + "416": 1.1831587553024292, + "417": 1.16208815574646, + "418": 0.9885045289993286, + "419": 0.8873646855354309, + "420": 0.8332418203353882, + "421": 0.9156327247619629, + "422": 0.9984257221221924, + "423": 0.9638368487358093, + "424": 0.9576247334480286, + "425": 1.05313241481781, + "426": 1.1539151668548584, + "427": 1.1707099676132202, + "428": 1.1698909997940063, + "429": 1.1404439210891724, + "430": 1.1631096601486206, + "431": 1.2349190711975098 + }, + "loss": { + "378": 2.425527572631836, + "379": 2.354468822479248, + "380": 2.384701728820801, + "381": 2.352278470993042, + "382": 2.3599302768707275, + "383": 2.375415325164795, + "384": 2.3820619583129883, + "385": 2.385037899017334, + "386": 2.3775856494903564, + "387": 2.3743581771850586, + "388": 2.3540422916412354, + "389": 2.3854315280914307, + "390": 2.37440824508667, + "391": 2.3822226524353027, + "392": 2.365360736846924, + "393": 2.4160287380218506, + "394": 2.3536853790283203, + "395": 2.376986026763916, + "396": 2.380284547805786, + "397": 2.3999688625335693, + "398": 2.3580517768859863, + "399": 2.3259963989257812, + "400": 2.3365097045898438, + "401": 2.3584940433502197, + "402": 2.3531861305236816, + "403": 2.361647844314575, + "404": 2.3399956226348877, + "405": 2.3749985694885254, + "406": 2.3618900775909424, + "407": 2.368696689605713, + "408": 2.3374476432800293, + "409": 2.3836843967437744, + "410": 2.367276191711426, + "411": 2.411839485168457, + "412": 2.3922245502471924, + "413": 2.3754727840423584, + "414": 2.3681883811950684, + "415": 2.350381374359131, + "416": 2.345921754837036, + "417": 2.3610222339630127, + "418": 2.3186309337615967, + "419": 2.3404953479766846, + "420": 2.3003005981445312, + "421": 2.3098666667938232, + "422": 2.321126937866211, + "423": 2.3332765102386475, + "424": 2.3189990520477295, + "425": 2.3287124633789062, + "426": 2.3312830924987793, + "427": 2.339560031890869, + "428": 2.320887565612793, + "429": 2.3463175296783447, + "430": 2.3140077590942383, + "431": 2.344412326812744 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 1.55178, + 2.02814, + 2.06106, + 2.03675, + 1.85532, + 1.59795, + 1.43466, + 1.54444, + 1.72085, + 2.0164, + 1.95074, + 1.73121, + 1.69946, + 1.62613, + 1.22169, + 1.41349, + 1.51659, + 1.5724, + 1.41101, + 1.7033, + 2.66554, + 2.43024, + 2.27558, + 2.06825, + 1.96711, + 2.16252, + 1.97931, + 1.87863, + 1.70875, + 1.72002, + 1.73582, + 1.57534, + 1.29268, + 1.26462, + 1.31291, + 1.69525, + 1.98813, + 2.07237, + 1.67582, + 1.74832, + 2.37287, + 2.97237, + 3.31316, + 2.75514, + 2.32845, + 2.51165, + 2.52877, + 2.09966, + 1.75084, + 1.70701, + 1.69576, + 1.80401, + 1.7105, + 1.53729 + ], + "train_epoch_time": 5.052142858505249, + "train_loss": 2.3236548091280924, + "train_score": 0.3173702026029228, + "val_loss": 2.37227043711359, + "val_score": 0.3091319245120551 + }, + { + "epoch": 8, + "grad_norm": 1.1614073514938354, + "learning_rate": 0.215, + "model_norm": 87.59609985351562, + "step_logs": { + "grad_norm": { + "432": 1.1335837841033936, + "433": 1.133182406425476, + "434": 1.3028398752212524, + "435": 1.3575388193130493, + "436": 1.183471918106079, + "437": 1.0903741121292114, + "438": 0.9942664504051208, + "439": 0.8852788805961609, + "440": 0.9137327075004578, + "441": 1.0063812732696533, + "442": 1.0718480348587036, + "443": 1.1188194751739502, + "444": 1.1481742858886719, + "445": 1.2257109880447388, + "446": 1.2147953510284424, + "447": 1.1156500577926636, + "448": 1.0477854013442993, + "449": 1.157600998878479, + "450": 1.1659703254699707, + "451": 1.209721326828003, + "452": 1.1933655738830566, + "453": 1.2303316593170166, + "454": 1.2471320629119873, + "455": 1.3114380836486816, + "456": 1.3324886560440063, + "457": 1.244615077972412, + "458": 1.325263500213623, + "459": 1.3488163948059082, + "460": 1.1333106756210327, + "461": 0.9534809589385986, + "462": 0.9193475246429443, + "463": 1.0124518871307373, + "464": 1.064584493637085, + "465": 1.0993348360061646, + "466": 1.108075737953186, + "467": 1.3078234195709229, + "468": 1.2591376304626465, + "469": 1.2338371276855469, + "470": 1.3050905466079712, + "471": 1.3439602851867676, + "472": 1.1983816623687744, + "473": 1.0942996740341187, + "474": 0.9860391020774841, + "475": 0.9721660614013672, + "476": 1.183914065361023, + "477": 1.3471086025238037, + "478": 1.2305814027786255, + "479": 1.2195003032684326, + "480": 1.250832438468933, + "481": 1.188461184501648, + "482": 1.2503068447113037, + "483": 1.2693363428115845, + "484": 1.1738686561584473, + "485": 1.1614073514938354 + }, + "loss": { + "432": 2.3429641723632812, + "433": 2.3273744583129883, + "434": 2.326981544494629, + "435": 2.3749589920043945, + "436": 2.337435245513916, + "437": 2.3211703300476074, + "438": 2.3299434185028076, + "439": 2.2885684967041016, + "440": 2.307781934738159, + "441": 2.311345100402832, + "442": 2.2958879470825195, + "443": 2.3113040924072266, + "444": 2.3194353580474854, + "445": 2.3361878395080566, + "446": 2.3112845420837402, + "447": 2.294637680053711, + "448": 2.2851107120513916, + "449": 2.2943053245544434, + "450": 2.3291707038879395, + "451": 2.3023414611816406, + "452": 2.302241325378418, + "453": 2.302578926086426, + "454": 2.345062732696533, + "455": 2.313366651535034, + "456": 2.3372836112976074, + "457": 2.330024003982544, + "458": 2.32963490486145, + "459": 2.3328371047973633, + "460": 2.3079590797424316, + "461": 2.282858371734619, + "462": 2.2656188011169434, + "463": 2.2717702388763428, + "464": 2.2871859073638916, + "465": 2.3089089393615723, + "466": 2.308109760284424, + "467": 2.29720401763916, + "468": 2.328185558319092, + "469": 2.2941536903381348, + "470": 2.31735897064209, + "471": 2.303525924682617, + "472": 2.3207876682281494, + "473": 2.2822265625, + "474": 2.260265827178955, + "475": 2.2539334297180176, + "476": 2.2617974281311035, + "477": 2.2976627349853516, + "478": 2.3209643363952637, + "479": 2.291518449783325, + "480": 2.323875904083252, + "481": 2.3008086681365967, + "482": 2.3029935359954834, + "483": 2.2736434936523438, + "484": 2.3120718002319336, + "485": 2.306636333465576 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 1.8233, + 1.81245, + 1.37092, + 1.2887, + 1.66887, + 1.95234, + 2.35689, + 2.92014, + 2.76412, + 2.28213, + 1.99841, + 1.84645, + 1.75941, + 1.555, + 1.5662, + 1.84356, + 2.08143, + 1.71212, + 1.71327, + 1.57325, + 1.6166, + 1.52114, + 1.50775, + 1.34508, + 1.31639, + 1.50415, + 1.32643, + 1.28227, + 1.79693, + 2.51105, + 2.68057, + 2.21623, + 2.01809, + 1.9105, + 1.87983, + 1.34308, + 1.46849, + 1.50698, + 1.36054, + 1.27532, + 1.61601, + 1.90584, + 2.32472, + 2.38485, + 1.61366, + 1.26614, + 1.53267, + 1.54085, + 1.4853, + 1.62896, + 1.47319, + 1.41114, + 1.67789, + 1.71005 + ], + "train_epoch_time": 5.051881790161133, + "train_loss": 2.3074515483642752, + "train_score": 0.31586374634478664, + "val_loss": 2.3635185950111715, + "val_score": 0.2983325556279871 + }, + { + "epoch": 9, + "grad_norm": 1.245952844619751, + "learning_rate": 0.215, + "model_norm": 87.62519073486328, + "step_logs": { + "grad_norm": { + "486": 1.4350836277008057, + "487": 1.3153775930404663, + "488": 1.1950019598007202, + "489": 1.1991479396820068, + "490": 1.1610701084136963, + "491": 1.0555616617202759, + "492": 0.8973591923713684, + "493": 0.8280812501907349, + "494": 0.8429233431816101, + "495": 1.0318495035171509, + "496": 1.2025450468063354, + "497": 1.4055798053741455, + "498": 1.4624541997909546, + "499": 1.7936570644378662, + "500": 1.3847507238388062, + "501": 1.2007139921188354, + "502": 0.9072302579879761, + "503": 0.688532292842865, + "504": 0.6601887345314026, + "505": 0.6672235131263733, + "506": 0.7431198358535767, + "507": 0.8004299402236938, + "508": 0.9479244947433472, + "509": 1.129452109336853, + "510": 1.4622944593429565, + "511": 1.4010891914367676, + "512": 1.1569700241088867, + "513": 1.1538740396499634, + "514": 1.229243516921997, + "515": 1.2271878719329834, + "516": 1.1357204914093018, + "517": 1.1053290367126465, + "518": 1.1159783601760864, + "519": 1.2433758974075317, + "520": 1.3332566022872925, + "521": 1.3406134843826294, + "522": 1.1998921632766724, + "523": 1.052549123764038, + "524": 0.9985735416412354, + "525": 0.9584429264068604, + "526": 0.9948418736457825, + "527": 1.0198593139648438, + "528": 1.073205590248108, + "529": 1.0430034399032593, + "530": 1.029845118522644, + "531": 1.1068893671035767, + "532": 1.2657700777053833, + "533": 1.1777558326721191, + "534": 0.9900950789451599, + "535": 0.9171857833862305, + "536": 0.9722216129302979, + "537": 0.9982343912124634, + "538": 1.0485411882400513, + "539": 1.245952844619751 + }, + "loss": { + "486": 2.3105132579803467, + "487": 2.3353779315948486, + "488": 2.2745251655578613, + "489": 2.3003311157226562, + "490": 2.259620189666748, + "491": 2.299048900604248, + "492": 2.2536635398864746, + "493": 2.2375235557556152, + "494": 2.2467522621154785, + "495": 2.2667975425720215, + "496": 2.274629831314087, + "497": 2.2951698303222656, + "498": 2.3376176357269287, + "499": 2.2875685691833496, + "500": 2.367274761199951, + "501": 2.294498920440674, + "502": 2.279972791671753, + "503": 2.2419590950012207, + "504": 2.2450153827667236, + "505": 2.208838701248169, + "506": 2.212721347808838, + "507": 2.233635902404785, + "508": 2.2223172187805176, + "509": 2.272603988647461, + "510": 2.2838568687438965, + "511": 2.3249762058258057, + "512": 2.2769415378570557, + "513": 2.293736696243286, + "514": 2.2783069610595703, + "515": 2.2808032035827637, + "516": 2.257035255432129, + "517": 2.2605504989624023, + "518": 2.271979331970215, + "519": 2.26370906829834, + "520": 2.299496650695801, + "521": 2.2778971195220947, + "522": 2.2844040393829346, + "523": 2.2698402404785156, + "524": 2.250211000442505, + "525": 2.2461366653442383, + "526": 2.24664044380188, + "527": 2.210922956466675, + "528": 2.2328033447265625, + "529": 2.2204999923706055, + "530": 2.267880439758301, + "531": 2.235379695892334, + "532": 2.2744719982147217, + "533": 2.27650785446167, + "534": 2.2195796966552734, + "535": 2.196235179901123, + "536": 2.222111940383911, + "537": 2.2188525199890137, + "538": 2.219332456588745, + "539": 2.228638172149658 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 1.1219, + 1.34976, + 1.59277, + 1.59972, + 1.67617, + 2.06339, + 2.7987, + 3.26304, + 3.16212, + 2.12902, + 1.57293, + 1.16173, + 1.09297, + 0.711042, + 1.23454, + 1.59151, + 2.77009, + 4.72911, + 5.1509, + 4.9616, + 4.0069, + 3.48631, + 2.4732, + 1.78151, + 1.06807, + 1.18437, + 1.70101, + 1.72277, + 1.50777, + 1.51449, + 1.74983, + 1.85025, + 1.82429, + 1.46425, + 1.29362, + 1.26744, + 1.58668, + 2.04885, + 2.25664, + 2.44514, + 2.27, + 2.12566, + 1.93858, + 2.04117, + 2.13834, + 1.8245, + 1.41962, + 1.64119, + 2.26421, + 2.61074, + 2.35091, + 2.22671, + 2.0186, + 1.43561 + ], + "train_epoch_time": 5.052018880844116, + "train_loss": 2.2684142873482176, + "train_score": 0.33120068155197024, + "val_loss": 2.337002307484536, + "val_score": 0.3179624710236296 + }, + { + "epoch": 10, + "grad_norm": 1.2299432754516602, + "learning_rate": 0.215, + "model_norm": 87.65270233154297, + "step_logs": { + "grad_norm": { + "540": 1.3511351346969604, + "541": 1.7157261371612549, + "542": 1.513992190361023, + "543": 1.422940731048584, + "544": 1.1099467277526855, + "545": 1.1671432256698608, + "546": 1.2268176078796387, + "547": 1.0674853324890137, + "548": 0.9695413112640381, + "549": 0.9712854623794556, + "550": 1.0565835237503052, + "551": 1.135793924331665, + "552": 1.126135230064392, + "553": 1.104817509651184, + "554": 1.1046085357666016, + "555": 1.2899866104125977, + "556": 1.3167997598648071, + "557": 1.109792709350586, + "558": 1.0866280794143677, + "559": 1.0575735569000244, + "560": 1.0726302862167358, + "561": 1.1983563899993896, + "562": 1.1723695993423462, + "563": 1.1241942644119263, + "564": 1.144669532775879, + "565": 1.082777738571167, + "566": 1.0937782526016235, + "567": 1.0855193138122559, + "568": 1.0865298509597778, + "569": 1.096518874168396, + "570": 1.1220263242721558, + "571": 1.074330449104309, + "572": 0.9960720539093018, + "573": 1.0139071941375732, + "574": 0.9430017471313477, + "575": 0.8367109298706055, + "576": 0.9074212312698364, + "577": 1.0143239498138428, + "578": 1.1238517761230469, + "579": 1.134877324104309, + "580": 1.0755152702331543, + "581": 1.0887564420700073, + "582": 1.1856040954589844, + "583": 1.1979775428771973, + "584": 1.0448088645935059, + "585": 1.0379643440246582, + "586": 1.1413239240646362, + "587": 1.2523587942123413, + "588": 1.2925063371658325, + "589": 1.2393759489059448, + "590": 1.1159319877624512, + "591": 1.0827888250350952, + "592": 1.1108096837997437, + "593": 1.2299432754516602 + }, + "loss": { + "540": 2.269345283508301, + "541": 2.298983573913574, + "542": 2.338210105895996, + "543": 2.31870174407959, + "544": 2.2218332290649414, + "545": 2.2401480674743652, + "546": 2.2606444358825684, + "547": 2.255159854888916, + "548": 2.2324373722076416, + "549": 2.2224931716918945, + "550": 2.238863945007324, + "551": 2.2543349266052246, + "552": 2.2394089698791504, + "553": 2.228637456893921, + "554": 2.237247943878174, + "555": 2.2630422115325928, + "556": 2.283402442932129, + "557": 2.23069167137146, + "558": 2.2173311710357666, + "559": 2.2453360557556152, + "560": 2.1964633464813232, + "561": 2.2507455348968506, + "562": 2.2712860107421875, + "563": 2.225886583328247, + "564": 2.2693567276000977, + "565": 2.2007830142974854, + "566": 2.223585605621338, + "567": 2.2126495838165283, + "568": 2.2432312965393066, + "569": 2.244802474975586, + "570": 2.2392830848693848, + "571": 2.2328972816467285, + "572": 2.235520124435425, + "573": 2.223702907562256, + "574": 2.201941967010498, + "575": 2.1812734603881836, + "576": 2.191406726837158, + "577": 2.2073256969451904, + "578": 2.229458808898926, + "579": 2.2427992820739746, + "580": 2.2275028228759766, + "581": 2.2165684700012207, + "582": 2.2341699600219727, + "583": 2.215458869934082, + "584": 2.2348432540893555, + "585": 2.2053182125091553, + "586": 2.2032017707824707, + "587": 2.2160887718200684, + "588": 2.2444844245910645, + "589": 2.2254462242126465, + "590": 2.2361598014831543, + "591": 2.222188711166382, + "592": 2.216907262802124, + "593": 2.2255680561065674 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 1.24309, + 0.78098, + 1.02008, + 1.14517, + 1.80346, + 1.64448, + 1.50201, + 1.97904, + 2.37491, + 2.35584, + 2.00549, + 1.74751, + 1.76584, + 1.82582, + 1.83357, + 1.35995, + 1.31687, + 1.81116, + 1.87788, + 2.00752, + 1.90908, + 1.56731, + 1.6525, + 1.76125, + 1.73198, + 1.87715, + 1.85864, + 1.87775, + 1.90016, + 1.86701, + 1.7787, + 1.93461, + 2.25319, + 2.16312, + 2.47617, + 3.11573, + 2.66137, + 2.14542, + 1.76515, + 1.74138, + 1.92568, + 1.86991, + 1.58941, + 1.54371, + 2.04726, + 2.04695, + 1.69136, + 1.41296, + 1.34354, + 1.44881, + 1.79567, + 1.89537, + 1.79667, + 1.4712 + ], + "train_epoch_time": 5.058496952056885, + "train_loss": 2.2306774993195937, + "train_score": 0.3424453013511094, + "val_loss": 2.296033101569087, + "val_score": 0.3286721436766615 + }, + { + "epoch": 11, + "grad_norm": 1.0349370241165161, + "learning_rate": 0.215, + "model_norm": 87.68195343017578, + "step_logs": { + "grad_norm": { + "594": 1.0943937301635742, + "595": 0.9796769022941589, + "596": 1.0310578346252441, + "597": 1.0670500993728638, + "598": 1.1307390928268433, + "599": 1.1710940599441528, + "600": 1.156296968460083, + "601": 1.2133711576461792, + "602": 1.1611528396606445, + "603": 1.1222338676452637, + "604": 1.2052091360092163, + "605": 1.2590749263763428, + "606": 1.176969051361084, + "607": 1.137434482574463, + "608": 1.080508828163147, + "609": 1.029705286026001, + "610": 1.0361080169677734, + "611": 1.053898572921753, + "612": 1.0411697626113892, + "613": 1.0509928464889526, + "614": 1.1699482202529907, + "615": 1.5786114931106567, + "616": 2.08183217048645, + "617": 1.4628498554229736, + "618": 2.068373918533325, + "619": 1.6212427616119385, + "620": 1.5444722175598145, + "621": 1.4550143480300903, + "622": 1.1803194284439087, + "623": 1.21443772315979, + "624": 1.2281582355499268, + "625": 1.2683097124099731, + "626": 1.3153064250946045, + "627": 1.1806786060333252, + "628": 0.9262410402297974, + "629": 0.944209635257721, + "630": 1.0660189390182495, + "631": 1.0335074663162231, + "632": 0.9160115718841553, + "633": 0.847176194190979, + "634": 0.7867324352264404, + "635": 0.8313835263252258, + "636": 0.8352426290512085, + "637": 0.8256402611732483, + "638": 0.839388370513916, + "639": 0.8972194194793701, + "640": 0.9569922089576721, + "641": 1.0144327878952026, + "642": 1.0443109273910522, + "643": 1.041453242301941, + "644": 1.0219300985336304, + "645": 0.9971755743026733, + "646": 0.9836614727973938, + "647": 1.0349370241165161 + }, + "loss": { + "594": 2.2265052795410156, + "595": 2.223076820373535, + "596": 2.2065114974975586, + "597": 2.1909635066986084, + "598": 2.200305461883545, + "599": 2.1981287002563477, + "600": 2.197580337524414, + "601": 2.230177164077759, + "602": 2.225196361541748, + "603": 2.223018169403076, + "604": 2.2023518085479736, + "605": 2.24660325050354, + "606": 2.214543104171753, + "607": 2.215930938720703, + "608": 2.190735340118408, + "609": 2.2196168899536133, + "610": 2.2095272541046143, + "611": 2.177351951599121, + "612": 2.2138514518737793, + "613": 2.1806013584136963, + "614": 2.2112765312194824, + "615": 2.2364535331726074, + "616": 2.3116908073425293, + "617": 2.3622446060180664, + "618": 2.392953872680664, + "619": 2.336181163787842, + "620": 2.2802348136901855, + "621": 2.2987122535705566, + "622": 2.2206177711486816, + "623": 2.2300620079040527, + "624": 2.2312326431274414, + "625": 2.216897964477539, + "626": 2.2371466159820557, + "627": 2.2626171112060547, + "628": 2.1957650184631348, + "629": 2.1836671829223633, + "630": 2.173825263977051, + "631": 2.192389965057373, + "632": 2.189667224884033, + "633": 2.1856131553649902, + "634": 2.1460840702056885, + "635": 2.162750720977783, + "636": 2.178058624267578, + "637": 2.1860759258270264, + "638": 2.1804001331329346, + "639": 2.1534974575042725, + "640": 2.1620194911956787, + "641": 2.195159912109375, + "642": 2.184896469116211, + "643": 2.2083370685577393, + "644": 2.1703310012817383, + "645": 2.191070556640625, + "646": 2.1832213401794434, + "647": 2.1793670654296875 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 1.85899, + 2.31627, + 2.07558, + 1.92427, + 1.72091, + 1.60276, + 1.64364, + 1.51479, + 1.6504, + 1.76513, + 1.51622, + 1.41717, + 1.59865, + 1.71279, + 1.87643, + 2.0934, + 2.05821, + 1.96034, + 2.04223, + 1.97413, + 1.61551, + 0.897448, + 0.533382, + 1.10389, + 0.55934, + 0.888813, + 0.955915, + 1.0858, + 1.59395, + 1.51205, + 1.47923, + 1.37815, + 1.29313, + 1.62311, + 2.5594, + 2.44934, + 1.91291, + 2.05254, + 2.60961, + 3.04527, + 3.46731, + 3.12899, + 3.12208, + 3.20689, + 3.09464, + 2.67514, + 2.36071, + 2.13314, + 2.00342, + 2.03604, + 2.07818, + 2.2035, + 2.25635, + 2.03471 + ], + "train_epoch_time": 5.0525007247924805, + "train_loss": 2.1892604366094517, + "train_score": 0.3530880111447366, + "val_loss": 2.271391089009089, + "val_score": 0.33596440841765685 + }, + { + "epoch": 12, + "grad_norm": 0.6247879266738892, + "learning_rate": 0.215, + "model_norm": 87.70884704589844, + "step_logs": { + "grad_norm": { + "648": 1.27436363697052, + "649": 1.3458645343780518, + "650": 1.3336223363876343, + "651": 1.283778190612793, + "652": 1.1900434494018555, + "653": 1.0207973718643188, + "654": 0.9495716094970703, + "655": 0.9589857459068298, + "656": 0.9915689826011658, + "657": 0.9953168630599976, + "658": 0.9650532603263855, + "659": 0.8956722021102905, + "660": 0.8467937111854553, + "661": 0.8934627771377563, + "662": 0.9381335377693176, + "663": 0.9531769752502441, + "664": 0.8920285105705261, + "665": 0.7697070837020874, + "666": 0.8196448087692261, + "667": 0.7981131672859192, + "668": 0.8364932537078857, + "669": 0.9031451344490051, + "670": 0.884121835231781, + "671": 0.9704537987709045, + "672": 1.037497639656067, + "673": 0.9718296527862549, + "674": 0.9248392581939697, + "675": 0.8426758050918579, + "676": 0.8113840222358704, + "677": 0.7985914349555969, + "678": 0.7930048108100891, + "679": 0.8237600922584534, + "680": 0.8027164340019226, + "681": 0.8228554129600525, + "682": 0.8873187899589539, + "683": 0.9340026378631592, + "684": 0.988332211971283, + "685": 1.0457377433776855, + "686": 0.9370012283325195, + "687": 0.8158568143844604, + "688": 0.7143938541412354, + "689": 0.6188098788261414, + "690": 0.6002197861671448, + "691": 0.5776981711387634, + "692": 0.5639108419418335, + "693": 0.5528355836868286, + "694": 0.6216949224472046, + "695": 0.7398737072944641, + "696": 0.7709805369377136, + "697": 0.7204237580299377, + "698": 0.7669119238853455, + "699": 0.8541778326034546, + "700": 0.7806927561759949, + "701": 0.6247879266738892 + }, + "loss": { + "648": 2.187340497970581, + "649": 2.236661672592163, + "650": 2.181426525115967, + "651": 2.2223172187805176, + "652": 2.206108570098877, + "653": 2.180610179901123, + "654": 2.176434278488159, + "655": 2.179388999938965, + "656": 2.1816482543945312, + "657": 2.1746327877044678, + "658": 2.169236183166504, + "659": 2.1697540283203125, + "660": 2.1555299758911133, + "661": 2.1336028575897217, + "662": 2.1822409629821777, + "663": 2.1862268447875977, + "664": 2.1699609756469727, + "665": 2.158590078353882, + "666": 2.123161792755127, + "667": 2.118091583251953, + "668": 2.1530086994171143, + "669": 2.1503145694732666, + "670": 2.151559591293335, + "671": 2.1622719764709473, + "672": 2.174621820449829, + "673": 2.1316094398498535, + "674": 2.1538071632385254, + "675": 2.1402854919433594, + "676": 2.10453200340271, + "677": 2.126291036605835, + "678": 2.116116523742676, + "679": 2.146627426147461, + "680": 2.1377975940704346, + "681": 2.140343189239502, + "682": 2.1301398277282715, + "683": 2.143266201019287, + "684": 2.1528308391571045, + "685": 2.147887945175171, + "686": 2.164555549621582, + "687": 2.168947696685791, + "688": 2.127497673034668, + "689": 2.109205961227417, + "690": 2.1138579845428467, + "691": 2.112443447113037, + "692": 2.08705997467041, + "693": 2.0793814659118652, + "694": 2.109280586242676, + "695": 2.128284215927124, + "696": 2.1036956310272217, + "697": 2.141223430633545, + "698": 2.1119680404663086, + "699": 2.1471896171569824, + "700": 2.1311025619506836, + "701": 2.100067615509033 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 1.34688, + 1.2348, + 1.22652, + 1.34842, + 1.55776, + 2.09266, + 2.41374, + 2.36979, + 2.21891, + 2.19514, + 2.32919, + 2.70466, + 3.00607, + 2.67276, + 2.47955, + 2.40629, + 2.72706, + 3.6435, + 3.16032, + 3.32518, + 3.07695, + 2.63625, + 2.75251, + 2.29594, + 2.02027, + 2.25698, + 2.51811, + 3.01405, + 3.19671, + 3.33406, + 3.36502, + 3.16341, + 3.31774, + 3.16109, + 2.70551, + 2.45686, + 2.20396, + 1.96411, + 2.46541, + 3.25853, + 4.16863, + 5.50813, + 5.86753, + 6.3297, + 6.56318, + 6.80365, + 5.45732, + 3.88789, + 3.53913, + 4.12559, + 3.59084, + 2.94289, + 3.49658, + 5.37982 + ], + "train_epoch_time": 5.051907539367676, + "train_loss": 2.1044700938625693, + "train_score": 0.3756164813742918, + "val_loss": 2.1938876284523756, + "val_score": 0.35201546313705184 + }, + { + "epoch": 13, + "grad_norm": 0.502764880657196, + "learning_rate": 0.14333333333333334, + "model_norm": 87.7265853881836, + "step_logs": { + "grad_norm": { + "702": 0.6563643217086792, + "703": 0.6571993827819824, + "704": 0.6090049147605896, + "705": 0.5090834498405457, + "706": 0.5032826066017151, + "707": 0.5454604625701904, + "708": 0.5655675530433655, + "709": 0.5795179009437561, + "710": 0.5758205652236938, + "711": 0.5833280086517334, + "712": 0.5649846196174622, + "713": 0.6254504919052124, + "714": 0.658409595489502, + "715": 0.6992095708847046, + "716": 0.648937463760376, + "717": 0.5740047693252563, + "718": 0.5344383716583252, + "719": 0.5104644894599915, + "720": 0.521329402923584, + "721": 0.5099399089813232, + "722": 0.5251731276512146, + "723": 0.5438639521598816, + "724": 0.5583299994468689, + "725": 0.5773388147354126, + "726": 0.5739305019378662, + "727": 0.5646443367004395, + "728": 0.584309995174408, + "729": 0.5981665253639221, + "730": 0.5569187998771667, + "731": 0.5464934706687927, + "732": 0.5473659634590149, + "733": 0.5321969985961914, + "734": 0.49224385619163513, + "735": 0.5335285663604736, + "736": 0.6055594086647034, + "737": 0.6290929317474365, + "738": 0.6229736804962158, + "739": 0.5933385491371155, + "740": 0.514173686504364, + "741": 0.504342794418335, + "742": 0.44745633006095886, + "743": 0.49347758293151855, + "744": 0.4832867383956909, + "745": 0.4926929175853729, + "746": 0.48200470209121704, + "747": 0.4366026818752289, + "748": 0.48769909143447876, + "749": 0.5025731921195984, + "750": 0.5172049403190613, + "751": 0.46526020765304565, + "752": 0.5184414982795715, + "753": 0.43663185834884644, + "754": 0.4778238534927368, + "755": 0.502764880657196 + }, + "loss": { + "702": 2.0921363830566406, + "703": 2.1032629013061523, + "704": 2.0804810523986816, + "705": 2.0745372772216797, + "706": 2.099489688873291, + "707": 2.1054189205169678, + "708": 2.1142611503601074, + "709": 2.095829486846924, + "710": 2.1037867069244385, + "711": 2.104379653930664, + "712": 2.1010234355926514, + "713": 2.107572555541992, + "714": 2.091850757598877, + "715": 2.1197190284729004, + "716": 2.0960986614227295, + "717": 2.1105685234069824, + "718": 2.103867530822754, + "719": 2.089076042175293, + "720": 2.0880379676818848, + "721": 2.0939292907714844, + "722": 2.087529420852661, + "723": 2.075826406478882, + "724": 2.0881247520446777, + "725": 2.078279972076416, + "726": 2.0891664028167725, + "727": 2.065402030944824, + "728": 2.083779811859131, + "729": 2.094331741333008, + "730": 2.1098685264587402, + "731": 2.0717971324920654, + "732": 2.0999763011932373, + "733": 2.0813305377960205, + "734": 2.0774643421173096, + "735": 2.0797691345214844, + "736": 2.092348098754883, + "737": 2.068068027496338, + "738": 2.0782954692840576, + "739": 2.098259925842285, + "740": 2.100529909133911, + "741": 2.070366621017456, + "742": 2.1026504039764404, + "743": 2.1024317741394043, + "744": 2.084796905517578, + "745": 2.0935921669006348, + "746": 2.07623553276062, + "747": 2.0815622806549072, + "748": 2.073194742202759, + "749": 2.0360491275787354, + "750": 2.069887638092041, + "751": 2.0653672218322754, + "752": 2.0628838539123535, + "753": 2.066896677017212, + "754": 2.0598807334899902, + "755": 2.0590217113494873 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 4.85624, + 4.86967, + 5.60947, + 8.00467, + 8.28877, + 7.07639, + 6.60982, + 6.24054, + 6.34494, + 6.18441, + 6.582, + 5.38762, + 4.82546, + 4.33574, + 4.97744, + 6.40573, + 7.36585, + 8.01721, + 7.6827, + 8.05238, + 7.56881, + 7.01795, + 6.69845, + 6.23509, + 6.34241, + 6.4782, + 6.10331, + 5.85331, + 6.80255, + 6.93709, + 7.00904, + 7.34846, + 8.57379, + 7.30634, + 5.70585, + 5.22559, + 5.3551, + 5.96011, + 7.94528, + 8.13946, + 10.5018, + 8.6335, + 8.92594, + 8.62461, + 8.93664, + 10.9198, + 8.71638, + 8.06101, + 7.73787, + 9.54125, + 7.67495, + 10.8415, + 9.02207, + 8.14575 + ], + "train_epoch_time": 5.052689075469971, + "train_loss": 2.069272039610481, + "train_score": 0.3860081152474692, + "val_loss": 2.1647764798831175, + "val_score": 0.3620434486907057 + }, + { + "epoch": 14, + "grad_norm": 0.4239129424095154, + "learning_rate": 0.07166666666666667, + "model_norm": 87.73258209228516, + "step_logs": { + "grad_norm": { + "756": 0.487050861120224, + "757": 0.4544031322002411, + "758": 0.42830315232276917, + "759": 0.4815734326839447, + "760": 0.4934178292751312, + "761": 0.4224790334701538, + "762": 0.43204110860824585, + "763": 0.45534443855285645, + "764": 0.4864194095134735, + "765": 0.47531047463417053, + "766": 0.4640043377876282, + "767": 0.4463971257209778, + "768": 0.49678027629852295, + "769": 0.4804724454879761, + "770": 0.4824571907520294, + "771": 0.45870834589004517, + "772": 0.4417542517185211, + "773": 0.48939937353134155, + "774": 0.4479179084300995, + "775": 0.44021567702293396, + "776": 0.44497251510620117, + "777": 0.4297047555446625, + "778": 0.4160434603691101, + "779": 0.45847994089126587, + "780": 0.4314284920692444, + "781": 0.44189924001693726, + "782": 0.47223207354545593, + "783": 0.5139621496200562, + "784": 0.4329736828804016, + "785": 0.4623982012271881, + "786": 0.43171170353889465, + "787": 0.48285242915153503, + "788": 0.46608272194862366, + "789": 0.4571160674095154, + "790": 0.4436997175216675, + "791": 0.4596877694129944, + "792": 0.41174277663230896, + "793": 0.415783166885376, + "794": 0.4652421772480011, + "795": 0.4222385883331299, + "796": 0.4403056502342224, + "797": 0.42745882272720337, + "798": 0.41921210289001465, + "799": 0.4269837737083435, + "800": 0.41298189759254456, + "801": 0.4164678454399109, + "802": 0.43775880336761475, + "803": 0.4333553910255432, + "804": 0.42356058955192566, + "805": 0.47225651144981384, + "806": 0.4127597212791443, + "807": 0.42735517024993896, + "808": 0.4763546586036682, + "809": 0.4239129424095154 + }, + "loss": { + "756": 2.076101303100586, + "757": 2.068575620651245, + "758": 2.067920207977295, + "759": 2.0638036727905273, + "760": 2.044358968734741, + "761": 2.072505474090576, + "762": 2.0743305683135986, + "763": 2.0558788776397705, + "764": 2.060443878173828, + "765": 2.0599725246429443, + "766": 2.0845131874084473, + "767": 2.0658109188079834, + "768": 2.0891785621643066, + "769": 2.0539777278900146, + "770": 2.064149856567383, + "771": 2.043766975402832, + "772": 2.073176383972168, + "773": 2.0630340576171875, + "774": 2.079786539077759, + "775": 2.0677788257598877, + "776": 2.051941394805908, + "777": 2.0560293197631836, + "778": 2.0794830322265625, + "779": 2.068795680999756, + "780": 2.0758466720581055, + "781": 2.0535378456115723, + "782": 2.033247470855713, + "783": 2.07309627532959, + "784": 2.060553550720215, + "785": 2.0748229026794434, + "786": 2.0392563343048096, + "787": 2.074115514755249, + "788": 2.0639984607696533, + "789": 2.0741524696350098, + "790": 2.050894260406494, + "791": 2.074471950531006, + "792": 2.062798023223877, + "793": 2.0665621757507324, + "794": 2.078562021255493, + "795": 2.057570457458496, + "796": 2.0507564544677734, + "797": 2.069823741912842, + "798": 2.0648856163024902, + "799": 2.051821231842041, + "800": 2.048342227935791, + "801": 2.055738925933838, + "802": 2.051240921020508, + "803": 2.0423755645751953, + "804": 2.0334811210632324, + "805": 2.0591909885406494, + "806": 2.059598207473755, + "807": 2.0358939170837402, + "808": 2.0849485397338867, + "809": 2.072460174560547 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 8.75185, + 10.0182, + 11.2728, + 8.89904, + 8.39706, + 11.6114, + 11.1129, + 9.91556, + 8.70841, + 9.11815, + 9.6819, + 10.3669, + 8.46539, + 8.89731, + 8.86796, + 9.71311, + 10.6237, + 8.6135, + 10.3663, + 10.6702, + 10.3633, + 11.135, + 12.0137, + 9.84185, + 11.1526, + 10.5161, + 9.11757, + 7.84797, + 10.9916, + 9.70395, + 10.9417, + 8.89619, + 9.50131, + 9.92631, + 10.4175, + 9.81707, + 12.1676, + 11.954, + 9.60295, + 11.5409, + 10.578, + 11.3278, + 11.7497, + 11.2543, + 12.0099, + 11.8524, + 10.704, + 10.8754, + 11.3347, + 9.23295, + 12.0889, + 11.1475, + 9.18829, + 11.5327 + ], + "train_epoch_time": 5.05340576171875, + "train_loss": 2.0568622892179995, + "train_score": 0.3888607425532348, + "val_loss": 2.154515545896493, + "val_score": 0.36438450822873997 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:23:41.394636", + "final_model_norm": 87.73258209228516, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:21:56.604458", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 4.7496337890625, + "learning_rate": 2.15e-11, + "model_norm": 87.43766021728516, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.733994483947754, + "3": 8.656057357788086, + "4": 20.53462028503418, + "5": 7.524908065795898, + "6": 5.193840503692627, + "7": 4.4241251945495605, + "8": 3.958240032196045, + "9": 6.524819850921631, + "10": 4.840961456298828, + "11": 53.18486404418945, + "12": 5.4986419677734375, + "13": 12.442137718200684, + "14": 6.462600231170654, + "15": 11.177252769470215, + "16": 10.306532859802246, + "17": 8.236608505249023, + "18": 6.942298412322998, + "19": 3.816582202911377, + "20": 5.433033466339111, + "21": 11.737058639526367, + "22": 15.485664367675781, + "23": 4.85066032409668, + "24": 13.537721633911133, + "25": 6.891615867614746, + "26": 8.508872032165527, + "27": 12.443584442138672, + "28": 3.6626434326171875, + "29": 6.212114334106445, + "30": 3.7766146659851074, + "31": 3.7971911430358887, + "32": 6.624378204345703, + "33": 4.688662528991699, + "34": 7.838125228881836, + "35": 5.432688236236572, + "36": 8.92573070526123, + "37": 6.702051639556885, + "38": 3.7504196166992188, + "39": 9.982173919677734, + "40": 4.434970855712891, + "41": 8.199506759643555, + "42": 3.068848133087158, + "43": 5.904267311096191, + "44": 3.240138530731201, + "45": 5.36593770980835, + "46": 2.441917657852173, + "47": 5.976339340209961, + "48": 11.575305938720703, + "49": 1.9051425457000732, + "50": 6.530988693237305, + "51": 2.7845442295074463, + "52": 3.0467329025268555, + "53": 4.7496337890625 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.8340747356414795, + "3": 3.709585666656494, + "4": 4.2086381912231445, + "5": 4.107909202575684, + "6": 3.57332706451416, + "7": 3.5852417945861816, + "8": 3.5281410217285156, + "9": 3.475802421569824, + "10": 3.82902193069458, + "11": 3.70784854888916, + "12": 3.4101204872131348, + "13": 3.5326273441314697, + "14": 3.489149570465088, + "15": 5.853799819946289, + "16": 3.937934398651123, + "17": 3.9702212810516357, + "18": 3.6945838928222656, + "19": 3.658841609954834, + "20": 3.4033401012420654, + "21": 3.869394063949585, + "22": 5.435483932495117, + "23": 3.5983214378356934, + "24": 3.690361261367798, + "25": 3.3407294750213623, + "26": 3.743614912033081, + "27": 3.74813175201416, + "28": 3.2972519397735596, + "29": 3.2863545417785645, + "30": 3.3834760189056396, + "31": 3.21584415435791, + "32": 3.399313449859619, + "33": 3.2554330825805664, + "34": 3.6103506088256836, + "35": 3.293485641479492, + "36": 3.268389940261841, + "37": 3.3465490341186523, + "38": 3.397594928741455, + "39": 3.386854648590088, + "40": 3.106630802154541, + "41": 3.451869249343872, + "42": 3.1569747924804688, + "43": 3.1236138343811035, + "44": 3.286839723587036, + "45": 3.2410733699798584, + "46": 2.995837926864624, + "47": 3.1895546913146973, + "48": 3.5327181816101074, + "49": 2.920980930328369, + "50": 3.262308120727539, + "51": 3.108814239501953, + "52": 3.072892189025879, + "53": 3.399852752685547 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 0.00869338, + 0.00874607, + 0.0845503, + 0.0495091, + 0.00998087, + 0.0725468, + 0.132463, + 0.183174, + 0.225186, + 0.0816428, + 0.16339, + 0.00131083, + 0.112787, + 0.0228196, + 0.083542, + 0.0468563, + 0.0370718, + 0.0585218, + 0.0766583, + 0.251185, + 0.115298, + 0.0280882, + 0.0226662, + 0.152932, + 0.0201362, + 0.0703395, + 0.0517067, + 0.024206, + 0.245789, + 0.08516, + 0.237224, + 0.223033, + 0.0774642, + 0.148085, + 0.0587658, + 0.11159, + 0.0410248, + 0.0745044, + 0.241553, + 0.0339896, + 0.157946, + 0.0513427, + 0.335213, + 0.0896036, + 0.313077, + 0.112564, + 0.502408, + 0.0893017, + 0.026366, + 0.804774, + 0.0764834, + 0.400947, + 0.331039, + 0.150709 + ], + "train_epoch_time": 5.061100482940674, + "train_loss": 3.242945489513994, + "train_score": 0.19822565453979515, + "val_loss": 3.2406251821944965, + "val_score": 0.19662654280115077 + }, + { + "epoch": 1, + "grad_norm": 0.9895183444023132, + "learning_rate": 0.215, + "model_norm": 87.45398712158203, + "step_logs": { + "grad_norm": { + "54": 3.5259673595428467, + "55": 2.9568469524383545, + "56": 3.482332229614258, + "57": 4.4543046951293945, + "58": 4.397068500518799, + "59": 3.0835766792297363, + "60": 3.778782367706299, + "61": 6.872467517852783, + "62": 2.8213114738464355, + "63": 2.861396074295044, + "64": 3.0130221843719482, + "65": 2.8980464935302734, + "66": 2.922877073287964, + "67": 2.683718204498291, + "68": 5.099470138549805, + "69": 2.7585859298706055, + "70": 2.321791172027588, + "71": 2.312845230102539, + "72": 2.170801877975464, + "73": 2.049748182296753, + "74": 3.4217114448547363, + "75": 1.847901463508606, + "76": 1.9583847522735596, + "77": 1.7942246198654175, + "78": 1.5130752325057983, + "79": 1.0910139083862305, + "80": 1.5957891941070557, + "81": 1.668270230293274, + "82": 1.5282151699066162, + "83": 1.5714319944381714, + "84": 1.681351661682129, + "85": 1.540235161781311, + "86": 1.353806495666504, + "87": 1.3609486818313599, + "88": 1.676891803741455, + "89": 1.5412225723266602, + "90": 1.1339129209518433, + "91": 1.1716455221176147, + "92": 1.718323826789856, + "93": 1.705134630203247, + "94": 1.257216215133667, + "95": 1.2187247276306152, + "96": 1.6744968891143799, + "97": 1.5709903240203857, + "98": 1.5189322233200073, + "99": 1.488869547843933, + "100": 1.346064805984497, + "101": 1.2822684049606323, + "102": 1.3047657012939453, + "103": 1.5115406513214111, + "104": 1.8590677976608276, + "105": 1.5442497730255127, + "106": 0.9806906580924988, + "107": 0.9895183444023132 + }, + "loss": { + "54": 3.2356085777282715, + "55": 3.3969006538391113, + "56": 3.1670751571655273, + "57": 3.4155526161193848, + "58": 3.246426582336426, + "59": 3.0758066177368164, + "60": 3.3291893005371094, + "61": 3.236724853515625, + "62": 2.9802818298339844, + "63": 3.085689067840576, + "64": 3.1118011474609375, + "65": 3.2212648391723633, + "66": 3.060415267944336, + "67": 3.0899100303649902, + "68": 3.204525947570801, + "69": 3.2052576541900635, + "70": 2.875473976135254, + "71": 3.0130062103271484, + "72": 2.8758559226989746, + "73": 2.9374003410339355, + "74": 2.918041706085205, + "75": 3.2483158111572266, + "76": 3.008821487426758, + "77": 2.7938528060913086, + "78": 2.7898197174072266, + "79": 2.651493787765503, + "80": 2.651546001434326, + "81": 2.798872470855713, + "82": 2.687563180923462, + "83": 2.7098217010498047, + "84": 2.698456287384033, + "85": 2.762613534927368, + "86": 2.639796018600464, + "87": 2.656764268875122, + "88": 2.6556339263916016, + "89": 2.75466251373291, + "90": 2.668826103210449, + "91": 2.5953469276428223, + "92": 2.680950164794922, + "93": 2.748776435852051, + "94": 2.6541833877563477, + "95": 2.6150248050689697, + "96": 2.6706058979034424, + "97": 2.7313241958618164, + "98": 2.647160053253174, + "99": 2.708970546722412, + "100": 2.629701852798462, + "101": 2.623826503753662, + "102": 2.6218488216400146, + "103": 2.624159097671509, + "104": 2.6723456382751465, + "105": 2.7639923095703125, + "106": 2.566903591156006, + "107": 2.5794601440429688 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.260255, + 0.388531, + 0.261167, + 0.172148, + 0.167911, + 0.323482, + 0.23315, + 0.0685299, + 0.374417, + 0.376874, + 0.342773, + 0.383545, + 0.358228, + 0.429015, + 0.123229, + 0.421201, + 0.533412, + 0.563257, + 0.610277, + 0.699137, + 0.249233, + 0.951263, + 0.784513, + 0.86786, + 1.21858, + 2.22756, + 1.04123, + 1.00566, + 1.15077, + 1.09736, + 0.954549, + 1.16452, + 1.44031, + 1.4344, + 0.944405, + 1.15968, + 2.07568, + 1.89061, + 0.907985, + 0.945414, + 1.67923, + 1.76062, + 0.952448, + 1.10669, + 1.14737, + 1.22206, + 1.45136, + 1.5958, + 1.54008, + 1.14855, + 0.773218, + 1.15905, + 2.66898, + 2.6344 + ], + "train_epoch_time": 5.051178932189941, + "train_loss": 2.5824617500797746, + "train_score": 0.253816580002756, + "val_loss": 2.6019529436814386, + "val_score": 0.24687858772743174 + }, + { + "epoch": 2, + "grad_norm": 1.438843846321106, + "learning_rate": 0.215, + "model_norm": 87.4708480834961, + "step_logs": { + "grad_norm": { + "108": 1.2233319282531738, + "109": 1.2875303030014038, + "110": 1.4738836288452148, + "111": 1.4504550695419312, + "112": 1.3059566020965576, + "113": 1.3362373113632202, + "114": 1.232397437095642, + "115": 1.2788878679275513, + "116": 1.3838263750076294, + "117": 1.3362531661987305, + "118": 1.189020037651062, + "119": 1.1656744480133057, + "120": 1.409247875213623, + "121": 1.4537246227264404, + "122": 1.6610594987869263, + "123": 1.5994174480438232, + "124": 1.2185155153274536, + "125": 1.1961498260498047, + "126": 1.2616292238235474, + "127": 1.2709039449691772, + "128": 1.336777925491333, + "129": 1.2213470935821533, + "130": 1.0613573789596558, + "131": 1.0799349546432495, + "132": 1.2445788383483887, + "133": 1.2968882322311401, + "134": 1.3268803358078003, + "135": 1.312757968902588, + "136": 1.2625645399093628, + "137": 1.2799403667449951, + "138": 1.3694570064544678, + "139": 1.4357945919036865, + "140": 1.4158834218978882, + "141": 1.4277855157852173, + "142": 1.3735074996948242, + "143": 1.360901951789856, + "144": 1.235777735710144, + "145": 1.2103146314620972, + "146": 1.0776960849761963, + "147": 0.950208842754364, + "148": 0.973766028881073, + "149": 1.1087901592254639, + "150": 1.2300769090652466, + "151": 1.2014013528823853, + "152": 1.0850319862365723, + "153": 1.2147715091705322, + "154": 1.4230504035949707, + "155": 1.9762364625930786, + "156": 1.5402697324752808, + "157": 1.1045266389846802, + "158": 1.1548722982406616, + "159": 1.2398775815963745, + "160": 1.659959077835083, + "161": 1.438843846321106 + }, + "loss": { + "108": 2.5846681594848633, + "109": 2.604456901550293, + "110": 2.6168222427368164, + "111": 2.683922529220581, + "112": 2.5865767002105713, + "113": 2.633469581604004, + "114": 2.5922837257385254, + "115": 2.6114470958709717, + "116": 2.6279919147491455, + "117": 2.6242949962615967, + "118": 2.5978493690490723, + "119": 2.578488349914551, + "120": 2.5887646675109863, + "121": 2.6442418098449707, + "122": 2.602750778198242, + "123": 2.738786458969116, + "124": 2.5735490322113037, + "125": 2.599130630493164, + "126": 2.5824694633483887, + "127": 2.6221070289611816, + "128": 2.578014373779297, + "129": 2.6319899559020996, + "130": 2.5426599979400635, + "131": 2.566744565963745, + "132": 2.5594983100891113, + "133": 2.602893352508545, + "134": 2.5885303020477295, + "135": 2.59574031829834, + "136": 2.559732437133789, + "137": 2.5966176986694336, + "138": 2.569796085357666, + "139": 2.6154351234436035, + "140": 2.5899815559387207, + "141": 2.620474100112915, + "142": 2.5939788818359375, + "143": 2.6111207008361816, + "144": 2.5724313259124756, + "145": 2.593980312347412, + "146": 2.5657131671905518, + "147": 2.5307910442352295, + "148": 2.522359848022461, + "149": 2.5407354831695557, + "150": 2.547524929046631, + "151": 2.5607824325561523, + "152": 2.546299457550049, + "153": 2.539485454559326, + "154": 2.5851902961730957, + "155": 2.632418155670166, + "156": 2.7197184562683105, + "157": 2.5422887802124023, + "158": 2.547421932220459, + "159": 2.5587520599365234, + "160": 2.5815212726593018, + "161": 2.6597466468811035 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 1.72709, + 1.57109, + 1.20461, + 1.27574, + 1.51659, + 1.4749, + 1.70679, + 1.59667, + 1.37234, + 1.46972, + 1.83754, + 1.89763, + 1.30352, + 1.25123, + 0.943327, + 1.07062, + 1.73329, + 1.81659, + 1.62245, + 1.6234, + 1.44267, + 1.76444, + 2.25717, + 2.20083, + 1.65238, + 1.54757, + 1.47025, + 1.50623, + 1.60579, + 1.585, + 1.37026, + 1.2687, + 1.29194, + 1.28545, + 1.37501, + 1.40985, + 1.68447, + 1.7708, + 2.2091, + 2.80297, + 2.6601, + 2.06662, + 1.68366, + 1.77417, + 2.16284, + 1.7209, + 1.27659, + 0.674027, + 1.14638, + 2.08388, + 1.91, + 1.66445, + 0.936873, + 1.28473 + ], + "train_epoch_time": 5.051164865493774, + "train_loss": 2.5496641852763324, + "train_score": 0.2510412930715375, + "val_loss": 2.58637697606355, + "val_score": 0.24500394671573705 + }, + { + "epoch": 3, + "grad_norm": 1.1497336626052856, + "learning_rate": 0.215, + "model_norm": 87.48700714111328, + "step_logs": { + "grad_norm": { + "162": 1.050085425376892, + "163": 1.0326648950576782, + "164": 1.4168496131896973, + "165": 1.5214149951934814, + "166": 1.3445160388946533, + "167": 1.2294894456863403, + "168": 1.1759703159332275, + "169": 1.2555344104766846, + "170": 1.795857310295105, + "171": 1.568656325340271, + "172": 1.1205564737319946, + "173": 1.0407679080963135, + "174": 0.9934329986572266, + "175": 1.0689603090286255, + "176": 1.1351724863052368, + "177": 1.1269174814224243, + "178": 1.094292163848877, + "179": 1.1241751909255981, + "180": 1.207863688468933, + "181": 1.260880708694458, + "182": 1.2338262796401978, + "183": 1.3229544162750244, + "184": 1.7132389545440674, + "185": 1.4439713954925537, + "186": 1.0494155883789062, + "187": 1.08171546459198, + "188": 1.243632435798645, + "189": 1.132729411125183, + "190": 1.1325690746307373, + "191": 1.2239532470703125, + "192": 1.2383285760879517, + "193": 1.1728230714797974, + "194": 1.0139098167419434, + "195": 1.0753616094589233, + "196": 1.2727397680282593, + "197": 1.2477887868881226, + "198": 1.203045129776001, + "199": 1.1582870483398438, + "200": 1.0660293102264404, + "201": 1.074910283088684, + "202": 1.1148866415023804, + "203": 1.275556206703186, + "204": 1.4545255899429321, + "205": 1.4447495937347412, + "206": 1.1876591444015503, + "207": 1.1161009073257446, + "208": 1.0649995803833008, + "209": 1.0433517694473267, + "210": 1.164297342300415, + "211": 1.179956316947937, + "212": 1.1639496088027954, + "213": 1.2394453287124634, + "214": 1.1911393404006958, + "215": 1.1497336626052856 + }, + "loss": { + "162": 2.553956985473633, + "163": 2.5160884857177734, + "164": 2.5572195053100586, + "165": 2.6393539905548096, + "166": 2.573037624359131, + "167": 2.5481529235839844, + "168": 2.55696177482605, + "169": 2.575908660888672, + "170": 2.593052864074707, + "171": 2.713900089263916, + "172": 2.5387673377990723, + "173": 2.551333427429199, + "174": 2.5312585830688477, + "175": 2.529991626739502, + "176": 2.536864757537842, + "177": 2.5450539588928223, + "178": 2.5372281074523926, + "179": 2.5184242725372314, + "180": 2.5571441650390625, + "181": 2.5595500469207764, + "182": 2.5660524368286133, + "183": 2.554115056991577, + "184": 2.6031370162963867, + "185": 2.661167621612549, + "186": 2.5279197692871094, + "187": 2.5267162322998047, + "188": 2.5198121070861816, + "189": 2.565575361251831, + "190": 2.5168821811676025, + "191": 2.5461485385894775, + "192": 2.535763740539551, + "193": 2.549503803253174, + "194": 2.506636381149292, + "195": 2.5353403091430664, + "196": 2.5276269912719727, + "197": 2.556729793548584, + "198": 2.5351266860961914, + "199": 2.5549492835998535, + "200": 2.518739700317383, + "201": 2.5266060829162598, + "202": 2.5180256366729736, + "203": 2.532019853591919, + "204": 2.548419713973999, + "205": 2.5820398330688477, + "206": 2.5259439945220947, + "207": 2.5355849266052246, + "208": 2.516207695007324, + "209": 2.4979496002197266, + "210": 2.5298194885253906, + "211": 2.5399580001831055, + "212": 2.519296169281006, + "213": 2.5140347480773926, + "214": 2.535700798034668, + "215": 2.520113468170166 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 2.31614, + 2.35943, + 1.27386, + 1.14026, + 1.42336, + 1.68568, + 1.84898, + 1.63408, + 0.804022, + 1.10291, + 2.02188, + 2.35537, + 2.56483, + 2.21409, + 1.96867, + 2.00407, + 2.11881, + 1.99279, + 1.75275, + 1.60996, + 1.68561, + 1.45932, + 0.886872, + 1.27631, + 2.29545, + 2.15939, + 1.62924, + 1.99955, + 1.96216, + 1.69963, + 1.65362, + 1.85349, + 2.43833, + 2.19244, + 1.56039, + 1.64211, + 1.7516, + 1.90436, + 2.21638, + 2.18672, + 2.02581, + 1.55621, + 1.20456, + 1.23702, + 1.79077, + 2.0355, + 2.21844, + 2.29468, + 1.86622, + 1.82429, + 1.85956, + 1.6365, + 1.7872, + 1.90645 + ], + "train_epoch_time": 5.052144289016724, + "train_loss": 2.5310348666039224, + "train_score": 0.23321265247323078, + "val_loss": 2.5781733811791265, + "val_score": 0.23483693355054996 + }, + { + "epoch": 4, + "grad_norm": 1.4136536121368408, + "learning_rate": 0.215, + "model_norm": 87.5055160522461, + "step_logs": { + "grad_norm": { + "216": 1.1256388425827026, + "217": 1.0439687967300415, + "218": 1.0271297693252563, + "219": 1.0465854406356812, + "220": 1.1141142845153809, + "221": 1.3420478105545044, + "222": 1.388972282409668, + "223": 1.1843324899673462, + "224": 1.0988532304763794, + "225": 0.9907215237617493, + "226": 1.0126491785049438, + "227": 1.113301396369934, + "228": 1.1795713901519775, + "229": 1.2027106285095215, + "230": 1.2602288722991943, + "231": 1.2933470010757446, + "232": 1.1825640201568604, + "233": 0.970707356929779, + "234": 0.9286707043647766, + "235": 1.0501418113708496, + "236": 1.1006262302398682, + "237": 1.1589261293411255, + "238": 1.1930242776870728, + "239": 1.3182697296142578, + "240": 1.2061333656311035, + "241": 1.0257818698883057, + "242": 1.0941945314407349, + "243": 1.1340782642364502, + "244": 1.051352620124817, + "245": 1.0220674276351929, + "246": 1.0296192169189453, + "247": 1.1951388120651245, + "248": 1.1458808183670044, + "249": 0.9668431282043457, + "250": 0.9969336986541748, + "251": 1.2322670221328735, + "252": 1.260117530822754, + "253": 1.3114705085754395, + "254": 1.3221094608306885, + "255": 1.1988939046859741, + "256": 1.1063432693481445, + "257": 1.0850253105163574, + "258": 1.1996195316314697, + "259": 1.3038287162780762, + "260": 1.3045973777770996, + "261": 1.0922925472259521, + "262": 0.9888884425163269, + "263": 1.0132226943969727, + "264": 1.182775616645813, + "265": 1.348366141319275, + "266": 1.3008694648742676, + "267": 1.2511227130889893, + "268": 1.27561354637146, + "269": 1.4136536121368408 + }, + "loss": { + "216": 2.5313754081726074, + "217": 2.4926369190216064, + "218": 2.5185351371765137, + "219": 2.5040764808654785, + "220": 2.5085480213165283, + "221": 2.5251452922821045, + "222": 2.579390048980713, + "223": 2.5221755504608154, + "224": 2.550185203552246, + "225": 2.4988090991973877, + "226": 2.517817497253418, + "227": 2.4847073554992676, + "228": 2.5304605960845947, + "229": 2.510681629180908, + "230": 2.54360032081604, + "231": 2.5163745880126953, + "232": 2.5476391315460205, + "233": 2.5137102603912354, + "234": 2.4717116355895996, + "235": 2.4896535873413086, + "236": 2.528299331665039, + "237": 2.501495599746704, + "238": 2.530106782913208, + "239": 2.5251595973968506, + "240": 2.5540010929107666, + "241": 2.488203525543213, + "242": 2.52704119682312, + "243": 2.50433611869812, + "244": 2.5183701515197754, + "245": 2.492636203765869, + "246": 2.4824111461639404, + "247": 2.4939348697662354, + "248": 2.5156242847442627, + "249": 2.472944736480713, + "250": 2.480274200439453, + "251": 2.477083206176758, + "252": 2.521735429763794, + "253": 2.506953716278076, + "254": 2.5420007705688477, + "255": 2.5196309089660645, + "256": 2.5012266635894775, + "257": 2.4918296337127686, + "258": 2.506340265274048, + "259": 2.5137476921081543, + "260": 2.5228233337402344, + "261": 2.512665271759033, + "262": 2.4894466400146484, + "263": 2.4663496017456055, + "264": 2.4855427742004395, + "265": 2.5255002975463867, + "266": 2.5225110054016113, + "267": 2.4934170246124268, + "268": 2.487058401107788, + "269": 2.5473792552948 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 1.99783, + 2.28709, + 2.38725, + 2.28612, + 2.02098, + 1.40201, + 1.337, + 1.79816, + 2.11199, + 2.54583, + 2.45531, + 2.0047, + 1.81866, + 1.73568, + 1.60159, + 1.50434, + 1.82175, + 2.66771, + 2.86599, + 2.25758, + 2.08713, + 1.86247, + 1.77763, + 1.45305, + 1.75562, + 2.3647, + 2.11068, + 1.94718, + 2.27836, + 2.38616, + 2.34164, + 1.74602, + 1.91587, + 2.64547, + 2.49555, + 1.63129, + 1.5881, + 1.45757, + 1.45426, + 1.75297, + 2.04349, + 2.1166, + 1.74162, + 1.4787, + 1.48229, + 2.10599, + 2.54571, + 2.4024, + 1.77671, + 1.38909, + 1.49062, + 1.59292, + 1.52844, + 1.2747 + ], + "train_epoch_time": 5.051509618759155, + "train_loss": 2.5299568458130235, + "train_score": 0.2571747220828167, + "val_loss": 2.565680218892585, + "val_score": 0.2521930607944624 + }, + { + "epoch": 5, + "grad_norm": 1.0233439207077026, + "learning_rate": 0.215, + "model_norm": 87.53192901611328, + "step_logs": { + "grad_norm": { + "270": 1.4621710777282715, + "271": 1.2549599409103394, + "272": 0.9583402872085571, + "273": 0.9238143563270569, + "274": 0.9930229783058167, + "275": 1.0756828784942627, + "276": 1.206313967704773, + "277": 1.2826403379440308, + "278": 1.1965733766555786, + "279": 1.1301473379135132, + "280": 1.1039425134658813, + "281": 1.1750026941299438, + "282": 1.3242261409759521, + "283": 1.2207683324813843, + "284": 1.1407650709152222, + "285": 1.0587184429168701, + "286": 0.9370437264442444, + "287": 1.0092836618423462, + "288": 1.2136086225509644, + "289": 1.2424288988113403, + "290": 1.314866542816162, + "291": 1.2721630334854126, + "292": 1.1575560569763184, + "293": 1.1037946939468384, + "294": 1.1253407001495361, + "295": 1.1398760080337524, + "296": 1.1920768022537231, + "297": 1.2422540187835693, + "298": 1.3192061185836792, + "299": 1.3274704217910767, + "300": 1.1823089122772217, + "301": 1.0793026685714722, + "302": 1.1569048166275024, + "303": 1.4129902124404907, + "304": 1.3153616189956665, + "305": 1.176673173904419, + "306": 1.2608675956726074, + "307": 1.2719707489013672, + "308": 1.168274998664856, + "309": 1.134477138519287, + "310": 1.115610122680664, + "311": 1.264222264289856, + "312": 1.3564943075180054, + "313": 1.4025533199310303, + "314": 1.2757071256637573, + "315": 1.0213652849197388, + "316": 0.9081512689590454, + "317": 0.9712821841239929, + "318": 1.050710916519165, + "319": 1.0866272449493408, + "320": 1.114082932472229, + "321": 1.2214831113815308, + "322": 1.2293280363082886, + "323": 1.0233439207077026 + }, + "loss": { + "270": 2.514986515045166, + "271": 2.5509705543518066, + "272": 2.4738762378692627, + "273": 2.482973575592041, + "274": 2.4760775566101074, + "275": 2.474302053451538, + "276": 2.4774842262268066, + "277": 2.5124454498291016, + "278": 2.488851308822632, + "279": 2.4821102619171143, + "280": 2.470085620880127, + "281": 2.487722635269165, + "282": 2.5030651092529297, + "283": 2.529686212539673, + "284": 2.445143699645996, + "285": 2.480433464050293, + "286": 2.430105686187744, + "287": 2.4367318153381348, + "288": 2.440614700317383, + "289": 2.4875807762145996, + "290": 2.47745418548584, + "291": 2.4872961044311523, + "292": 2.4253389835357666, + "293": 2.44598126411438, + "294": 2.434525489807129, + "295": 2.465590000152588, + "296": 2.451469898223877, + "297": 2.4583263397216797, + "298": 2.489090919494629, + "299": 2.473580837249756, + "300": 2.4507694244384766, + "301": 2.4316000938415527, + "302": 2.422515630722046, + "303": 2.4787473678588867, + "304": 2.466991901397705, + "305": 2.4323315620422363, + "306": 2.416975975036621, + "307": 2.4670331478118896, + "308": 2.4527292251586914, + "309": 2.403153896331787, + "310": 2.4297921657562256, + "311": 2.433335542678833, + "312": 2.466033458709717, + "313": 2.4460132122039795, + "314": 2.460947275161743, + "315": 2.4223339557647705, + "316": 2.402672529220581, + "317": 2.4009952545166016, + "318": 2.4176480770111084, + "319": 2.405160665512085, + "320": 2.4187331199645996, + "321": 2.4114279747009277, + "322": 2.4444549083709717, + "323": 2.39345645904541 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 1.17636, + 1.61974, + 2.69363, + 2.90939, + 2.51099, + 2.13838, + 1.70251, + 1.52717, + 1.73828, + 1.94335, + 2.02684, + 1.80187, + 1.42741, + 1.69746, + 1.87894, + 2.21292, + 2.76761, + 2.39211, + 1.65707, + 1.61151, + 1.43299, + 1.53689, + 1.81004, + 2.0076, + 1.92241, + 1.8976, + 1.72512, + 1.59301, + 1.43026, + 1.40371, + 1.75324, + 2.0874, + 1.80997, + 1.24152, + 1.42586, + 1.75675, + 1.52031, + 1.52483, + 1.79705, + 1.8672, + 1.95229, + 1.52249, + 1.34018, + 1.24343, + 1.51217, + 2.32205, + 2.91325, + 2.54507, + 2.18991, + 2.03696, + 1.94874, + 1.61622, + 1.61751, + 2.28551 + ], + "train_epoch_time": 5.052793741226196, + "train_loss": 2.3776131450702334, + "train_score": 0.3044767754320777, + "val_loss": 2.438130426899574, + "val_score": 0.2920448832752784 + }, + { + "epoch": 6, + "grad_norm": 1.1622827053070068, + "learning_rate": 0.215, + "model_norm": 87.55876922607422, + "step_logs": { + "grad_norm": { + "324": 0.8716362714767456, + "325": 0.89914870262146, + "326": 1.0766135454177856, + "327": 1.2029300928115845, + "328": 1.468245506286621, + "329": 1.4433614015579224, + "330": 1.2462615966796875, + "331": 1.1867982149124146, + "332": 1.133683681488037, + "333": 1.1377207040786743, + "334": 1.117013931274414, + "335": 1.1750456094741821, + "336": 1.2225422859191895, + "337": 1.1788886785507202, + "338": 1.1226738691329956, + "339": 1.145333170890808, + "340": 1.1781138181686401, + "341": 1.1886587142944336, + "342": 1.2688055038452148, + "343": 1.4411208629608154, + "344": 1.1073471307754517, + "345": 0.8979843258857727, + "346": 0.8883922696113586, + "347": 0.9640265107154846, + "348": 1.3548240661621094, + "349": 1.4462450742721558, + "350": 1.356637716293335, + "351": 1.3427813053131104, + "352": 1.3249545097351074, + "353": 1.2640893459320068, + "354": 1.122170329093933, + "355": 1.0957119464874268, + "356": 1.174031376838684, + "357": 1.2044655084609985, + "358": 1.0939375162124634, + "359": 1.1627094745635986, + "360": 1.2441326379776, + "361": 1.1325262784957886, + "362": 0.9843354821205139, + "363": 1.0464366674423218, + "364": 1.0454834699630737, + "365": 1.036085605621338, + "366": 1.1615955829620361, + "367": 1.4211864471435547, + "368": 1.2872300148010254, + "369": 1.1462335586547852, + "370": 1.2676925659179688, + "371": 1.2726163864135742, + "372": 1.0992878675460815, + "373": 1.110335350036621, + "374": 1.3180903196334839, + "375": 1.778267741203308, + "376": 1.4958765506744385, + "377": 1.1622827053070068 + }, + "loss": { + "324": 2.378471612930298, + "325": 2.3629865646362305, + "326": 2.385097026824951, + "327": 2.4174094200134277, + "328": 2.410780191421509, + "329": 2.4900832176208496, + "330": 2.4151382446289062, + "331": 2.442502498626709, + "332": 2.381744384765625, + "333": 2.4000790119171143, + "334": 2.393716812133789, + "335": 2.385098457336426, + "336": 2.3959171772003174, + "337": 2.403446912765503, + "338": 2.384711265563965, + "339": 2.4059650897979736, + "340": 2.3941526412963867, + "341": 2.411202907562256, + "342": 2.394829273223877, + "343": 2.4191174507141113, + "344": 2.4327807426452637, + "345": 2.3698110580444336, + "346": 2.372457265853882, + "347": 2.358297348022461, + "348": 2.3747143745422363, + "349": 2.462536334991455, + "350": 2.3946521282196045, + "351": 2.444960117340088, + "352": 2.374955892562866, + "353": 2.397289752960205, + "354": 2.393702507019043, + "355": 2.3784091472625732, + "356": 2.3567159175872803, + "357": 2.3950634002685547, + "358": 2.3703205585479736, + "359": 2.3878798484802246, + "360": 2.385012626647949, + "361": 2.3864059448242188, + "362": 2.349491834640503, + "363": 2.358891487121582, + "364": 2.3372199535369873, + "365": 2.3621435165405273, + "366": 2.3578944206237793, + "367": 2.3868937492370605, + "368": 2.4096920490264893, + "369": 2.384051561355591, + "370": 2.369637966156006, + "371": 2.3805456161499023, + "372": 2.361449718475342, + "373": 2.335862398147583, + "374": 2.347748279571533, + "375": 2.44773006439209, + "376": 2.4842140674591064, + "377": 2.3896615505218506 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 3.1306, + 2.92279, + 2.05772, + 1.67059, + 1.1183, + 1.19526, + 1.55498, + 1.73413, + 1.85315, + 1.85419, + 1.91847, + 1.72742, + 1.60304, + 1.72937, + 1.89203, + 1.83411, + 1.72495, + 1.70655, + 1.48759, + 1.16481, + 1.98397, + 2.93884, + 3.006, + 2.53759, + 1.29374, + 1.17733, + 1.30111, + 1.356, + 1.35286, + 1.50025, + 1.90087, + 1.98104, + 1.70981, + 1.65093, + 1.98072, + 1.76632, + 1.54084, + 1.86058, + 2.42487, + 2.15418, + 2.13828, + 2.20047, + 1.74749, + 1.18176, + 1.45428, + 1.81455, + 1.47453, + 1.46988, + 1.95414, + 1.89469, + 1.35133, + 0.77405, + 1.11019, + 1.76894 + ], + "train_epoch_time": 5.051965713500977, + "train_loss": 2.3595358218489966, + "train_score": 0.30445659962753996, + "val_loss": 2.4094951702998233, + "val_score": 0.2916233135487812 + }, + { + "epoch": 7, + "grad_norm": 1.122349500656128, + "learning_rate": 0.215, + "model_norm": 87.58779907226562, + "step_logs": { + "grad_norm": { + "378": 1.2289165258407593, + "379": 1.2876077890396118, + "380": 1.253775954246521, + "381": 1.1601649522781372, + "382": 1.0149775743484497, + "383": 0.8874648213386536, + "384": 0.8765559196472168, + "385": 0.9939548373222351, + "386": 1.1471822261810303, + "387": 1.2171940803527832, + "388": 1.4047293663024902, + "389": 1.3704050779342651, + "390": 1.2447500228881836, + "391": 1.1996443271636963, + "392": 1.0778791904449463, + "393": 1.0508983135223389, + "394": 1.0584830045700073, + "395": 1.075810432434082, + "396": 1.121604323387146, + "397": 1.2505501508712769, + "398": 1.2425906658172607, + "399": 1.1320399045944214, + "400": 0.9426090717315674, + "401": 0.9163249731063843, + "402": 1.163918375968933, + "403": 1.693349838256836, + "404": 1.4655699729919434, + "405": 1.0728397369384766, + "406": 0.9973278045654297, + "407": 1.0631458759307861, + "408": 1.1138743162155151, + "409": 1.3411601781845093, + "410": 1.3309574127197266, + "411": 1.190207600593567, + "412": 1.1875566244125366, + "413": 1.0829166173934937, + "414": 0.9782698750495911, + "415": 1.0266886949539185, + "416": 1.0581934452056885, + "417": 1.1176337003707886, + "418": 1.1155422925949097, + "419": 1.1001964807510376, + "420": 1.062107801437378, + "421": 1.0173048973083496, + "422": 1.0776845216751099, + "423": 1.1612517833709717, + "424": 1.3631857633590698, + "425": 1.4028018712997437, + "426": 1.3414517641067505, + "427": 1.2491090297698975, + "428": 1.1552237272262573, + "429": 1.0854108333587646, + "430": 1.150957703590393, + "431": 1.122349500656128 + }, + "loss": { + "378": 2.3674890995025635, + "379": 2.3736331462860107, + "380": 2.374690055847168, + "381": 2.3619821071624756, + "382": 2.3689804077148438, + "383": 2.3117270469665527, + "384": 2.319326400756836, + "385": 2.311298370361328, + "386": 2.3461179733276367, + "387": 2.3617935180664062, + "388": 2.3598344326019287, + "389": 2.419098377227783, + "390": 2.331808567047119, + "391": 2.3663904666900635, + "392": 2.3436355590820312, + "393": 2.3307714462280273, + "394": 2.337489604949951, + "395": 2.317723512649536, + "396": 2.3408355712890625, + "397": 2.3330633640289307, + "398": 2.375703811645508, + "399": 2.3548202514648438, + "400": 2.3112916946411133, + "401": 2.3086235523223877, + "402": 2.3135900497436523, + "403": 2.3431601524353027, + "404": 2.4124484062194824, + "405": 2.333739995956421, + "406": 2.2959189414978027, + "407": 2.297210693359375, + "408": 2.3128461837768555, + "409": 2.3254969120025635, + "410": 2.3781135082244873, + "411": 2.3242831230163574, + "412": 2.3365964889526367, + "413": 2.3092870712280273, + "414": 2.314084053039551, + "415": 2.2937607765197754, + "416": 2.2956981658935547, + "417": 2.3010544776916504, + "418": 2.3063392639160156, + "419": 2.2883734703063965, + "420": 2.307068347930908, + "421": 2.2768523693084717, + "422": 2.311478853225708, + "423": 2.3039536476135254, + "424": 2.3166608810424805, + "425": 2.346284866333008, + "426": 2.3382768630981445, + "427": 2.3504528999328613, + "428": 2.298037052154541, + "429": 2.320526599884033, + "430": 2.2736430168151855, + "431": 2.3213510513305664 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 1.56763, + 1.43168, + 1.51066, + 1.75484, + 2.29958, + 2.93518, + 3.01858, + 2.3395, + 1.78273, + 1.59412, + 1.1959, + 1.28812, + 1.50497, + 1.6443, + 2.0172, + 2.11047, + 2.08632, + 2.00258, + 1.86077, + 1.49185, + 1.53864, + 1.83753, + 2.60131, + 2.7495, + 1.70782, + 0.817163, + 1.12317, + 2.0276, + 2.30824, + 2.03243, + 1.86412, + 1.29287, + 1.34247, + 1.64075, + 1.65682, + 1.96919, + 2.41803, + 2.17606, + 2.05015, + 1.84216, + 1.85332, + 1.89054, + 2.04514, + 2.20005, + 1.99025, + 1.70852, + 1.24667, + 1.19231, + 1.29941, + 1.50644, + 1.72197, + 1.96969, + 1.71634, + 1.84283 + ], + "train_epoch_time": 5.052664041519165, + "train_loss": 2.283510016847717, + "train_score": 0.3188654502204364, + "val_loss": 2.3316025684676407, + "val_score": 0.30808248376873687 + }, + { + "epoch": 8, + "grad_norm": 0.9300982356071472, + "learning_rate": 0.215, + "model_norm": 87.61836242675781, + "step_logs": { + "grad_norm": { + "432": 1.173887014389038, + "433": 1.1409796476364136, + "434": 1.1342936754226685, + "435": 1.2576640844345093, + "436": 1.2820959091186523, + "437": 1.1802228689193726, + "438": 1.0372898578643799, + "439": 1.0499985218048096, + "440": 1.108478307723999, + "441": 1.207029104232788, + "442": 1.290515661239624, + "443": 1.2015646696090698, + "444": 1.0155858993530273, + "445": 0.9483391046524048, + "446": 0.9841550588607788, + "447": 1.0490765571594238, + "448": 1.2082215547561646, + "449": 1.3061292171478271, + "450": 1.3867985010147095, + "451": 1.718166708946228, + "452": 1.3583420515060425, + "453": 1.1765997409820557, + "454": 1.21886146068573, + "455": 1.1780078411102295, + "456": 1.1769036054611206, + "457": 1.0307259559631348, + "458": 0.9529721140861511, + "459": 1.0447609424591064, + "460": 1.4128073453903198, + "461": 1.3071491718292236, + "462": 1.0343782901763916, + "463": 0.9364259243011475, + "464": 0.8971028923988342, + "465": 0.9150164127349854, + "466": 1.0134968757629395, + "467": 1.0172163248062134, + "468": 1.0775176286697388, + "469": 1.1535265445709229, + "470": 1.191583514213562, + "471": 1.1831544637680054, + "472": 1.0748728513717651, + "473": 0.8862811326980591, + "474": 0.866497814655304, + "475": 0.957634687423706, + "476": 1.0472304821014404, + "477": 1.1254465579986572, + "478": 1.1687746047973633, + "479": 1.096500039100647, + "480": 1.038978099822998, + "481": 1.050185203552246, + "482": 1.06312894821167, + "483": 1.004075288772583, + "484": 0.9146953821182251, + "485": 0.9300982356071472 + }, + "loss": { + "432": 2.2915685176849365, + "433": 2.325540542602539, + "434": 2.2867980003356934, + "435": 2.3208999633789062, + "436": 2.3207876682281494, + "437": 2.3049392700195312, + "438": 2.2741847038269043, + "439": 2.2700278759002686, + "440": 2.29074764251709, + "441": 2.2956035137176514, + "442": 2.301769256591797, + "443": 2.3018126487731934, + "444": 2.3019819259643555, + "445": 2.244292736053467, + "446": 2.2597298622131348, + "447": 2.2670047283172607, + "448": 2.2799508571624756, + "449": 2.3224425315856934, + "450": 2.3306963443756104, + "451": 2.3314719200134277, + "452": 2.3882226943969727, + "453": 2.2948594093322754, + "454": 2.3057758808135986, + "455": 2.291252613067627, + "456": 2.2728543281555176, + "457": 2.26200008392334, + "458": 2.2534117698669434, + "459": 2.268404483795166, + "460": 2.2851171493530273, + "461": 2.3509154319763184, + "462": 2.2637104988098145, + "463": 2.261314868927002, + "464": 2.235646963119507, + "465": 2.2582640647888184, + "466": 2.242279529571533, + "467": 2.2655491828918457, + "468": 2.2640581130981445, + "469": 2.2845754623413086, + "470": 2.2636184692382812, + "471": 2.3068342208862305, + "472": 2.2654080390930176, + "473": 2.266956329345703, + "474": 2.239006519317627, + "475": 2.24525785446167, + "476": 2.244333267211914, + "477": 2.2878918647766113, + "478": 2.2697043418884277, + "479": 2.243941307067871, + "480": 2.2570700645446777, + "481": 2.2509193420410156, + "482": 2.249399423599243, + "483": 2.2804508209228516, + "484": 2.2411937713623047, + "485": 2.2560853958129883 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 1.66295, + 1.78636, + 1.77737, + 1.46733, + 1.41187, + 1.65475, + 2.11361, + 2.05899, + 1.86433, + 1.57566, + 1.38209, + 1.59432, + 2.23187, + 2.49547, + 2.33308, + 2.05986, + 1.56182, + 1.36136, + 1.21188, + 0.789768, + 1.29436, + 1.65767, + 1.55206, + 1.65111, + 1.64093, + 2.12915, + 2.48131, + 2.0782, + 1.14483, + 1.3759, + 2.11574, + 2.57878, + 2.77791, + 2.69722, + 2.18296, + 2.18951, + 1.95002, + 1.71692, + 1.59424, + 1.64791, + 1.9608, + 2.88603, + 2.98209, + 2.44831, + 2.04646, + 1.80628, + 1.66153, + 1.86635, + 2.0909, + 2.04093, + 1.99019, + 2.26198, + 2.67871, + 2.60794 + ], + "train_epoch_time": 5.051317930221558, + "train_loss": 2.230953530977925, + "train_score": 0.34803846847036135, + "val_loss": 2.280944639724924, + "val_score": 0.3334215349088991 + }, + { + "epoch": 9, + "grad_norm": 1.1118519306182861, + "learning_rate": 0.215, + "model_norm": 87.64906311035156, + "step_logs": { + "grad_norm": { + "486": 0.9912803769111633, + "487": 1.1801499128341675, + "488": 1.3727082014083862, + "489": 1.4574068784713745, + "490": 1.1562776565551758, + "491": 1.0924196243286133, + "492": 1.1069799661636353, + "493": 1.1005313396453857, + "494": 1.1039934158325195, + "495": 1.0187159776687622, + "496": 1.1182432174682617, + "497": 1.3187373876571655, + "498": 1.4477620124816895, + "499": 1.3412322998046875, + "500": 1.153167486190796, + "501": 1.0750313997268677, + "502": 1.025566577911377, + "503": 1.0409637689590454, + "504": 1.011820673942566, + "505": 1.1079769134521484, + "506": 1.1562845706939697, + "507": 1.0853391885757446, + "508": 1.0119813680648804, + "509": 0.9975836873054504, + "510": 1.0724613666534424, + "511": 1.1301614046096802, + "512": 1.1464024782180786, + "513": 1.1840053796768188, + "514": 1.2161908149719238, + "515": 1.1920208930969238, + "516": 1.0625874996185303, + "517": 0.9278948903083801, + "518": 0.9675673842430115, + "519": 1.0902743339538574, + "520": 1.1268482208251953, + "521": 1.178515076637268, + "522": 1.1936978101730347, + "523": 1.27751624584198, + "524": 1.2797014713287354, + "525": 1.255751371383667, + "526": 1.2691787481307983, + "527": 1.34010648727417, + "528": 1.4972039461135864, + "529": 1.4528220891952515, + "530": 1.1309788227081299, + "531": 0.9770186543464661, + "532": 1.0028085708618164, + "533": 1.033262848854065, + "534": 1.0508148670196533, + "535": 1.0891156196594238, + "536": 1.190722942352295, + "537": 1.1033893823623657, + "538": 1.0907820463180542, + "539": 1.1118519306182861 + }, + "loss": { + "486": 2.244365692138672, + "487": 2.258953094482422, + "488": 2.291658401489258, + "489": 2.301762342453003, + "490": 2.26674222946167, + "491": 2.240677833557129, + "492": 2.2656078338623047, + "493": 2.254861831665039, + "494": 2.2496354579925537, + "495": 2.222031354904175, + "496": 2.2465157508850098, + "497": 2.263211727142334, + "498": 2.27559494972229, + "499": 2.298959255218506, + "500": 2.253042221069336, + "501": 2.2481675148010254, + "502": 2.2057642936706543, + "503": 2.243898868560791, + "504": 2.2219278812408447, + "505": 2.225553274154663, + "506": 2.251554012298584, + "507": 2.2208924293518066, + "508": 2.223480701446533, + "509": 2.221001148223877, + "510": 2.2008442878723145, + "511": 2.246204376220703, + "512": 2.223757266998291, + "513": 2.253476619720459, + "514": 2.2615246772766113, + "515": 2.238771438598633, + "516": 2.219839572906494, + "517": 2.1763017177581787, + "518": 2.2072110176086426, + "519": 2.2068400382995605, + "520": 2.225220203399658, + "521": 2.210195779800415, + "522": 2.2160658836364746, + "523": 2.219590663909912, + "524": 2.2667903900146484, + "525": 2.2522406578063965, + "526": 2.2427616119384766, + "527": 2.233675479888916, + "528": 2.2969250679016113, + "529": 2.272357702255249, + "530": 2.264460563659668, + "531": 2.191066265106201, + "532": 2.1999876499176025, + "533": 2.183898448944092, + "534": 2.2113471031188965, + "535": 2.2229721546173096, + "536": 2.223538875579834, + "537": 2.246978521347046, + "538": 2.211336612701416, + "539": 2.2184529304504395 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 2.28402, + 1.62193, + 1.21617, + 1.08367, + 1.69542, + 1.87759, + 1.84887, + 1.86172, + 1.84578, + 2.14113, + 1.79654, + 1.30139, + 1.08568, + 1.27798, + 1.69428, + 1.9453, + 2.09716, + 2.07077, + 2.17032, + 1.81291, + 1.68404, + 1.88537, + 2.17114, + 2.23177, + 1.91349, + 1.75861, + 1.69205, + 1.60748, + 1.52897, + 1.57559, + 1.96604, + 2.52768, + 2.35766, + 1.85652, + 1.75244, + 1.59133, + 1.55523, + 1.36, + 1.38418, + 1.42826, + 1.39232, + 1.24377, + 1.02467, + 1.07659, + 1.77034, + 2.29535, + 2.18768, + 2.04555, + 2.00265, + 1.87407, + 1.56828, + 1.84562, + 1.85857, + 1.79455 + ], + "train_epoch_time": 5.059020757675171, + "train_loss": 2.200929508250276, + "train_score": 0.35788199437228985, + "val_loss": 2.2678967945611297, + "val_score": 0.34036847093622663 + }, + { + "epoch": 10, + "grad_norm": 1.0116633176803589, + "learning_rate": 0.215, + "model_norm": 87.67935180664062, + "step_logs": { + "grad_norm": { + "540": 1.169072151184082, + "541": 1.1783757209777832, + "542": 1.2139266729354858, + "543": 1.2846906185150146, + "544": 1.3508814573287964, + "545": 1.332606315612793, + "546": 1.19757080078125, + "547": 1.1094799041748047, + "548": 1.0778957605361938, + "549": 1.1209033727645874, + "550": 1.1984648704528809, + "551": 1.252472162246704, + "552": 1.183653473854065, + "553": 1.145930528640747, + "554": 1.0975148677825928, + "555": 1.0804927349090576, + "556": 1.1179344654083252, + "557": 1.11802339553833, + "558": 1.0764483213424683, + "559": 1.1522531509399414, + "560": 1.1919751167297363, + "561": 1.1006834506988525, + "562": 1.0570098161697388, + "563": 1.0119645595550537, + "564": 1.032116413116455, + "565": 1.0845112800598145, + "566": 1.1901994943618774, + "567": 1.1658177375793457, + "568": 1.056470274925232, + "569": 1.1020967960357666, + "570": 1.228576898574829, + "571": 1.2427558898925781, + "572": 1.2071932554244995, + "573": 1.275977373123169, + "574": 1.3003004789352417, + "575": 1.2350523471832275, + "576": 1.5249589681625366, + "577": 1.4124336242675781, + "578": 1.4135326147079468, + "579": 1.5690242052078247, + "580": 1.573901891708374, + "581": 1.436079740524292, + "582": 1.3040106296539307, + "583": 1.1048146486282349, + "584": 1.0893659591674805, + "585": 1.1002446413040161, + "586": 1.0650063753128052, + "587": 1.0706894397735596, + "588": 1.0903651714324951, + "589": 1.219233512878418, + "590": 1.2892698049545288, + "591": 1.1891388893127441, + "592": 1.0812523365020752, + "593": 1.0116633176803589 + }, + "loss": { + "540": 2.204136848449707, + "541": 2.2254788875579834, + "542": 2.20072603225708, + "543": 2.2437100410461426, + "544": 2.197460651397705, + "545": 2.2728939056396484, + "546": 2.2356958389282227, + "547": 2.214299201965332, + "548": 2.188335418701172, + "549": 2.2122912406921387, + "550": 2.20137882232666, + "551": 2.2458462715148926, + "552": 2.18747615814209, + "553": 2.2249817848205566, + "554": 2.2014222145080566, + "555": 2.186837673187256, + "556": 2.215104103088379, + "557": 2.208815336227417, + "558": 2.1690101623535156, + "559": 2.1853578090667725, + "560": 2.2086341381073, + "561": 2.19753360748291, + "562": 2.196995735168457, + "563": 2.1907670497894287, + "564": 2.1760456562042236, + "565": 2.182063341140747, + "566": 2.197408676147461, + "567": 2.2246310710906982, + "568": 2.1983628273010254, + "569": 2.189992904663086, + "570": 2.1991801261901855, + "571": 2.2129592895507812, + "572": 2.210176467895508, + "573": 2.194798469543457, + "574": 2.20719838142395, + "575": 2.203917980194092, + "576": 2.2294514179229736, + "577": 2.272395610809326, + "578": 2.2360830307006836, + "579": 2.2558693885803223, + "580": 2.267986297607422, + "581": 2.2737629413604736, + "582": 2.235292673110962, + "583": 2.1753664016723633, + "584": 2.200342893600464, + "585": 2.192239999771118, + "586": 2.2137129306793213, + "587": 2.153522253036499, + "588": 2.160778522491455, + "589": 2.210700511932373, + "590": 2.217236042022705, + "591": 2.195439100265503, + "592": 2.1788392066955566, + "593": 2.1664459705352783 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 1.61271, + 1.60271, + 1.49342, + 1.35947, + 1.20417, + 1.2799, + 1.55887, + 1.79886, + 1.88348, + 1.76078, + 1.53265, + 1.43167, + 1.56133, + 1.69438, + 1.82761, + 1.87315, + 1.7724, + 1.76709, + 1.87187, + 1.64599, + 1.5545, + 1.81389, + 1.9664, + 2.13927, + 2.04273, + 1.85524, + 1.55121, + 1.6368, + 1.96963, + 1.80303, + 1.45699, + 1.43285, + 1.51661, + 1.34806, + 1.30543, + 1.44486, + 0.958698, + 1.13906, + 1.11912, + 0.916336, + 0.915556, + 1.10252, + 1.31453, + 1.78219, + 1.85414, + 1.81096, + 1.95172, + 1.87855, + 1.81747, + 1.48715, + 1.3339, + 1.55259, + 1.86368, + 2.11678 + ], + "train_epoch_time": 5.05180287361145, + "train_loss": 2.1669004165288195, + "train_score": 0.36084334651059025, + "val_loss": 2.2336182996681995, + "val_score": 0.3415524546574506 + }, + { + "epoch": 11, + "grad_norm": 1.0813859701156616, + "learning_rate": 0.215, + "model_norm": 87.71249389648438, + "step_logs": { + "grad_norm": { + "594": 1.0095385313034058, + "595": 1.118397831916809, + "596": 1.113401174545288, + "597": 0.9805076122283936, + "598": 0.9841046929359436, + "599": 1.229051113128662, + "600": 1.213972568511963, + "601": 1.0447558164596558, + "602": 0.9220883846282959, + "603": 0.936968982219696, + "604": 0.9985181093215942, + "605": 1.0668905973434448, + "606": 1.2745782136917114, + "607": 1.29975426197052, + "608": 1.2980278730392456, + "609": 1.3180011510849, + "610": 1.2992914915084839, + "611": 1.2111988067626953, + "612": 1.1126219034194946, + "613": 1.0514963865280151, + "614": 0.9431167244911194, + "615": 0.9624524116516113, + "616": 1.0076013803482056, + "617": 1.0628156661987305, + "618": 1.1847156286239624, + "619": 1.2134593725204468, + "620": 1.1799983978271484, + "621": 1.1404943466186523, + "622": 1.0369693040847778, + "623": 1.033238172531128, + "624": 1.1578330993652344, + "625": 1.2227585315704346, + "626": 1.3081293106079102, + "627": 1.367281198501587, + "628": 1.358440637588501, + "629": 1.246859073638916, + "630": 1.2392390966415405, + "631": 1.260047435760498, + "632": 1.3502540588378906, + "633": 1.3460414409637451, + "634": 1.2329233884811401, + "635": 1.124908208847046, + "636": 1.0964032411575317, + "637": 1.1042762994766235, + "638": 1.0895113945007324, + "639": 1.1002918481826782, + "640": 1.1614863872528076, + "641": 1.1491286754608154, + "642": 1.0553640127182007, + "643": 1.0744247436523438, + "644": 1.1322510242462158, + "645": 1.1758286952972412, + "646": 1.1297539472579956, + "647": 1.0813859701156616 + }, + "loss": { + "594": 2.1929025650024414, + "595": 2.164384365081787, + "596": 2.170667886734009, + "597": 2.1898555755615234, + "598": 2.1332521438598633, + "599": 2.165065288543701, + "600": 2.214233636856079, + "601": 2.166761875152588, + "602": 2.16964054107666, + "603": 2.150141954421997, + "604": 2.1509041786193848, + "605": 2.169931411743164, + "606": 2.1968047618865967, + "607": 2.2210962772369385, + "608": 2.189572811126709, + "609": 2.2153797149658203, + "610": 2.1691596508026123, + "611": 2.2113218307495117, + "612": 2.160707712173462, + "613": 2.179276466369629, + "614": 2.14737868309021, + "615": 2.1177570819854736, + "616": 2.1631083488464355, + "617": 2.1379146575927734, + "618": 2.152888059616089, + "619": 2.205331325531006, + "620": 2.167412281036377, + "621": 2.18001389503479, + "622": 2.1571598052978516, + "623": 2.165440082550049, + "624": 2.165348768234253, + "625": 2.1720526218414307, + "626": 2.1637320518493652, + "627": 2.215428113937378, + "628": 2.184414863586426, + "629": 2.1814124584198, + "630": 2.1491243839263916, + "631": 2.178330898284912, + "632": 2.1728720664978027, + "633": 2.2016239166259766, + "634": 2.169736862182617, + "635": 2.1725568771362305, + "636": 2.137773036956787, + "637": 2.1530861854553223, + "638": 2.151876211166382, + "639": 2.164989471435547, + "640": 2.168134927749634, + "641": 2.168419361114502, + "642": 2.1598010063171387, + "643": 2.121164560317993, + "644": 2.1406288146972656, + "645": 2.1656789779663086, + "646": 2.148796558380127, + "647": 2.133854627609253 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 2.15166, + 1.73038, + 1.75102, + 2.27779, + 2.20272, + 1.43328, + 1.50247, + 1.9851, + 2.55178, + 2.44916, + 2.15729, + 1.90637, + 1.35225, + 1.31476, + 1.29955, + 1.27531, + 1.28493, + 1.50737, + 1.74542, + 1.97105, + 2.41423, + 2.28622, + 2.13059, + 1.89267, + 1.53389, + 1.49769, + 1.55661, + 1.676, + 2.00609, + 2.02836, + 1.61524, + 1.45274, + 1.26445, + 1.18506, + 1.18373, + 1.40315, + 1.39943, + 1.37199, + 1.1918, + 1.21514, + 1.42736, + 1.71687, + 1.77837, + 1.76566, + 1.81282, + 1.7883, + 1.60716, + 1.64212, + 1.93914, + 1.83748, + 1.66977, + 1.56641, + 1.68356, + 1.82475 + ], + "train_epoch_time": 5.0524678230285645, + "train_loss": 2.1371430958702713, + "train_score": 0.37163849522598846, + "val_loss": 2.2138783608731116, + "val_score": 0.3534909584971164 + }, + { + "epoch": 12, + "grad_norm": 0.8866685628890991, + "learning_rate": 0.215, + "model_norm": 87.7420654296875, + "step_logs": { + "grad_norm": { + "648": 1.0472753047943115, + "649": 0.9115982055664062, + "650": 0.8244289755821228, + "651": 0.8689126968383789, + "652": 0.9928649067878723, + "653": 1.167019009590149, + "654": 1.26961350440979, + "655": 1.2213505506515503, + "656": 1.2602888345718384, + "657": 1.3108564615249634, + "658": 1.180912971496582, + "659": 1.1064778566360474, + "660": 1.0781797170639038, + "661": 1.006677269935608, + "662": 0.9838483929634094, + "663": 0.9756195545196533, + "664": 0.9793493747711182, + "665": 1.070969581604004, + "666": 1.1430097818374634, + "667": 1.1448856592178345, + "668": 1.0267887115478516, + "669": 0.9792900681495667, + "670": 1.010474443435669, + "671": 1.0112379789352417, + "672": 0.9828790426254272, + "673": 0.8997846245765686, + "674": 0.8759722709655762, + "675": 0.9008798003196716, + "676": 0.988506555557251, + "677": 1.0519704818725586, + "678": 0.974343478679657, + "679": 0.91118985414505, + "680": 0.8859447240829468, + "681": 0.8313981890678406, + "682": 0.797153651714325, + "683": 0.7582221031188965, + "684": 0.7774538397789001, + "685": 0.7920456528663635, + "686": 0.7822142243385315, + "687": 0.784407913684845, + "688": 0.8251732587814331, + "689": 0.8213247656822205, + "690": 0.7949087619781494, + "691": 0.9233590364456177, + "692": 0.9984911680221558, + "693": 1.0227104425430298, + "694": 0.9449827671051025, + "695": 0.8695642352104187, + "696": 0.8431944847106934, + "697": 0.8557807207107544, + "698": 0.9188382625579834, + "699": 0.9073380827903748, + "700": 0.9085015654563904, + "701": 0.8866685628890991 + }, + "loss": { + "648": 2.1539249420166016, + "649": 2.153470516204834, + "650": 2.1303768157958984, + "651": 2.124969005584717, + "652": 2.112607479095459, + "653": 2.12748384475708, + "654": 2.1599693298339844, + "655": 2.1595160961151123, + "656": 2.147554636001587, + "657": 2.1652820110321045, + "658": 2.164153575897217, + "659": 2.1289477348327637, + "660": 2.122256278991699, + "661": 2.1265668869018555, + "662": 2.1151175498962402, + "663": 2.1025984287261963, + "664": 2.1128110885620117, + "665": 2.1415257453918457, + "666": 2.1279125213623047, + "667": 2.1497600078582764, + "668": 2.1128122806549072, + "669": 2.109170913696289, + "670": 2.1266679763793945, + "671": 2.108522415161133, + "672": 2.082127571105957, + "673": 2.080562114715576, + "674": 2.1004080772399902, + "675": 2.0928163528442383, + "676": 2.0909857749938965, + "677": 2.1165192127227783, + "678": 2.12237548828125, + "679": 2.060544490814209, + "680": 2.0678043365478516, + "681": 2.0857534408569336, + "682": 2.1190152168273926, + "683": 2.0696496963500977, + "684": 2.0691144466400146, + "685": 2.0708937644958496, + "686": 2.0833353996276855, + "687": 2.079961061477661, + "688": 2.0788733959198, + "689": 2.0817677974700928, + "690": 2.0411934852600098, + "691": 2.060296058654785, + "692": 2.0876822471618652, + "693": 2.089556932449341, + "694": 2.0931663513183594, + "695": 2.100879192352295, + "696": 2.0798120498657227, + "697": 2.0584583282470703, + "698": 2.087106227874756, + "699": 2.059535026550293, + "700": 2.0786895751953125, + "701": 2.078433036804199 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 1.96385, + 2.59139, + 3.13437, + 2.81449, + 2.14308, + 1.56211, + 1.34, + 1.44769, + 1.35209, + 1.2601, + 1.55186, + 1.73892, + 1.82564, + 2.09845, + 2.18513, + 2.209, + 2.20285, + 1.86711, + 1.62875, + 1.64008, + 2.004, + 2.19932, + 2.08281, + 2.06192, + 2.1553, + 2.56982, + 2.7373, + 2.57868, + 2.13989, + 1.91256, + 2.23562, + 2.48179, + 2.63449, + 3.01748, + 3.33465, + 3.60001, + 3.42322, + 3.30109, + 3.40493, + 3.38042, + 3.05308, + 3.08605, + 3.23035, + 2.41651, + 2.094, + 1.99779, + 2.34399, + 2.77842, + 2.92529, + 2.81072, + 2.4721, + 2.50168, + 2.51848, + 2.64371 + ], + "train_epoch_time": 5.0517659187316895, + "train_loss": 2.064169383288456, + "train_score": 0.38890894004056925, + "val_loss": 2.1586678411328286, + "val_score": 0.3648285020985368 + }, + { + "epoch": 13, + "grad_norm": 0.5387442708015442, + "learning_rate": 0.14333333333333334, + "model_norm": 87.76129913330078, + "step_logs": { + "grad_norm": { + "702": 0.8555957078933716, + "703": 0.8389430046081543, + "704": 0.7974465489387512, + "705": 0.7111917734146118, + "706": 0.6865280866622925, + "707": 0.6655653119087219, + "708": 0.7120420932769775, + "709": 0.6198110580444336, + "710": 0.6193294525146484, + "711": 0.6613816618919373, + "712": 0.6125500202178955, + "713": 0.6190069913864136, + "714": 0.6449284553527832, + "715": 0.623160719871521, + "716": 0.635799765586853, + "717": 0.6766581535339355, + "718": 0.6606045365333557, + "719": 0.6689791679382324, + "720": 0.7258394956588745, + "721": 0.6947575807571411, + "722": 0.6685524582862854, + "723": 0.6224409341812134, + "724": 0.6535094380378723, + "725": 0.6764883995056152, + "726": 0.6475353837013245, + "727": 0.6286060214042664, + "728": 0.6079074740409851, + "729": 0.5699890851974487, + "730": 0.6327533721923828, + "731": 0.6207576394081116, + "732": 0.6795573234558105, + "733": 0.7368409633636475, + "734": 0.6754153966903687, + "735": 0.5986524820327759, + "736": 0.5603148341178894, + "737": 0.5499348044395447, + "738": 0.553995668888092, + "739": 0.552712082862854, + "740": 0.5789799094200134, + "741": 0.6051276922225952, + "742": 0.5702527165412903, + "743": 0.5347087979316711, + "744": 0.5771920680999756, + "745": 0.5683488249778748, + "746": 0.5687381625175476, + "747": 0.5655471682548523, + "748": 0.5749183893203735, + "749": 0.5902183651924133, + "750": 0.591609001159668, + "751": 0.5643320679664612, + "752": 0.5647693276405334, + "753": 0.5534801483154297, + "754": 0.5389439463615417, + "755": 0.5387442708015442 + }, + "loss": { + "702": 2.068603038787842, + "703": 2.0600123405456543, + "704": 2.0609488487243652, + "705": 2.0514752864837646, + "706": 2.0525529384613037, + "707": 2.0297815799713135, + "708": 2.043025016784668, + "709": 2.0269765853881836, + "710": 2.041564702987671, + "711": 2.035750389099121, + "712": 2.015636920928955, + "713": 2.041550636291504, + "714": 2.0596487522125244, + "715": 2.0353753566741943, + "716": 2.0464487075805664, + "717": 2.0496344566345215, + "718": 2.0427510738372803, + "719": 2.0662004947662354, + "720": 2.044621706008911, + "721": 2.0307607650756836, + "722": 2.044048309326172, + "723": 2.0486457347869873, + "724": 2.0574636459350586, + "725": 2.022700786590576, + "726": 2.028663396835327, + "727": 2.0265889167785645, + "728": 2.050173282623291, + "729": 2.022919178009033, + "730": 2.0387704372406006, + "731": 2.04561185836792, + "732": 2.040342092514038, + "733": 2.025927782058716, + "734": 2.0442991256713867, + "735": 2.040170669555664, + "736": 2.0224528312683105, + "737": 2.0229549407958984, + "738": 2.013416290283203, + "739": 2.042023181915283, + "740": 2.0228073596954346, + "741": 2.013936758041382, + "742": 2.0341639518737793, + "743": 2.0218470096588135, + "744": 2.034235954284668, + "745": 2.01385235786438, + "746": 2.030132532119751, + "747": 2.009298086166382, + "748": 2.0243000984191895, + "749": 2.024648904800415, + "750": 2.03177547454834, + "751": 2.0019636154174805, + "752": 2.0308589935302734, + "753": 2.0112240314483643, + "754": 1.999527096748352, + "755": 2.010127067565918 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 2.82579, + 2.92688, + 3.24089, + 4.05595, + 4.3549, + 4.58214, + 4.0296, + 5.27631, + 5.32255, + 4.65393, + 5.37192, + 5.32806, + 4.95188, + 5.24136, + 5.06244, + 4.47649, + 4.68093, + 4.61687, + 3.88089, + 4.20719, + 4.5732, + 5.28775, + 4.81757, + 4.41988, + 4.83819, + 5.12872, + 5.54773, + 6.22652, + 5.09213, + 5.30859, + 4.41826, + 3.73144, + 4.48128, + 5.69268, + 6.4419, + 6.68904, + 6.56026, + 6.6844, + 6.03431, + 5.49986, + 6.25534, + 7.07153, + 6.10605, + 6.23444, + 6.27624, + 6.28213, + 6.12439, + 5.81198, + 5.80505, + 6.28618, + 6.36704, + 6.56533, + 6.88399, + 6.92561 + ], + "train_epoch_time": 5.052505970001221, + "train_loss": 2.010954659912131, + "train_score": 0.4025365404025725, + "val_loss": 2.1118211666834203, + "val_score": 0.3764754948810376 + }, + { + "epoch": 14, + "grad_norm": 0.4847679138183594, + "learning_rate": 0.07166666666666667, + "model_norm": 87.76786041259766, + "step_logs": { + "grad_norm": { + "756": 0.5058140158653259, + "757": 0.49769508838653564, + "758": 0.5300824046134949, + "759": 0.5167328119277954, + "760": 0.5182225108146667, + "761": 0.5222752690315247, + "762": 0.49985629320144653, + "763": 0.5261271595954895, + "764": 0.5339662432670593, + "765": 0.49722054600715637, + "766": 0.525741696357727, + "767": 0.5475974082946777, + "768": 0.48826518654823303, + "769": 0.5173293948173523, + "770": 0.5007962584495544, + "771": 0.5114091634750366, + "772": 0.4944199323654175, + "773": 0.49919113516807556, + "774": 0.5080803036689758, + "775": 0.5323089361190796, + "776": 0.513701856136322, + "777": 0.4728732705116272, + "778": 0.5287741422653198, + "779": 0.5132942199707031, + "780": 0.5144039988517761, + "781": 0.4901692271232605, + "782": 0.4993336498737335, + "783": 0.5102146863937378, + "784": 0.5094323754310608, + "785": 0.5016494989395142, + "786": 0.5456656217575073, + "787": 0.45594877004623413, + "788": 0.5241446495056152, + "789": 0.5235331058502197, + "790": 0.49963730573654175, + "791": 0.5246809124946594, + "792": 0.48514509201049805, + "793": 0.4656037986278534, + "794": 0.51222825050354, + "795": 0.48398905992507935, + "796": 0.48211216926574707, + "797": 0.4955475330352783, + "798": 0.455089271068573, + "799": 0.45145097374916077, + "800": 0.4879188537597656, + "801": 0.5031603574752808, + "802": 0.5076599717140198, + "803": 0.4612177908420563, + "804": 0.5085827112197876, + "805": 0.49353665113449097, + "806": 0.46145835518836975, + "807": 0.5111345648765564, + "808": 0.4644039273262024, + "809": 0.4847679138183594 + }, + "loss": { + "756": 1.9891791343688965, + "757": 2.003089189529419, + "758": 2.021998643875122, + "759": 1.998246669769287, + "760": 1.9814414978027344, + "761": 2.0237021446228027, + "762": 2.0245351791381836, + "763": 2.0070135593414307, + "764": 2.0204505920410156, + "765": 1.9956107139587402, + "766": 2.011631965637207, + "767": 2.02752947807312, + "768": 1.9936531782150269, + "769": 1.9787603616714478, + "770": 2.0311286449432373, + "771": 2.0049281120300293, + "772": 2.025374412536621, + "773": 1.9979968070983887, + "774": 2.014063835144043, + "775": 2.0084056854248047, + "776": 2.0069265365600586, + "777": 2.0142831802368164, + "778": 1.9831066131591797, + "779": 2.0081496238708496, + "780": 1.9759306907653809, + "781": 1.9884992837905884, + "782": 2.0142593383789062, + "783": 2.0046749114990234, + "784": 2.0277111530303955, + "785": 1.994141936302185, + "786": 2.0026023387908936, + "787": 2.0019803047180176, + "788": 2.0354251861572266, + "789": 2.0050806999206543, + "790": 1.9992761611938477, + "791": 2.0147576332092285, + "792": 2.006575345993042, + "793": 2.0002737045288086, + "794": 1.9919480085372925, + "795": 2.0168275833129883, + "796": 1.9870167970657349, + "797": 2.016627311706543, + "798": 2.0025057792663574, + "799": 1.9895964860916138, + "800": 2.0089383125305176, + "801": 2.0107169151306152, + "802": 1.982405185699463, + "803": 1.9838014841079712, + "804": 2.003615379333496, + "805": 1.9976394176483154, + "806": 1.9997005462646484, + "807": 2.0074102878570557, + "808": 1.979114294052124, + "809": 1.9938241243362427 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 7.77485, + 8.08674, + 7.19605, + 7.48371, + 7.37817, + 7.41904, + 8.1028, + 7.25052, + 7.08632, + 8.07194, + 7.27786, + 6.76152, + 8.36254, + 7.39365, + 8.0987, + 7.66588, + 8.2854, + 8.01791, + 7.80205, + 7.088, + 7.60517, + 9.00805, + 7.0926, + 7.6219, + 7.46729, + 8.27625, + 8.07856, + 7.70084, + 7.81327, + 7.9242, + 6.72576, + 9.63003, + 7.40888, + 7.31549, + 8.00872, + 7.31867, + 8.52535, + 9.22691, + 7.59191, + 8.60989, + 8.5488, + 8.21212, + 9.66898, + 9.76211, + 8.43862, + 7.94215, + 7.69213, + 9.3258, + 7.74624, + 8.20122, + 9.39074, + 7.68362, + 9.17654, + 8.48436 + ], + "train_epoch_time": 5.059843301773071, + "train_loss": 1.9970765826691856, + "train_score": 0.40541046454198393, + "val_loss": 2.1023791610989315, + "val_score": 0.3787582525908605 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:25:26.382728", + "final_model_norm": 87.76786041259766, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:23:41.528256", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 3.4020798206329346, + "learning_rate": 2.15e-11, + "model_norm": 87.4279556274414, + "step_logs": { + "grad_norm": { + "0": 22.7664794921875, + "1": 23.4499454498291, + "2": 6.6349334716796875, + "3": 7.6453537940979, + "4": 21.053424835205078, + "5": 7.058008670806885, + "6": 5.988020420074463, + "7": 4.229581832885742, + "8": 3.9983975887298584, + "9": 7.037342548370361, + "10": 5.494598388671875, + "11": 8.59819221496582, + "12": 2.925267457962036, + "13": 39.092185974121094, + "14": 4.42000150680542, + "15": 8.168949127197266, + "16": 16.293621063232422, + "17": 7.581535339355469, + "18": 9.173480033874512, + "19": 5.241852760314941, + "20": 4.280900001525879, + "21": 27.222410202026367, + "22": 5.295047760009766, + "23": 37.9439811706543, + "24": 8.8499116897583, + "25": 22.281017303466797, + "26": 4.42485237121582, + "27": 10.319413185119629, + "28": 3.462764024734497, + "29": 5.584513187408447, + "30": 6.203570365905762, + "31": 12.963118553161621, + "32": 4.502926349639893, + "33": 12.705103874206543, + "34": 4.057493686676025, + "35": 13.966697692871094, + "36": 5.8249006271362305, + "37": 5.485065937042236, + "38": 8.925680160522461, + "39": 5.118313312530518, + "40": 4.502964019775391, + "41": 3.303424596786499, + "42": 5.9380340576171875, + "43": 3.2785823345184326, + "44": 3.232649564743042, + "45": 10.542298316955566, + "46": 8.931904792785645, + "47": 2.3960790634155273, + "48": 3.548248767852783, + "49": 3.5594968795776367, + "50": 3.093522071838379, + "51": 7.152304172515869, + "52": 2.485452890396118, + "53": 3.4020798206329346 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.821899175643921, + "3": 3.692150592803955, + "4": 4.159581661224365, + "5": 4.066653251647949, + "6": 3.577277660369873, + "7": 3.644073009490967, + "8": 3.516756057739258, + "9": 3.527935743331909, + "10": 3.9001166820526123, + "11": 3.4582974910736084, + "12": 3.3248465061187744, + "13": 3.636115074157715, + "14": 3.3588008880615234, + "15": 3.5337278842926025, + "16": 5.811540126800537, + "17": 3.4106802940368652, + "18": 4.815016269683838, + "19": 4.148991107940674, + "20": 3.581756353378296, + "21": 5.044354438781738, + "22": 3.6478264331817627, + "23": 3.5692028999328613, + "24": 3.675647258758545, + "25": 4.464303016662598, + "26": 3.690401554107666, + "27": 3.540964126586914, + "28": 3.1019937992095947, + "29": 3.440617561340332, + "30": 3.246218204498291, + "31": 6.221119403839111, + "32": 3.552947998046875, + "33": 3.9883127212524414, + "34": 3.316122531890869, + "35": 4.133999824523926, + "36": 3.9942827224731445, + "37": 3.4199929237365723, + "38": 3.7397119998931885, + "39": 3.6552629470825195, + "40": 3.303652763366699, + "41": 3.5317206382751465, + "42": 3.6163101196289062, + "43": 3.2882792949676514, + "44": 3.2394442558288574, + "45": 3.826599597930908, + "46": 3.640352487564087, + "47": 3.0058960914611816, + "48": 3.12412166595459, + "49": 3.574035167694092, + "50": 3.350635290145874, + "51": 3.607766628265381, + "52": 3.1396865844726562, + "53": 3.1181774139404297 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 0.00874362, + 0.00824185, + 0.0868173, + 0.0631661, + 0.00938435, + 0.0816343, + 0.0997668, + 0.203701, + 0.219973, + 0.0712366, + 0.129183, + 0.0467787, + 0.388544, + 0.00237935, + 0.171925, + 0.0529542, + 0.0218905, + 0.0593372, + 0.0572176, + 0.150999, + 0.195446, + 0.00680695, + 0.130105, + 0.00247905, + 0.0469306, + 0.00899257, + 0.188485, + 0.0332515, + 0.258699, + 0.110323, + 0.0843518, + 0.0370211, + 0.175226, + 0.0247077, + 0.201426, + 0.0211925, + 0.117723, + 0.113674, + 0.0469413, + 0.139529, + 0.162929, + 0.323637, + 0.102561, + 0.305912, + 0.309994, + 0.0344304, + 0.0456305, + 0.523566, + 0.248142, + 0.282086, + 0.350123, + 0.0705255, + 0.508247, + 0.269409 + ], + "train_epoch_time": 5.055591344833374, + "train_loss": 3.3950779705512812, + "train_score": 0.18294364242163758, + "val_loss": 3.386549778017467, + "val_score": 0.17807746150805673 + }, + { + "epoch": 1, + "grad_norm": 1.2744216918945312, + "learning_rate": 0.215, + "model_norm": 87.44254302978516, + "step_logs": { + "grad_norm": { + "54": 2.9588420391082764, + "55": 4.551698684692383, + "56": 2.997354030609131, + "57": 2.449770927429199, + "58": 4.22332763671875, + "59": 5.804546356201172, + "60": 2.8611626625061035, + "61": 6.422741413116455, + "62": 2.121128797531128, + "63": 2.7550430297851562, + "64": 4.218456268310547, + "65": 3.298926591873169, + "66": 3.0421950817108154, + "67": 13.084441184997559, + "68": 2.878300905227661, + "69": 2.4003987312316895, + "70": 8.343585014343262, + "71": 2.248182535171509, + "72": 2.7363319396972656, + "73": 2.3988842964172363, + "74": 3.64451265335083, + "75": 2.167081594467163, + "76": 1.8511990308761597, + "77": 2.448065996170044, + "78": 1.6482326984405518, + "79": 1.3638733625411987, + "80": 1.453316569328308, + "81": 1.8655786514282227, + "82": 2.458991765975952, + "83": 2.3232791423797607, + "84": 2.0435938835144043, + "85": 1.2519172430038452, + "86": 1.0352760553359985, + "87": 1.4385895729064941, + "88": 1.5370991230010986, + "89": 1.914219617843628, + "90": 1.659883975982666, + "91": 1.1214090585708618, + "92": 1.267828106880188, + "93": 1.8239319324493408, + "94": 1.550571322441101, + "95": 1.0800126791000366, + "96": 1.2024586200714111, + "97": 1.4818787574768066, + "98": 2.308434009552002, + "99": 1.788259744644165, + "100": 1.1262332201004028, + "101": 1.1004700660705566, + "102": 1.3424655199050903, + "103": 1.4440902471542358, + "104": 1.3265347480773926, + "105": 1.2666053771972656, + "106": 1.3224139213562012, + "107": 1.2744216918945312 + }, + "loss": { + "54": 3.394242286682129, + "55": 3.3434767723083496, + "56": 3.417769193649292, + "57": 2.9999172687530518, + "58": 3.295624256134033, + "59": 3.4924838542938232, + "60": 3.0930306911468506, + "61": 3.2670469284057617, + "62": 2.8934435844421387, + "63": 2.8680472373962402, + "64": 3.2922112941741943, + "65": 2.977743625640869, + "66": 3.392702102661133, + "67": 3.770906448364258, + "68": 2.9260621070861816, + "69": 3.0121850967407227, + "70": 3.5799572467803955, + "71": 2.991973638534546, + "72": 2.9055001735687256, + "73": 3.136777877807617, + "74": 2.968360185623169, + "75": 3.387712001800537, + "76": 2.925246238708496, + "77": 2.841588020324707, + "78": 2.9415340423583984, + "79": 2.723445415496826, + "80": 2.7015414237976074, + "81": 2.7253923416137695, + "82": 2.829561233520508, + "83": 2.9352569580078125, + "84": 2.9875247478485107, + "85": 2.701293468475342, + "86": 2.61785626411438, + "87": 2.6081013679504395, + "88": 2.7314963340759277, + "89": 2.6647980213165283, + "90": 2.8540682792663574, + "91": 2.642575740814209, + "92": 2.6175789833068848, + "93": 2.6791069507598877, + "94": 2.755443811416626, + "95": 2.6256582736968994, + "96": 2.6011414527893066, + "97": 2.6784753799438477, + "98": 2.6998097896575928, + "99": 2.9472389221191406, + "100": 2.6260838508605957, + "101": 2.6080284118652344, + "102": 2.620018482208252, + "103": 2.6422929763793945, + "104": 2.653263568878174, + "105": 2.6121649742126465, + "106": 2.6141340732574463, + "107": 2.6211490631103516 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.387703, + 0.161381, + 0.380423, + 0.499871, + 0.184769, + 0.103657, + 0.377832, + 0.0791981, + 0.643104, + 0.377859, + 0.185004, + 0.273616, + 0.366582, + 0.022026, + 0.353192, + 0.522775, + 0.0514248, + 0.591963, + 0.388046, + 0.545086, + 0.223479, + 0.721366, + 0.853604, + 0.474149, + 1.08277, + 1.4641, + 1.27906, + 0.783072, + 0.467956, + 0.543805, + 0.715356, + 1.72354, + 2.44249, + 1.26023, + 1.1561, + 0.727245, + 1.03588, + 2.10135, + 1.62847, + 0.805328, + 1.14606, + 2.25103, + 1.79897, + 1.21973, + 0.506639, + 0.921625, + 2.07039, + 2.15355, + 1.45378, + 1.26705, + 1.5078, + 1.62824, + 1.49483, + 1.61386 + ], + "train_epoch_time": 5.053322792053223, + "train_loss": 2.6014165839300607, + "train_score": 0.24673601150769242, + "val_loss": 2.6329931763912864, + "val_score": 0.2444254088757643 + }, + { + "epoch": 2, + "grad_norm": 1.1091136932373047, + "learning_rate": 0.215, + "model_norm": 87.45811462402344, + "step_logs": { + "grad_norm": { + "108": 1.3537793159484863, + "109": 1.5923566818237305, + "110": 2.557115316390991, + "111": 1.9047725200653076, + "112": 1.3393384218215942, + "113": 1.1124268770217896, + "114": 1.282261610031128, + "115": 1.4741277694702148, + "116": 1.5760860443115234, + "117": 1.3580493927001953, + "118": 1.3671592473983765, + "119": 1.5843846797943115, + "120": 2.2419321537017822, + "121": 1.587849497795105, + "122": 1.1053768396377563, + "123": 0.9108991026878357, + "124": 0.9168004393577576, + "125": 1.0370756387710571, + "126": 1.1651623249053955, + "127": 1.484588623046875, + "128": 1.6852039098739624, + "129": 1.5998611450195312, + "130": 1.4381487369537354, + "131": 1.3622264862060547, + "132": 1.3824046850204468, + "133": 1.436284065246582, + "134": 1.4441611766815186, + "135": 1.2695380449295044, + "136": 1.2396032810211182, + "137": 1.3548280000686646, + "138": 1.272579312324524, + "139": 1.1266461610794067, + "140": 1.1946275234222412, + "141": 1.5436118841171265, + "142": 1.408092975616455, + "143": 1.094925045967102, + "144": 1.0771726369857788, + "145": 1.228016972541809, + "146": 1.2998939752578735, + "147": 1.3817087411880493, + "148": 1.3504518270492554, + "149": 1.2477402687072754, + "150": 1.1859843730926514, + "151": 1.156193494796753, + "152": 1.15631103515625, + "153": 1.1957241296768188, + "154": 1.2543359994888306, + "155": 1.1964917182922363, + "156": 1.1030373573303223, + "157": 1.2358675003051758, + "158": 1.2464865446090698, + "159": 1.3116198778152466, + "160": 1.2531845569610596, + "161": 1.1091136932373047 + }, + "loss": { + "108": 2.610424518585205, + "109": 2.6543350219726562, + "110": 2.728701591491699, + "111": 2.9849181175231934, + "112": 2.641246795654297, + "113": 2.5753979682922363, + "114": 2.6039133071899414, + "115": 2.6467647552490234, + "116": 2.6306467056274414, + "117": 2.642721176147461, + "118": 2.576988697052002, + "119": 2.6863064765930176, + "120": 2.6805765628814697, + "121": 2.8543100357055664, + "122": 2.6050119400024414, + "123": 2.5657882690429688, + "124": 2.5764946937561035, + "125": 2.564035415649414, + "126": 2.5523557662963867, + "127": 2.5930418968200684, + "128": 2.6834757328033447, + "129": 2.67267107963562, + "130": 2.655188798904419, + "131": 2.6000471115112305, + "132": 2.6280689239501953, + "133": 2.588824510574341, + "134": 2.648529052734375, + "135": 2.5908961296081543, + "136": 2.5875887870788574, + "137": 2.5850300788879395, + "138": 2.621670961380005, + "139": 2.570456027984619, + "140": 2.5660643577575684, + "141": 2.5941407680511475, + "142": 2.652806282043457, + "143": 2.5749118328094482, + "144": 2.5511906147003174, + "145": 2.5554089546203613, + "146": 2.5818586349487305, + "147": 2.5662527084350586, + "148": 2.6069812774658203, + "149": 2.5674352645874023, + "150": 2.5896189212799072, + "151": 2.565016269683838, + "152": 2.561311721801758, + "153": 2.5388855934143066, + "154": 2.5716705322265625, + "155": 2.5705032348632812, + "156": 2.5561070442199707, + "157": 2.5741374492645264, + "158": 2.5815210342407227, + "159": 2.5703485012054443, + "160": 2.6151628494262695, + "161": 2.538440227508545 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 1.42435, + 1.04683, + 0.417307, + 0.822709, + 1.47241, + 2.08114, + 1.5837, + 1.21799, + 1.05902, + 1.43291, + 1.37871, + 1.07012, + 0.533314, + 1.13209, + 2.13201, + 3.09229, + 3.06535, + 2.38398, + 1.88005, + 1.17651, + 0.944915, + 1.04419, + 1.28377, + 1.40114, + 1.3752, + 1.25494, + 1.26991, + 1.60753, + 1.68395, + 1.40831, + 1.61886, + 2.02505, + 1.79805, + 1.08872, + 1.33796, + 2.1478, + 2.19873, + 1.69454, + 1.52798, + 1.34421, + 1.42949, + 1.64912, + 1.8411, + 1.9188, + 1.91564, + 1.77575, + 1.63451, + 1.79556, + 2.10087, + 1.68534, + 1.6615, + 1.49409, + 1.66521, + 2.06355 + ], + "train_epoch_time": 5.052363157272339, + "train_loss": 2.5611019456061603, + "train_score": 0.23936849891113243, + "val_loss": 2.598254974345526, + "val_score": 0.23617339989621658 + }, + { + "epoch": 3, + "grad_norm": 1.1446658372879028, + "learning_rate": 0.215, + "model_norm": 87.47420501708984, + "step_logs": { + "grad_norm": { + "162": 1.0795036554336548, + "163": 1.1948630809783936, + "164": 1.313206672668457, + "165": 1.3505170345306396, + "166": 1.2679893970489502, + "167": 0.9987155795097351, + "168": 0.9981672763824463, + "169": 1.0860553979873657, + "170": 1.3168418407440186, + "171": 1.5022683143615723, + "172": 1.4740920066833496, + "173": 1.2219743728637695, + "174": 1.0660455226898193, + "175": 0.9779313206672668, + "176": 0.9448350071907043, + "177": 1.0835472345352173, + "178": 1.2158117294311523, + "179": 1.3276386260986328, + "180": 1.3913034200668335, + "181": 1.284597635269165, + "182": 1.0919030904769897, + "183": 1.0397491455078125, + "184": 1.2926994562149048, + "185": 1.3351250886917114, + "186": 1.1868009567260742, + "187": 1.0680104494094849, + "188": 1.1032730340957642, + "189": 1.1434894800186157, + "190": 1.0390969514846802, + "191": 0.9297041296958923, + "192": 0.9079300165176392, + "193": 1.0789446830749512, + "194": 1.2187674045562744, + "195": 1.5853400230407715, + "196": 1.4148362874984741, + "197": 1.177739143371582, + "198": 1.0950336456298828, + "199": 1.253442406654358, + "200": 1.3364614248275757, + "201": 1.11751127243042, + "202": 1.0730769634246826, + "203": 1.3222599029541016, + "204": 1.2656381130218506, + "205": 1.0810356140136719, + "206": 1.1039373874664307, + "207": 1.1099750995635986, + "208": 1.1167949438095093, + "209": 1.25467050075531, + "210": 1.2758673429489136, + "211": 1.0997803211212158, + "212": 1.0883212089538574, + "213": 1.2073808908462524, + "214": 1.289371132850647, + "215": 1.1446658372879028 + }, + "loss": { + "162": 2.555593490600586, + "163": 2.5555341243743896, + "164": 2.5946128368377686, + "165": 2.557882070541382, + "166": 2.5956897735595703, + "167": 2.5656960010528564, + "168": 2.513338327407837, + "169": 2.528416156768799, + "170": 2.5558478832244873, + "171": 2.597238063812256, + "172": 2.6018199920654297, + "173": 2.557772397994995, + "174": 2.5404086112976074, + "175": 2.5234920978546143, + "176": 2.495227813720703, + "177": 2.5375282764434814, + "178": 2.5435545444488525, + "179": 2.582265853881836, + "180": 2.5569159984588623, + "181": 2.605103015899658, + "182": 2.5344672203063965, + "183": 2.523806571960449, + "184": 2.5452044010162354, + "185": 2.5882067680358887, + "186": 2.5537099838256836, + "187": 2.5519070625305176, + "188": 2.5399184226989746, + "189": 2.5384864807128906, + "190": 2.53171968460083, + "191": 2.509122371673584, + "192": 2.5117125511169434, + "193": 2.513850212097168, + "194": 2.538954019546509, + "195": 2.5605599880218506, + "196": 2.6236510276794434, + "197": 2.5396456718444824, + "198": 2.5503299236297607, + "199": 2.5266995429992676, + "200": 2.5790867805480957, + "201": 2.530428409576416, + "202": 2.539207935333252, + "203": 2.536179304122925, + "204": 2.5748157501220703, + "205": 2.5254759788513184, + "206": 2.5426840782165527, + "207": 2.5105419158935547, + "208": 2.528724193572998, + "209": 2.5420632362365723, + "210": 2.576490879058838, + "211": 2.5136027336120605, + "212": 2.5271058082580566, + "213": 2.530677318572998, + "214": 2.571873188018799, + "215": 2.538135528564453 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 2.19302, + 1.78997, + 1.50455, + 1.40243, + 1.61444, + 2.5723, + 2.52258, + 2.1436, + 1.4739, + 1.15084, + 1.19737, + 1.71292, + 2.23538, + 2.63867, + 2.79511, + 2.1613, + 1.72071, + 1.46501, + 1.32091, + 1.57867, + 2.12578, + 2.33453, + 1.5231, + 1.45196, + 1.81308, + 2.23725, + 2.08667, + 1.94138, + 2.34479, + 2.9029, + 3.04695, + 2.15944, + 1.70928, + 1.0188, + 1.31067, + 1.83094, + 2.12687, + 1.60822, + 1.44395, + 2.02624, + 2.20514, + 1.4506, + 1.60741, + 2.16104, + 2.08643, + 2.0377, + 2.02747, + 1.61483, + 1.58277, + 2.07819, + 2.13358, + 1.73599, + 1.54701, + 1.93712 + ], + "train_epoch_time": 5.053134441375732, + "train_loss": 2.5162249922239286, + "train_score": 0.24222673056286412, + "val_loss": 2.556968991162446, + "val_score": 0.23407900437763993 + }, + { + "epoch": 4, + "grad_norm": 1.060787320137024, + "learning_rate": 0.215, + "model_norm": 87.49299621582031, + "step_logs": { + "grad_norm": { + "216": 0.9633231163024902, + "217": 1.0106887817382812, + "218": 1.0738520622253418, + "219": 1.2391701936721802, + "220": 1.1446963548660278, + "221": 0.955525279045105, + "222": 1.0772483348846436, + "223": 1.2479866743087769, + "224": 1.2095543146133423, + "225": 1.1579902172088623, + "226": 1.0227599143981934, + "227": 1.0297197103500366, + "228": 0.9692851901054382, + "229": 0.8947920799255371, + "230": 0.9824771881103516, + "231": 1.0812528133392334, + "232": 1.198651671409607, + "233": 1.1882545948028564, + "234": 1.1107783317565918, + "235": 1.1343162059783936, + "236": 1.2151082754135132, + "237": 1.3301852941513062, + "238": 1.3763662576675415, + "239": 1.3176782131195068, + "240": 1.2533372640609741, + "241": 1.0426181554794312, + "242": 0.9654722213745117, + "243": 0.986422061920166, + "244": 1.1485326290130615, + "245": 1.2290763854980469, + "246": 1.1783108711242676, + "247": 1.0896384716033936, + "248": 1.0137453079223633, + "249": 0.9478561878204346, + "250": 1.0650352239608765, + "251": 1.2701523303985596, + "252": 0.9598528742790222, + "253": 0.9563704133033752, + "254": 1.3311500549316406, + "255": 1.385802984237671, + "256": 1.1627907752990723, + "257": 1.1118957996368408, + "258": 1.090212345123291, + "259": 1.122296690940857, + "260": 1.2211205959320068, + "261": 1.1986427307128906, + "262": 1.0584967136383057, + "263": 1.1139775514602661, + "264": 1.1434112787246704, + "265": 1.111130714416504, + "266": 1.1608188152313232, + "267": 1.2084766626358032, + "268": 1.1846649646759033, + "269": 1.060787320137024 + }, + "loss": { + "216": 2.5163536071777344, + "217": 2.4914841651916504, + "218": 2.5015335083007812, + "219": 2.5429632663726807, + "220": 2.568941593170166, + "221": 2.49544095993042, + "222": 2.532780170440674, + "223": 2.5202486515045166, + "224": 2.56973934173584, + "225": 2.503185272216797, + "226": 2.5275704860687256, + "227": 2.4948208332061768, + "228": 2.5221123695373535, + "229": 2.481478214263916, + "230": 2.499135971069336, + "231": 2.5088982582092285, + "232": 2.5080246925354004, + "233": 2.526634693145752, + "234": 2.529207706451416, + "235": 2.512040853500366, + "236": 2.546560764312744, + "237": 2.5465004444122314, + "238": 2.5607752799987793, + "239": 2.549367904663086, + "240": 2.5440845489501953, + "241": 2.5321335792541504, + "242": 2.4821934700012207, + "243": 2.500823974609375, + "244": 2.4934394359588623, + "245": 2.537851095199585, + "246": 2.4922428131103516, + "247": 2.515509605407715, + "248": 2.5096993446350098, + "249": 2.492447853088379, + "250": 2.4846715927124023, + "251": 2.5338258743286133, + "252": 2.53157639503479, + "253": 2.499506950378418, + "254": 2.4946110248565674, + "255": 2.57424259185791, + "256": 2.506796360015869, + "257": 2.5065479278564453, + "258": 2.4947104454040527, + "259": 2.499516487121582, + "260": 2.5047781467437744, + "261": 2.5250051021575928, + "262": 2.4777143001556396, + "263": 2.512481689453125, + "264": 2.5152482986450195, + "265": 2.5075490474700928, + "266": 2.4944794178009033, + "267": 2.510552406311035, + "268": 2.50632381439209, + "269": 2.4979054927825928 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 2.71161, + 2.43906, + 2.16929, + 1.65607, + 1.96053, + 2.73315, + 2.18256, + 1.61817, + 1.75646, + 1.86674, + 2.41633, + 2.35289, + 2.68449, + 3.09932, + 2.58908, + 2.14599, + 1.7456, + 1.78947, + 2.04989, + 1.95235, + 1.72474, + 1.43919, + 1.35177, + 1.4683, + 1.61955, + 2.32936, + 2.66291, + 2.57014, + 1.89022, + 1.68, + 1.79503, + 2.11866, + 2.4421, + 2.77422, + 2.19049, + 1.5706, + 2.74778, + 2.73276, + 1.40783, + 1.34044, + 1.85403, + 2.02744, + 2.09893, + 1.98445, + 1.67978, + 1.75745, + 2.21142, + 2.02465, + 1.92387, + 2.03104, + 1.85119, + 1.71907, + 1.78585, + 2.21983 + ], + "train_epoch_time": 5.052665948867798, + "train_loss": 2.47574853609761, + "train_score": 0.25657057017301726, + "val_loss": 2.5230224606090244, + "val_score": 0.25065029451001253 + }, + { + "epoch": 5, + "grad_norm": 1.9789291620254517, + "learning_rate": 0.215, + "model_norm": 87.51495361328125, + "step_logs": { + "grad_norm": { + "270": 1.0404844284057617, + "271": 1.054120659828186, + "272": 1.0576982498168945, + "273": 1.0886740684509277, + "274": 1.2677284479141235, + "275": 1.4281651973724365, + "276": 1.227057933807373, + "277": 1.1613863706588745, + "278": 1.0496907234191895, + "279": 1.0507171154022217, + "280": 1.2304166555404663, + "281": 1.2998121976852417, + "282": 1.2435519695281982, + "283": 1.1221468448638916, + "284": 1.0892112255096436, + "285": 1.1592762470245361, + "286": 1.5829782485961914, + "287": 1.3556159734725952, + "288": 1.3460111618041992, + "289": 1.287331461906433, + "290": 1.2054649591445923, + "291": 1.216613531112671, + "292": 1.2182255983352661, + "293": 1.2035558223724365, + "294": 1.3098182678222656, + "295": 1.1993869543075562, + "296": 1.1196050643920898, + "297": 1.1932202577590942, + "298": 1.3162342309951782, + "299": 1.2452771663665771, + "300": 1.1721611022949219, + "301": 1.389076828956604, + "302": 1.4930850267410278, + "303": 1.3326239585876465, + "304": 1.0249804258346558, + "305": 1.2022465467453003, + "306": 1.3210219144821167, + "307": 1.347138524055481, + "308": 1.3398345708847046, + "309": 1.3874322175979614, + "310": 1.4049382209777832, + "311": 1.266074776649475, + "312": 1.2584120035171509, + "313": 1.1598255634307861, + "314": 1.0752277374267578, + "315": 1.1054651737213135, + "316": 1.2830973863601685, + "317": 1.3031442165374756, + "318": 1.2562724351882935, + "319": 1.3067193031311035, + "320": 1.253655195236206, + "321": 1.1800053119659424, + "322": 1.389737844467163, + "323": 1.9789291620254517 + }, + "loss": { + "270": 2.481053113937378, + "271": 2.4931485652923584, + "272": 2.47379469871521, + "273": 2.4876770973205566, + "274": 2.4740099906921387, + "275": 2.5152814388275146, + "276": 2.506864547729492, + "277": 2.4897401332855225, + "278": 2.4641270637512207, + "279": 2.4755802154541016, + "280": 2.463754653930664, + "281": 2.5001280307769775, + "282": 2.4866793155670166, + "283": 2.494428873062134, + "284": 2.440568447113037, + "285": 2.464487075805664, + "286": 2.4941728115081787, + "287": 2.567190170288086, + "288": 2.4898314476013184, + "289": 2.5082783699035645, + "290": 2.459127902984619, + "291": 2.4648776054382324, + "292": 2.4492084980010986, + "293": 2.4543380737304688, + "294": 2.4661977291107178, + "295": 2.4770710468292236, + "296": 2.4436912536621094, + "297": 2.458850383758545, + "298": 2.457784652709961, + "299": 2.4822587966918945, + "300": 2.445441484451294, + "301": 2.4586124420166016, + "302": 2.5083487033843994, + "303": 2.4750165939331055, + "304": 2.436406135559082, + "305": 2.4328064918518066, + "306": 2.4731924533843994, + "307": 2.4277968406677246, + "308": 2.4908194541931152, + "309": 2.4814276695251465, + "310": 2.494682788848877, + "311": 2.4678120613098145, + "312": 2.4374711513519287, + "313": 2.445601463317871, + "314": 2.4159693717956543, + "315": 2.407942295074463, + "316": 2.392904043197632, + "317": 2.4680347442626953, + "318": 2.426144599914551, + "319": 2.4157941341400146, + "320": 2.4545459747314453, + "321": 2.4055252075195312, + "322": 2.4295005798339844, + "323": 2.484586715698242 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 2.29174, + 2.24371, + 2.21126, + 2.09893, + 1.53939, + 1.23319, + 1.66495, + 1.84587, + 2.23635, + 2.24236, + 1.6274, + 1.47979, + 1.60802, + 1.98094, + 2.05715, + 1.8338, + 0.995352, + 1.39696, + 1.37427, + 1.51354, + 1.69228, + 1.66529, + 1.65033, + 1.69435, + 1.43749, + 1.72195, + 1.94947, + 1.72699, + 1.41866, + 1.60072, + 1.77985, + 1.2742, + 1.12517, + 1.39368, + 2.31909, + 1.68314, + 1.41722, + 1.33779, + 1.38752, + 1.28907, + 1.26387, + 1.53955, + 1.5392, + 1.81803, + 2.08973, + 1.97041, + 1.45347, + 1.45334, + 1.53727, + 1.4148, + 1.56176, + 1.7276, + 1.25791, + 0.634445 + ], + "train_epoch_time": 5.052392244338989, + "train_loss": 2.5304164905630193, + "train_score": 0.2597583392685763, + "val_loss": 2.5800164375458463, + "val_score": 0.25274917507281125 + }, + { + "epoch": 6, + "grad_norm": 1.1846872568130493, + "learning_rate": 0.215, + "model_norm": 87.54618835449219, + "step_logs": { + "grad_norm": { + "324": 1.3573976755142212, + "325": 1.3110039234161377, + "326": 1.3333898782730103, + "327": 1.2504231929779053, + "328": 1.1230586767196655, + "329": 1.0541247129440308, + "330": 1.1783971786499023, + "331": 1.3918724060058594, + "332": 1.244531273841858, + "333": 1.0875149965286255, + "334": 1.1421831846237183, + "335": 1.2019091844558716, + "336": 1.159886360168457, + "337": 1.1704378128051758, + "338": 1.1401429176330566, + "339": 0.9999255537986755, + "340": 1.0166006088256836, + "341": 1.082255482673645, + "342": 1.1286462545394897, + "343": 1.2869051694869995, + "344": 1.3348263502120972, + "345": 1.2356014251708984, + "346": 1.1513980627059937, + "347": 1.095396876335144, + "348": 1.2476483583450317, + "349": 1.450863242149353, + "350": 1.7418878078460693, + "351": 1.4917750358581543, + "352": 1.3195232152938843, + "353": 1.393325924873352, + "354": 1.245233178138733, + "355": 1.117339849472046, + "356": 1.1995913982391357, + "357": 1.1889171600341797, + "358": 1.1929136514663696, + "359": 1.3817616701126099, + "360": 1.3203043937683105, + "361": 1.130623459815979, + "362": 1.0869464874267578, + "363": 1.0876266956329346, + "364": 1.0385804176330566, + "365": 1.1228587627410889, + "366": 1.2124336957931519, + "367": 1.2343508005142212, + "368": 1.1473438739776611, + "369": 0.9730501174926758, + "370": 1.0877901315689087, + "371": 1.2865344285964966, + "372": 1.4649385213851929, + "373": 1.4533758163452148, + "374": 1.3768742084503174, + "375": 1.2845958471298218, + "376": 1.2969180345535278, + "377": 1.1846872568130493 + }, + "loss": { + "324": 2.5348188877105713, + "325": 2.517831802368164, + "326": 2.469292163848877, + "327": 2.4391627311706543, + "328": 2.432940721511841, + "329": 2.39764404296875, + "330": 2.4189252853393555, + "331": 2.4243788719177246, + "332": 2.4466824531555176, + "333": 2.3837130069732666, + "334": 2.3925302028656006, + "335": 2.4191460609436035, + "336": 2.4181969165802, + "337": 2.38248872756958, + "338": 2.4586009979248047, + "339": 2.376631259918213, + "340": 2.3857100009918213, + "341": 2.381657361984253, + "342": 2.38150954246521, + "343": 2.3754851818084717, + "344": 2.4282846450805664, + "345": 2.3892898559570312, + "346": 2.3803272247314453, + "347": 2.3713722229003906, + "348": 2.391658306121826, + "349": 2.415534019470215, + "350": 2.4277889728546143, + "351": 2.4933362007141113, + "352": 2.4133195877075195, + "353": 2.4069058895111084, + "354": 2.4253978729248047, + "355": 2.3844547271728516, + "356": 2.401344060897827, + "357": 2.390613079071045, + "358": 2.389570713043213, + "359": 2.3708393573760986, + "360": 2.404020309448242, + "361": 2.3591580390930176, + "362": 2.3566761016845703, + "363": 2.348598003387451, + "364": 2.3539350032806396, + "365": 2.3588826656341553, + "366": 2.3553085327148438, + "367": 2.3789544105529785, + "368": 2.380356788635254, + "369": 2.3502817153930664, + "370": 2.346113920211792, + "371": 2.3712704181671143, + "372": 2.3816232681274414, + "373": 2.4218852519989014, + "374": 2.3745689392089844, + "375": 2.371290445327759, + "376": 2.356919288635254, + "377": 2.3802976608276367 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 1.37573, + 1.46494, + 1.38886, + 1.56001, + 1.92898, + 2.15775, + 1.74196, + 1.25142, + 1.57967, + 2.0155, + 1.83394, + 1.67463, + 1.79747, + 1.73914, + 1.89134, + 2.37699, + 2.30843, + 2.03339, + 1.86955, + 1.43436, + 1.36286, + 1.56499, + 1.7955, + 1.97632, + 1.53644, + 1.14752, + 0.800148, + 1.1204, + 1.38606, + 1.23981, + 1.56416, + 1.90993, + 1.66874, + 1.69124, + 1.6792, + 1.24175, + 1.37908, + 1.84553, + 1.99473, + 1.9854, + 2.1823, + 1.87092, + 1.60226, + 1.56138, + 1.80823, + 2.48227, + 1.98271, + 1.43264, + 1.10977, + 1.14656, + 1.25255, + 1.43698, + 1.40126, + 1.69599 + ], + "train_epoch_time": 5.0534539222717285, + "train_loss": 2.324898044142867, + "train_score": 0.3233926650783424, + "val_loss": 2.37294156707387, + "val_score": 0.3126345435253651 + }, + { + "epoch": 7, + "grad_norm": 1.1531764268875122, + "learning_rate": 0.215, + "model_norm": 87.57366943359375, + "step_logs": { + "grad_norm": { + "378": 1.1289430856704712, + "379": 1.243653416633606, + "380": 1.3153750896453857, + "381": 1.3431339263916016, + "382": 1.3326919078826904, + "383": 1.3011196851730347, + "384": 1.2887794971466064, + "385": 1.31019926071167, + "386": 1.1913291215896606, + "387": 1.1776820421218872, + "388": 1.1971888542175293, + "389": 1.3472508192062378, + "390": 1.363603115081787, + "391": 1.162752389907837, + "392": 1.0115514993667603, + "393": 1.0417031049728394, + "394": 1.1638439893722534, + "395": 1.1206867694854736, + "396": 0.9458897709846497, + "397": 0.9332041144371033, + "398": 0.9492629766464233, + "399": 1.1382262706756592, + "400": 1.2862460613250732, + "401": 1.3330069780349731, + "402": 1.309767246246338, + "403": 1.2581886053085327, + "404": 1.23268723487854, + "405": 1.2677524089813232, + "406": 1.2518386840820312, + "407": 1.2291858196258545, + "408": 1.206500768661499, + "409": 1.1809968948364258, + "410": 1.2421176433563232, + "411": 1.1774324178695679, + "412": 1.0919493436813354, + "413": 1.2882260084152222, + "414": 1.288912296295166, + "415": 1.2558650970458984, + "416": 1.156387448310852, + "417": 1.1074306964874268, + "418": 1.2333728075027466, + "419": 1.3118255138397217, + "420": 1.2340978384017944, + "421": 1.3532516956329346, + "422": 1.341996192932129, + "423": 1.3310952186584473, + "424": 1.3048169612884521, + "425": 1.2553791999816895, + "426": 1.0900002717971802, + "427": 0.8893017172813416, + "428": 1.004921555519104, + "429": 1.3000060319900513, + "430": 1.3357574939727783, + "431": 1.1531764268875122 + }, + "loss": { + "378": 2.3454017639160156, + "379": 2.3728151321411133, + "380": 2.3513553142547607, + "381": 2.3730781078338623, + "382": 2.352410316467285, + "383": 2.38809871673584, + "384": 2.3483800888061523, + "385": 2.3708133697509766, + "386": 2.3452467918395996, + "387": 2.32724928855896, + "388": 2.3073418140411377, + "389": 2.350141763687134, + "390": 2.4108469486236572, + "391": 2.361056327819824, + "392": 2.3009684085845947, + "393": 2.312511444091797, + "394": 2.322472095489502, + "395": 2.345559597015381, + "396": 2.3028035163879395, + "397": 2.30960750579834, + "398": 2.2892653942108154, + "399": 2.325058937072754, + "400": 2.3363466262817383, + "401": 2.3556876182556152, + "402": 2.358995199203491, + "403": 2.351083755493164, + "404": 2.350637674331665, + "405": 2.3232100009918213, + "406": 2.3398730754852295, + "407": 2.3386454582214355, + "408": 2.3467605113983154, + "409": 2.315364360809326, + "410": 2.318145751953125, + "411": 2.3347935676574707, + "412": 2.3186306953430176, + "413": 2.303685188293457, + "414": 2.3645644187927246, + "415": 2.318037509918213, + "416": 2.3407301902770996, + "417": 2.310563087463379, + "418": 2.308903217315674, + "419": 2.341179370880127, + "420": 2.3423972129821777, + "421": 2.3010945320129395, + "422": 2.374690055847168, + "423": 2.324794054031372, + "424": 2.348637104034424, + "425": 2.3122482299804688, + "426": 2.3057942390441895, + "427": 2.269679546356201, + "428": 2.284641742706299, + "429": 2.3086280822753906, + "430": 2.349426746368408, + "431": 2.304624557495117 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 1.84023, + 1.53414, + 1.359, + 1.31545, + 1.3245, + 1.41065, + 1.41388, + 1.38109, + 1.65244, + 1.67798, + 1.60985, + 1.29478, + 1.29656, + 1.74635, + 2.24872, + 2.13106, + 1.71459, + 1.86757, + 2.57381, + 2.65207, + 2.54052, + 1.79464, + 1.41218, + 1.32572, + 1.37511, + 1.48517, + 1.54696, + 1.4455, + 1.49312, + 1.54785, + 1.61218, + 1.66005, + 1.5025, + 1.68413, + 1.94458, + 1.38816, + 1.42333, + 1.46972, + 1.75043, + 1.88402, + 1.51781, + 1.36045, + 1.53802, + 1.25654, + 1.31857, + 1.3121, + 1.37948, + 1.46718, + 1.94074, + 2.8699, + 2.26232, + 1.36604, + 1.31676, + 1.73304 + ], + "train_epoch_time": 5.05295467376709, + "train_loss": 2.3001526758694752, + "train_score": 0.3114732783186213, + "val_loss": 2.3597924676472504, + "val_score": 0.299180181134582 + }, + { + "epoch": 8, + "grad_norm": 1.2681738138198853, + "learning_rate": 0.215, + "model_norm": 87.60327911376953, + "step_logs": { + "grad_norm": { + "432": 1.1657077074050903, + "433": 1.0624216794967651, + "434": 0.9508992433547974, + "435": 1.0236607789993286, + "436": 1.0205790996551514, + "437": 1.1780050992965698, + "438": 1.1690641641616821, + "439": 0.9694347381591797, + "440": 0.855843722820282, + "441": 0.9146633744239807, + "442": 0.9712487459182739, + "443": 1.1102378368377686, + "444": 1.2463276386260986, + "445": 1.3122589588165283, + "446": 1.3411824703216553, + "447": 1.3111793994903564, + "448": 1.1978651285171509, + "449": 1.1386679410934448, + "450": 1.1126952171325684, + "451": 1.1515297889709473, + "452": 1.554506778717041, + "453": 1.7724924087524414, + "454": 1.4600499868392944, + "455": 1.2319329977035522, + "456": 1.2554680109024048, + "457": 1.1865328550338745, + "458": 1.2981535196304321, + "459": 1.3265959024429321, + "460": 1.3346232175827026, + "461": 1.337964653968811, + "462": 1.0969263315200806, + "463": 0.9672433137893677, + "464": 1.0292184352874756, + "465": 1.225926399230957, + "466": 1.2225052118301392, + "467": 1.1589717864990234, + "468": 1.1485871076583862, + "469": 1.1403145790100098, + "470": 1.1448750495910645, + "471": 1.07486891746521, + "472": 1.0754045248031616, + "473": 1.0469359159469604, + "474": 0.9945337176322937, + "475": 1.0120408535003662, + "476": 1.1361474990844727, + "477": 1.2197010517120361, + "478": 1.2575366497039795, + "479": 1.2691034078598022, + "480": 1.3010587692260742, + "481": 1.2214558124542236, + "482": 1.2115081548690796, + "483": 1.3336796760559082, + "484": 1.3091402053833008, + "485": 1.2681738138198853 + }, + "loss": { + "432": 2.3262267112731934, + "433": 2.289912223815918, + "434": 2.285169839859009, + "435": 2.2982277870178223, + "436": 2.2939398288726807, + "437": 2.2875099182128906, + "438": 2.2896578311920166, + "439": 2.269970655441284, + "440": 2.2595419883728027, + "441": 2.2395036220550537, + "442": 2.298501491546631, + "443": 2.263517379760742, + "444": 2.332143545150757, + "445": 2.3122246265411377, + "446": 2.3136560916900635, + "447": 2.3255043029785156, + "448": 2.3160669803619385, + "449": 2.2910187244415283, + "450": 2.2740893363952637, + "451": 2.2703871726989746, + "452": 2.3054635524749756, + "453": 2.380159378051758, + "454": 2.3471803665161133, + "455": 2.3172059059143066, + "456": 2.279041051864624, + "457": 2.317080020904541, + "458": 2.2779951095581055, + "459": 2.3112294673919678, + "460": 2.286576747894287, + "461": 2.330930709838867, + "462": 2.2930221557617188, + "463": 2.2537410259246826, + "464": 2.254612922668457, + "465": 2.26353120803833, + "466": 2.2835307121276855, + "467": 2.2856826782226562, + "468": 2.297455310821533, + "469": 2.271739959716797, + "470": 2.3129823207855225, + "471": 2.2743937969207764, + "472": 2.263763904571533, + "473": 2.257481813430786, + "474": 2.246896266937256, + "475": 2.265903949737549, + "476": 2.2495508193969727, + "477": 2.257024049758911, + "478": 2.291917562484741, + "479": 2.2819509506225586, + "480": 2.291085720062256, + "481": 2.2643344402313232, + "482": 2.2799153327941895, + "483": 2.2721385955810547, + "484": 2.281658172607422, + "485": 2.279491424560547 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 1.71188, + 2.02873, + 2.52726, + 2.19321, + 2.20236, + 1.64842, + 1.67531, + 2.41537, + 3.08483, + 2.67688, + 2.4366, + 1.83633, + 1.50138, + 1.34274, + 1.28624, + 1.35267, + 1.61412, + 1.76699, + 1.83677, + 1.71218, + 0.954054, + 0.757595, + 1.10106, + 1.52683, + 1.44591, + 1.64582, + 1.35176, + 1.31331, + 1.28371, + 1.30209, + 1.90569, + 2.40898, + 2.12842, + 1.50611, + 1.52794, + 1.70165, + 1.74148, + 1.74707, + 1.76464, + 1.96859, + 1.95744, + 2.05961, + 2.27166, + 2.21231, + 1.74272, + 1.51715, + 1.4493, + 1.41681, + 1.35347, + 1.5177, + 1.55334, + 1.27741, + 1.33131, + 1.41736 + ], + "train_epoch_time": 5.059791326522827, + "train_loss": 2.278086079415495, + "train_score": 0.32514235111356976, + "val_loss": 2.3398839945634387, + "val_score": 0.3114057124413798 + }, + { + "epoch": 9, + "grad_norm": 1.0351953506469727, + "learning_rate": 0.215, + "model_norm": 87.63327026367188, + "step_logs": { + "grad_norm": { + "486": 1.1684287786483765, + "487": 1.0308504104614258, + "488": 1.0464164018630981, + "489": 1.0797393321990967, + "490": 1.0152900218963623, + "491": 0.9935247898101807, + "492": 1.0372517108917236, + "493": 1.0761717557907104, + "494": 1.056221604347229, + "495": 1.1687315702438354, + "496": 1.201019287109375, + "497": 1.1341582536697388, + "498": 1.0773943662643433, + "499": 1.104231357574463, + "500": 1.2758419513702393, + "501": 1.1967161893844604, + "502": 1.0560189485549927, + "503": 1.0503727197647095, + "504": 1.089692234992981, + "505": 1.169783115386963, + "506": 1.1873635053634644, + "507": 1.1956232786178589, + "508": 1.1792436838150024, + "509": 1.166176676750183, + "510": 1.1644901037216187, + "511": 1.114013910293579, + "512": 1.1182492971420288, + "513": 1.0733829736709595, + "514": 1.024760127067566, + "515": 1.0335193872451782, + "516": 1.029309868812561, + "517": 1.1361197233200073, + "518": 1.2207305431365967, + "519": 1.1736738681793213, + "520": 1.063452959060669, + "521": 1.191510558128357, + "522": 1.1893848180770874, + "523": 1.1825417280197144, + "524": 1.288385033607483, + "525": 1.2547293901443481, + "526": 1.448888897895813, + "527": 1.4466297626495361, + "528": 1.411918044090271, + "529": 1.2146190404891968, + "530": 0.8395581841468811, + "531": 0.7797887325286865, + "532": 0.8696569204330444, + "533": 0.8927386999130249, + "534": 0.8359286189079285, + "535": 0.876964807510376, + "536": 0.9378746151924133, + "537": 1.0051822662353516, + "538": 1.0174076557159424, + "539": 1.0351953506469727 + }, + "loss": { + "486": 2.289457082748413, + "487": 2.258662700653076, + "488": 2.242375373840332, + "489": 2.2333269119262695, + "490": 2.227339506149292, + "491": 2.235215425491333, + "492": 2.2564926147460938, + "493": 2.261200428009033, + "494": 2.2278871536254883, + "495": 2.229905128479004, + "496": 2.2912046909332275, + "497": 2.24263072013855, + "498": 2.264322519302368, + "499": 2.2308273315429688, + "500": 2.2664594650268555, + "501": 2.264845371246338, + "502": 2.2422800064086914, + "503": 2.220738410949707, + "504": 2.2208306789398193, + "505": 2.243136167526245, + "506": 2.261786460876465, + "507": 2.223053455352783, + "508": 2.2746758460998535, + "509": 2.269198179244995, + "510": 2.230855941772461, + "511": 2.254364013671875, + "512": 2.251873016357422, + "513": 2.254303216934204, + "514": 2.2264745235443115, + "515": 2.226069927215576, + "516": 2.2261905670166016, + "517": 2.235260009765625, + "518": 2.2614593505859375, + "519": 2.260850429534912, + "520": 2.2187652587890625, + "521": 2.267563819885254, + "522": 2.2565879821777344, + "523": 2.258462905883789, + "524": 2.284813165664673, + "525": 2.260769844055176, + "526": 2.264866352081299, + "527": 2.297207832336426, + "528": 2.2487082481384277, + "529": 2.2696166038513184, + "530": 2.215879440307617, + "531": 2.190382242202759, + "532": 2.2044198513031006, + "533": 2.2123496532440186, + "534": 2.214869737625122, + "535": 2.216726541519165, + "536": 2.2274861335754395, + "537": 2.209041118621826, + "538": 2.2008519172668457, + "539": 2.2481071949005127 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 1.67698, + 2.12549, + 2.04786, + 1.91564, + 2.16076, + 2.26445, + 2.09732, + 1.95243, + 1.99702, + 1.63251, + 1.58841, + 1.74345, + 1.95069, + 1.82956, + 1.39237, + 1.58145, + 2.0107, + 2.01285, + 1.87028, + 1.63925, + 1.60429, + 1.55511, + 1.63573, + 1.66857, + 1.64513, + 1.81653, + 1.8008, + 1.9566, + 2.12018, + 2.08402, + 2.10121, + 1.73173, + 1.51757, + 1.64126, + 1.96189, + 1.59722, + 1.59517, + 1.61503, + 1.37645, + 1.43601, + 1.07888, + 1.0977, + 1.12801, + 1.53841, + 3.14372, + 3.60218, + 2.91473, + 2.77591, + 3.16964, + 2.88236, + 2.53236, + 2.18632, + 2.12618, + 2.09784 + ], + "train_epoch_time": 5.053409814834595, + "train_loss": 2.2223256973830323, + "train_score": 0.34378474719897234, + "val_loss": 2.2957989156725214, + "val_score": 0.3247748635598083 + }, + { + "epoch": 10, + "grad_norm": 1.1790107488632202, + "learning_rate": 0.215, + "model_norm": 87.6618423461914, + "step_logs": { + "grad_norm": { + "540": 1.1793591976165771, + "541": 1.2544864416122437, + "542": 1.3156967163085938, + "543": 1.3263134956359863, + "544": 1.2598274946212769, + "545": 1.2719249725341797, + "546": 1.3314683437347412, + "547": 1.3810806274414062, + "548": 1.2629423141479492, + "549": 1.1641008853912354, + "550": 1.079391360282898, + "551": 0.9686326384544373, + "552": 0.9809894561767578, + "553": 1.082485556602478, + "554": 1.3549513816833496, + "555": 1.5087933540344238, + "556": 1.3560024499893188, + "557": 1.0993787050247192, + "558": 1.0621819496154785, + "559": 1.0812126398086548, + "560": 1.0281181335449219, + "561": 1.03694486618042, + "562": 1.1006512641906738, + "563": 1.0757622718811035, + "564": 0.9926916360855103, + "565": 1.0858652591705322, + "566": 1.0707896947860718, + "567": 1.0707064867019653, + "568": 1.0729668140411377, + "569": 1.0329747200012207, + "570": 0.9695006012916565, + "571": 0.9426378011703491, + "572": 0.9705097079277039, + "573": 0.9889181852340698, + "574": 1.0636265277862549, + "575": 1.1128720045089722, + "576": 1.1323602199554443, + "577": 1.126505970954895, + "578": 1.0679641962051392, + "579": 1.0324246883392334, + "580": 1.00874662399292, + "581": 1.0246080160140991, + "582": 1.121915340423584, + "583": 1.1432573795318604, + "584": 1.168141484260559, + "585": 1.1352272033691406, + "586": 1.1535664796829224, + "587": 1.191074252128601, + "588": 1.13308584690094, + "589": 1.096796989440918, + "590": 1.1544077396392822, + "591": 1.2780288457870483, + "592": 1.2854341268539429, + "593": 1.1790107488632202 + }, + "loss": { + "540": 2.2375879287719727, + "541": 2.2494208812713623, + "542": 2.232557773590088, + "543": 2.268441677093506, + "544": 2.2331626415252686, + "545": 2.2606444358825684, + "546": 2.2736902236938477, + "547": 2.265397071838379, + "548": 2.2437784671783447, + "549": 2.237973213195801, + "550": 2.2194788455963135, + "551": 2.2054667472839355, + "552": 2.2070603370666504, + "553": 2.2249860763549805, + "554": 2.2266693115234375, + "555": 2.3066585063934326, + "556": 2.258350133895874, + "557": 2.2342143058776855, + "558": 2.187175989151001, + "559": 2.222107410430908, + "560": 2.2103919982910156, + "561": 2.1974058151245117, + "562": 2.223552942276001, + "563": 2.252666473388672, + "564": 2.1936118602752686, + "565": 2.1940817832946777, + "566": 2.229010581970215, + "567": 2.203260660171509, + "568": 2.1924338340759277, + "569": 2.214766502380371, + "570": 2.2222049236297607, + "571": 2.1961467266082764, + "572": 2.192488193511963, + "573": 2.173339366912842, + "574": 2.1902012825012207, + "575": 2.220437526702881, + "576": 2.173555374145508, + "577": 2.213186264038086, + "578": 2.193436622619629, + "579": 2.205944538116455, + "580": 2.1946370601654053, + "581": 2.196967601776123, + "582": 2.181394577026367, + "583": 2.1926252841949463, + "584": 2.2116217613220215, + "585": 2.202755928039551, + "586": 2.2120208740234375, + "587": 2.240696907043457, + "588": 2.2186598777770996, + "589": 2.1977717876434326, + "590": 2.2106475830078125, + "591": 2.215855360031128, + "592": 2.2250189781188965, + "593": 2.1973719596862793 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 1.60875, + 1.42935, + 1.28971, + 1.28954, + 1.40701, + 1.39736, + 1.28254, + 1.1877, + 1.40674, + 1.65148, + 1.90499, + 2.35062, + 2.29343, + 1.89882, + 1.21285, + 1.01327, + 1.2282, + 1.84855, + 1.93859, + 1.90083, + 2.09114, + 2.04361, + 1.83547, + 1.94654, + 2.22603, + 1.86081, + 1.94403, + 1.92187, + 1.90438, + 2.07562, + 2.36422, + 2.47156, + 2.32776, + 2.22232, + 1.936, + 1.79287, + 1.69512, + 1.74402, + 1.92314, + 2.06956, + 2.15674, + 2.09271, + 1.73306, + 1.67755, + 1.62076, + 1.70923, + 1.66228, + 1.57945, + 1.72808, + 1.82696, + 1.65883, + 1.35663, + 1.34659, + 1.58077 + ], + "train_epoch_time": 5.052105665206909, + "train_loss": 2.2066850610921853, + "train_score": 0.34228725790806447, + "val_loss": 2.2768776690508274, + "val_score": 0.32542964275477265 + }, + { + "epoch": 11, + "grad_norm": 1.0811924934387207, + "learning_rate": 0.215, + "model_norm": 87.69290161132812, + "step_logs": { + "grad_norm": { + "594": 1.0722501277923584, + "595": 0.9539421796798706, + "596": 0.8895746469497681, + "597": 0.9736768007278442, + "598": 1.1487165689468384, + "599": 1.2902697324752808, + "600": 1.2751232385635376, + "601": 1.2577886581420898, + "602": 1.2878049612045288, + "603": 1.3745046854019165, + "604": 1.474057912826538, + "605": 1.3831669092178345, + "606": 1.188003420829773, + "607": 1.0976040363311768, + "608": 1.110308289527893, + "609": 1.1269159317016602, + "610": 1.09621262550354, + "611": 1.125464677810669, + "612": 1.1130430698394775, + "613": 1.1233985424041748, + "614": 1.2097125053405762, + "615": 1.1646780967712402, + "616": 1.0226004123687744, + "617": 1.0091562271118164, + "618": 0.9841094613075256, + "619": 1.052068829536438, + "620": 1.039048194885254, + "621": 0.9284295439720154, + "622": 0.930082380771637, + "623": 0.9472982883453369, + "624": 1.0345968008041382, + "625": 1.1250677108764648, + "626": 1.133701205253601, + "627": 1.1700854301452637, + "628": 1.1502556800842285, + "629": 1.1888290643692017, + "630": 1.3324466943740845, + "631": 1.4147093296051025, + "632": 1.409759283065796, + "633": 1.324647307395935, + "634": 1.196407437324524, + "635": 1.1432737112045288, + "636": 1.0334744453430176, + "637": 1.0733283758163452, + "638": 1.0704445838928223, + "639": 1.1816291809082031, + "640": 1.2707067728042603, + "641": 1.4233193397521973, + "642": 1.510169506072998, + "643": 1.4230880737304688, + "644": 1.5449576377868652, + "645": 1.3111509084701538, + "646": 1.1607167720794678, + "647": 1.0811924934387207 + }, + "loss": { + "594": 2.197610378265381, + "595": 2.1663661003112793, + "596": 2.1908884048461914, + "597": 2.1659035682678223, + "598": 2.1972155570983887, + "599": 2.1681125164031982, + "600": 2.222126007080078, + "601": 2.2042784690856934, + "602": 2.209777593612671, + "603": 2.2139716148376465, + "604": 2.2378392219543457, + "605": 2.250018835067749, + "606": 2.227654218673706, + "607": 2.2098264694213867, + "608": 2.2035820484161377, + "609": 2.1683459281921387, + "610": 2.1792078018188477, + "611": 2.1931228637695312, + "612": 2.1854279041290283, + "613": 2.1973283290863037, + "614": 2.193040132522583, + "615": 2.2128329277038574, + "616": 2.211945056915283, + "617": 2.1756386756896973, + "618": 2.191995620727539, + "619": 2.1691370010375977, + "620": 2.1774208545684814, + "621": 2.1583166122436523, + "622": 2.1612367630004883, + "623": 2.1501541137695312, + "624": 2.1886472702026367, + "625": 2.179947853088379, + "626": 2.1892282962799072, + "627": 2.1607470512390137, + "628": 2.1942410469055176, + "629": 2.1435461044311523, + "630": 2.2039132118225098, + "631": 2.18869948387146, + "632": 2.2016348838806152, + "633": 2.2277731895446777, + "634": 2.1970982551574707, + "635": 2.169689178466797, + "636": 2.167137622833252, + "637": 2.184201717376709, + "638": 2.1892950534820557, + "639": 2.166654348373413, + "640": 2.200949192047119, + "641": 2.1958212852478027, + "642": 2.2325479984283447, + "643": 2.2413299083709717, + "644": 2.231419801712036, + "645": 2.235685110092163, + "646": 2.2096056938171387, + "647": 2.195629596710205 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 1.91143, + 2.38061, + 2.76857, + 2.2846, + 1.66513, + 1.30233, + 1.36667, + 1.39332, + 1.33244, + 1.17187, + 1.02991, + 1.17608, + 1.57838, + 1.83428, + 1.78748, + 1.70744, + 1.81346, + 1.73141, + 1.76406, + 1.74111, + 1.49859, + 1.63131, + 2.11525, + 2.13634, + 2.26336, + 1.95974, + 2.01684, + 2.5039, + 2.49839, + 2.39605, + 2.04472, + 1.72222, + 1.70331, + 1.57822, + 1.65842, + 1.51668, + 1.24135, + 1.09358, + 1.10778, + 1.26961, + 1.53494, + 1.65996, + 2.02902, + 1.89595, + 1.91063, + 1.55177, + 1.36307, + 1.08391, + 0.978925, + 1.10673, + 0.934863, + 1.30048, + 1.64007, + 1.87825 + ], + "train_epoch_time": 5.052819490432739, + "train_loss": 2.1614820974287037, + "train_score": 0.3638685885084582, + "val_loss": 2.241458268171063, + "val_score": 0.34152554541860464 + }, + { + "epoch": 12, + "grad_norm": 0.7322170734405518, + "learning_rate": 0.215, + "model_norm": 87.72135162353516, + "step_logs": { + "grad_norm": { + "648": 0.9811676740646362, + "649": 0.9268051385879517, + "650": 0.9433252215385437, + "651": 1.0863995552062988, + "652": 1.1166901588439941, + "653": 1.0618934631347656, + "654": 1.0624192953109741, + "655": 1.0678328275680542, + "656": 1.0174188613891602, + "657": 1.0399116277694702, + "658": 1.1021108627319336, + "659": 1.0501272678375244, + "660": 1.0812186002731323, + "661": 1.113978385925293, + "662": 1.1203151941299438, + "663": 1.1313939094543457, + "664": 1.147111177444458, + "665": 1.193229079246521, + "666": 1.1617342233657837, + "667": 1.014167070388794, + "668": 0.9099019765853882, + "669": 0.8943407535552979, + "670": 0.9062685966491699, + "671": 0.9609559774398804, + "672": 1.0396455526351929, + "673": 1.0577466487884521, + "674": 0.9902991056442261, + "675": 0.8730791807174683, + "676": 0.7829840779304504, + "677": 0.7391128540039062, + "678": 0.726824164390564, + "679": 0.7180324196815491, + "680": 0.789458155632019, + "681": 0.7767745852470398, + "682": 0.7667245268821716, + "683": 0.747543215751648, + "684": 0.7859933376312256, + "685": 0.863521158695221, + "686": 0.8415840864181519, + "687": 0.7546394467353821, + "688": 0.7119998931884766, + "689": 0.6587206125259399, + "690": 0.649011492729187, + "691": 0.594395637512207, + "692": 0.607739269733429, + "693": 0.6333566308021545, + "694": 0.7254647612571716, + "695": 0.7646143436431885, + "696": 0.7775688767433167, + "697": 0.7838239073753357, + "698": 0.7927706837654114, + "699": 0.8128005862236023, + "700": 0.7663552165031433, + "701": 0.7322170734405518 + }, + "loss": { + "648": 2.1460654735565186, + "649": 2.1604654788970947, + "650": 2.1466236114501953, + "651": 2.152681589126587, + "652": 2.1853973865509033, + "653": 2.1641101837158203, + "654": 2.13713002204895, + "655": 2.157498598098755, + "656": 2.1578383445739746, + "657": 2.1604483127593994, + "658": 2.15325665473938, + "659": 2.1440532207489014, + "660": 2.136319160461426, + "661": 2.17118501663208, + "662": 2.1742184162139893, + "663": 2.168506622314453, + "664": 2.1592202186584473, + "665": 2.169806957244873, + "666": 2.1680572032928467, + "667": 2.1269326210021973, + "668": 2.1224851608276367, + "669": 2.14237642288208, + "670": 2.1322193145751953, + "671": 2.109828472137451, + "672": 2.133904457092285, + "673": 2.1641364097595215, + "674": 2.1350114345550537, + "675": 2.1151628494262695, + "676": 2.14634108543396, + "677": 2.1125316619873047, + "678": 2.111654281616211, + "679": 2.1284406185150146, + "680": 2.1131863594055176, + "681": 2.0897419452667236, + "682": 2.116541862487793, + "683": 2.1236226558685303, + "684": 2.111642599105835, + "685": 2.112362861633301, + "686": 2.1085355281829834, + "687": 2.093629837036133, + "688": 2.109797477722168, + "689": 2.0911669731140137, + "690": 2.084482192993164, + "691": 2.1057138442993164, + "692": 2.0671777725219727, + "693": 2.0651800632476807, + "694": 2.086766242980957, + "695": 2.0856857299804688, + "696": 2.1058921813964844, + "697": 2.084188938140869, + "698": 2.0881385803222656, + "699": 2.0810317993164062, + "700": 2.105480670928955, + "701": 2.092444896697998 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 2.22924, + 2.51519, + 2.41231, + 1.8239, + 1.75253, + 1.91919, + 1.89339, + 1.8921, + 2.08458, + 1.9978, + 1.77274, + 1.94425, + 1.82742, + 1.74962, + 1.7323, + 1.69408, + 1.64091, + 1.52396, + 1.60641, + 2.06792, + 2.56363, + 2.67849, + 2.59608, + 2.28476, + 1.97426, + 1.93429, + 2.17705, + 2.77483, + 3.50101, + 3.86707, + 3.99728, + 4.12832, + 3.39062, + 3.4634, + 3.60038, + 3.80019, + 3.41808, + 2.83284, + 2.97705, + 3.67638, + 4.1618, + 4.81932, + 4.94873, + 5.96003, + 5.59684, + 5.14827, + 3.96498, + 3.5675, + 3.48304, + 3.39235, + 3.32249, + 3.15, + 3.58502, + 3.90278 + ], + "train_epoch_time": 5.051898241043091, + "train_loss": 2.0858033783637637, + "train_score": 0.38239105089836173, + "val_loss": 2.176868250668391, + "val_score": 0.3553252372758123 + }, + { + "epoch": 13, + "grad_norm": 0.5446042418479919, + "learning_rate": 0.14333333333333334, + "model_norm": 87.7396240234375, + "step_logs": { + "grad_norm": { + "702": 0.7106263637542725, + "703": 0.7023528814315796, + "704": 0.7346948981285095, + "705": 0.7453676462173462, + "706": 0.6841105818748474, + "707": 0.6793532371520996, + "708": 0.7205890417098999, + "709": 0.7325961589813232, + "710": 0.739450216293335, + "711": 0.6701182723045349, + "712": 0.6422028541564941, + "713": 0.625672459602356, + "714": 0.6575274467468262, + "715": 0.728140652179718, + "716": 0.7375804781913757, + "717": 0.6998003125190735, + "718": 0.640377402305603, + "719": 0.6396369338035583, + "720": 0.6164888739585876, + "721": 0.6106836199760437, + "722": 0.6095193028450012, + "723": 0.57877516746521, + "724": 0.5720216035842896, + "725": 0.5472336411476135, + "726": 0.5757449865341187, + "727": 0.540033757686615, + "728": 0.5368126630783081, + "729": 0.5741632580757141, + "730": 0.5675048828125, + "731": 0.5887230038642883, + "732": 0.6098219156265259, + "733": 0.5937517881393433, + "734": 0.530218243598938, + "735": 0.5037831664085388, + "736": 0.5180294513702393, + "737": 0.5833373665809631, + "738": 0.5581859350204468, + "739": 0.46934571862220764, + "740": 0.4947464168071747, + "741": 0.5710729956626892, + "742": 0.5602770447731018, + "743": 0.5280522704124451, + "744": 0.5501554608345032, + "745": 0.5401403903961182, + "746": 0.5467321872711182, + "747": 0.5148298144340515, + "748": 0.5362414121627808, + "749": 0.5347321033477783, + "750": 0.5060610771179199, + "751": 0.47633087635040283, + "752": 0.4503069519996643, + "753": 0.4799935519695282, + "754": 0.46059876680374146, + "755": 0.5446042418479919 + }, + "loss": { + "702": 2.082811117172241, + "703": 2.0821421146392822, + "704": 2.084503173828125, + "705": 2.0871059894561768, + "706": 2.071005344390869, + "707": 2.081273078918457, + "708": 2.075662136077881, + "709": 2.0994250774383545, + "710": 2.08886981010437, + "711": 2.081561803817749, + "712": 2.0550527572631836, + "713": 2.081940174102783, + "714": 2.0638108253479004, + "715": 2.1018009185791016, + "716": 2.0605666637420654, + "717": 2.040350914001465, + "718": 2.064635992050171, + "719": 2.0932199954986572, + "720": 2.041184425354004, + "721": 2.053959846496582, + "722": 2.0766549110412598, + "723": 2.0492029190063477, + "724": 2.0703554153442383, + "725": 2.096932888031006, + "726": 2.0610103607177734, + "727": 2.062887191772461, + "728": 2.0565528869628906, + "729": 2.0642240047454834, + "730": 2.0523338317871094, + "731": 2.0799221992492676, + "732": 2.049804925918579, + "733": 2.0646867752075195, + "734": 2.0541014671325684, + "735": 2.0829780101776123, + "736": 2.0456130504608154, + "737": 2.0479836463928223, + "738": 2.0667760372161865, + "739": 2.069713830947876, + "740": 2.0498552322387695, + "741": 2.089901924133301, + "742": 2.043478012084961, + "743": 2.041771411895752, + "744": 2.0558619499206543, + "745": 2.0418753623962402, + "746": 2.0727176666259766, + "747": 2.060298442840576, + "748": 2.041529655456543, + "749": 2.0278160572052, + "750": 2.0660321712493896, + "751": 2.0380566120147705, + "752": 2.071171760559082, + "753": 2.065018653869629, + "754": 2.056190013885498, + "755": 2.0613954067230225 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 4.12446, + 4.22085, + 3.86179, + 3.75667, + 4.42516, + 4.5096, + 3.99743, + 3.91175, + 3.82027, + 4.63539, + 4.98286, + 5.31832, + 4.77356, + 3.96425, + 3.78763, + 4.16636, + 5.03468, + 5.1162, + 5.37071, + 5.50756, + 5.58971, + 6.11738, + 6.32732, + 7.00227, + 6.21756, + 7.07349, + 7.13665, + 6.26161, + 6.37249, + 6.00102, + 5.51197, + 5.85658, + 7.30655, + 8.20724, + 7.6228, + 6.01848, + 6.63339, + 9.3956, + 8.37448, + 6.4083, + 6.50975, + 7.3224, + 6.7924, + 6.99868, + 6.93411, + 7.77325, + 7.09962, + 7.09179, + 8.06736, + 8.98253, + 10.2141, + 8.963, + 9.6921, + 6.95023 + ], + "train_epoch_time": 5.058979749679565, + "train_loss": 2.045722785331255, + "train_score": 0.39361437410329986, + "val_loss": 2.140639336867393, + "val_score": 0.3675283445419592 + }, + { + "epoch": 14, + "grad_norm": 0.44176968932151794, + "learning_rate": 0.07166666666666667, + "model_norm": 87.74591064453125, + "step_logs": { + "grad_norm": { + "756": 0.4906125068664551, + "757": 0.5034204721450806, + "758": 0.5374881029129028, + "759": 0.5053876638412476, + "760": 0.5014017820358276, + "761": 0.494512677192688, + "762": 0.4802466630935669, + "763": 0.4998571276664734, + "764": 0.516618013381958, + "765": 0.46685874462127686, + "766": 0.4829357862472534, + "767": 0.4740074574947357, + "768": 0.4797145128250122, + "769": 0.5078187584877014, + "770": 0.5147002339363098, + "771": 0.4584689438343048, + "772": 0.45266568660736084, + "773": 0.48620399832725525, + "774": 0.48905470967292786, + "775": 0.5051054954528809, + "776": 0.4816376268863678, + "777": 0.42868202924728394, + "778": 0.4547256827354431, + "779": 0.5143576860427856, + "780": 0.4641590118408203, + "781": 0.480656236410141, + "782": 0.43557465076446533, + "783": 0.4607539474964142, + "784": 0.4573465883731842, + "785": 0.44665324687957764, + "786": 0.45431259274482727, + "787": 0.46867701411247253, + "788": 0.4666171073913574, + "789": 0.4550279378890991, + "790": 0.46211832761764526, + "791": 0.4761371910572052, + "792": 0.42929545044898987, + "793": 0.4465051591396332, + "794": 0.45080915093421936, + "795": 0.42503631114959717, + "796": 0.4344958961009979, + "797": 0.4685482084751129, + "798": 0.4361921548843384, + "799": 0.43827101588249207, + "800": 0.47122254967689514, + "801": 0.47953304648399353, + "802": 0.429753839969635, + "803": 0.4799472391605377, + "804": 0.451869934797287, + "805": 0.439447820186615, + "806": 0.43792325258255005, + "807": 0.4672706127166748, + "808": 0.4483935236930847, + "809": 0.44176968932151794 + }, + "loss": { + "756": 2.0622403621673584, + "757": 2.0562312602996826, + "758": 2.058318614959717, + "759": 2.063681125640869, + "760": 2.0369791984558105, + "761": 2.0508737564086914, + "762": 2.0467677116394043, + "763": 2.062342643737793, + "764": 2.0008726119995117, + "765": 2.0311782360076904, + "766": 2.051344633102417, + "767": 2.0478522777557373, + "768": 2.0625946521759033, + "769": 2.017423391342163, + "770": 2.0727338790893555, + "771": 2.017502784729004, + "772": 2.0456480979919434, + "773": 2.0360159873962402, + "774": 2.041134834289551, + "775": 2.025242328643799, + "776": 2.0499439239501953, + "777": 2.0376076698303223, + "778": 2.0448663234710693, + "779": 2.0544586181640625, + "780": 2.0328807830810547, + "781": 2.005777359008789, + "782": 2.0554420948028564, + "783": 2.007248878479004, + "784": 2.0513405799865723, + "785": 2.044337749481201, + "786": 2.0244295597076416, + "787": 2.0545248985290527, + "788": 2.049156665802002, + "789": 1.9959447383880615, + "790": 2.015410900115967, + "791": 2.0546536445617676, + "792": 2.061164379119873, + "793": 2.021644115447998, + "794": 2.0457210540771484, + "795": 2.0294415950775146, + "796": 2.0455422401428223, + "797": 1.9957976341247559, + "798": 2.0266478061676025, + "799": 2.0487868785858154, + "800": 2.0228381156921387, + "801": 2.0152206420898438, + "802": 2.0473623275756836, + "803": 2.0365805625915527, + "804": 2.057499408721924, + "805": 2.0447020530700684, + "806": 2.041956901550293, + "807": 2.0415797233581543, + "808": 2.0305683612823486, + "809": 2.017944574356079 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 8.56766, + 8.11354, + 7.12484, + 8.07966, + 8.10242, + 8.38656, + 8.87442, + 8.25409, + 7.49688, + 9.31917, + 8.79549, + 9.1144, + 8.96289, + 7.82311, + 7.82411, + 9.5983, + 9.98334, + 8.61279, + 8.53408, + 7.93803, + 8.83692, + 11.0879, + 9.88931, + 7.76546, + 9.43579, + 8.68188, + 10.8338, + 9.45503, + 9.80724, + 10.2474, + 9.80829, + 9.35328, + 9.41139, + 9.6399, + 9.43751, + 9.06305, + 11.1841, + 10.1403, + 10.0661, + 11.2337, + 10.8352, + 9.09092, + 10.6518, + 10.6662, + 9.1098, + 8.76366, + 11.0855, + 8.84127, + 10.0766, + 10.588, + 10.6476, + 9.35038, + 10.0995, + 10.3399 + ], + "train_epoch_time": 5.0532004833221436, + "train_loss": 2.0328163226331495, + "train_score": 0.3969411315158587, + "val_loss": 2.131256647142833, + "val_score": 0.3697393443499586 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:27:11.321219", + "final_model_norm": 87.74591064453125, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:25:26.519759", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 8.82536792755127, + "learning_rate": 4.64e-11, + "model_norm": 87.40768432617188, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.300286769866943, + "3": 8.448266983032227, + "4": 16.9385929107666, + "5": 5.521918296813965, + "6": 3.9200997352600098, + "7": 3.599931001663208, + "8": 5.704758644104004, + "9": 5.834868431091309, + "10": 5.412257194519043, + "11": 12.190019607543945, + "12": 10.025042533874512, + "13": 6.54962682723999, + "14": 28.818603515625, + "15": 4.159427165985107, + "16": 36.858619689941406, + "17": 4.056296348571777, + "18": 7.1390380859375, + "19": 9.596250534057617, + "20": 4.975948333740234, + "21": 7.607447624206543, + "22": 15.746626853942871, + "23": 4.523006439208984, + "24": 20.08755874633789, + "25": 25.93598175048828, + "26": 5.108270168304443, + "27": 8.370865821838379, + "28": 13.677657127380371, + "29": 7.280508995056152, + "30": 3.9361441135406494, + "31": 13.632987022399902, + "32": 9.50937271118164, + "33": 4.104121208190918, + "34": 7.985281467437744, + "35": 6.304378509521484, + "36": 12.142670631408691, + "37": 3.0258395671844482, + "38": 11.962531089782715, + "39": 7.152475833892822, + "40": 5.270654678344727, + "41": 7.082338809967041, + "42": 3.53645920753479, + "43": 7.500590801239014, + "44": 12.024969100952148, + "45": 6.4233293533325195, + "46": 6.324449062347412, + "47": 3.1137545108795166, + "48": 10.045760154724121, + "49": 5.26921272277832, + "50": 4.198294639587402, + "51": 5.467690467834473, + "52": 8.350181579589844, + "53": 8.82536792755127 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.8053386211395264, + "3": 3.8723838329315186, + "4": 4.444986343383789, + "5": 4.119450569152832, + "6": 3.5325722694396973, + "7": 3.6846227645874023, + "8": 3.6134226322174072, + "9": 4.427856922149658, + "10": 4.080832004547119, + "11": 4.590441703796387, + "12": 5.791167259216309, + "13": 5.108011722564697, + "14": 4.309202194213867, + "15": 3.5151607990264893, + "16": 4.652713775634766, + "17": 3.707624912261963, + "18": 3.6932244300842285, + "19": 3.9449338912963867, + "20": 3.7334671020507812, + "21": 5.518805980682373, + "22": 4.755866050720215, + "23": 4.560586929321289, + "24": 5.46610689163208, + "25": 4.7802863121032715, + "26": 3.646597385406494, + "27": 4.823602676391602, + "28": 4.182075023651123, + "29": 3.780635118484497, + "30": 4.1041646003723145, + "31": 4.27890157699585, + "32": 4.1620893478393555, + "33": 3.46358585357666, + "34": 5.6740546226501465, + "35": 3.9396560192108154, + "36": 5.884576797485352, + "37": 3.360581159591675, + "38": 4.597044944763184, + "39": 4.134245872497559, + "40": 3.536829710006714, + "41": 4.366776466369629, + "42": 3.3245744705200195, + "43": 4.319300651550293, + "44": 4.683053016662598, + "45": 3.8527963161468506, + "46": 4.030945777893066, + "47": 3.3693461418151855, + "48": 4.704986572265625, + "49": 3.9191548824310303, + "50": 3.571779727935791, + "51": 5.254242420196533, + "52": 4.4730963706970215, + "53": 4.387752532958984 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 0.00839976, + 0.00857642, + 0.0958678, + 0.0542554, + 0.0154923, + 0.135101, + 0.229878, + 0.284318, + 0.111031, + 0.130056, + 0.139313, + 0.030892, + 0.0576227, + 0.119075, + 0.00518861, + 0.203179, + 0.00342474, + 0.225339, + 0.0724647, + 0.0428387, + 0.150786, + 0.0953602, + 0.0191803, + 0.222929, + 0.0135464, + 0.00710638, + 0.139746, + 0.0688384, + 0.0223547, + 0.0713249, + 0.264901, + 0.0230224, + 0.0460265, + 0.20563, + 0.0889842, + 0.0991228, + 0.0399105, + 0.367048, + 0.0321242, + 0.0808134, + 0.127317, + 0.0870578, + 0.265827, + 0.0767755, + 0.0323863, + 0.0933804, + 0.100777, + 0.347518, + 0.0466222, + 0.141157, + 0.202646, + 0.175753, + 0.0641529, + 0.0563348 + ], + "train_epoch_time": 5.05444073677063, + "train_loss": 4.594941640550129, + "train_score": 0.07883227225099096, + "val_loss": 4.602305918144988, + "val_score": 0.07776173228990334 + }, + { + "epoch": 1, + "grad_norm": 3.006953239440918, + "learning_rate": 0.464, + "model_norm": 87.36406707763672, + "step_logs": { + "grad_norm": { + "54": 4.710721969604492, + "55": 6.39650297164917, + "56": 5.843128681182861, + "57": 3.7431066036224365, + "58": 10.560835838317871, + "59": 4.017078876495361, + "60": 10.557452201843262, + "61": 8.319095611572266, + "62": 4.053964614868164, + "63": 9.202042579650879, + "64": 4.659931182861328, + "65": 12.972692489624023, + "66": 3.0958261489868164, + "67": 6.036882400512695, + "68": 5.621063232421875, + "69": 3.284179210662842, + "70": 5.690109729766846, + "71": 4.0513787269592285, + "72": 4.812616348266602, + "73": 11.357001304626465, + "74": 3.325056552886963, + "75": 3.4481284618377686, + "76": 5.75864839553833, + "77": 5.69787073135376, + "78": 2.842493772506714, + "79": 5.701220512390137, + "80": 6.768085479736328, + "81": 3.201327085494995, + "82": 6.8929667472839355, + "83": 6.23445463180542, + "84": 3.2729835510253906, + "85": 3.375899076461792, + "86": 5.4267473220825195, + "87": 4.822790145874023, + "88": 2.607414722442627, + "89": 4.681608200073242, + "90": 3.2702584266662598, + "91": 10.841818809509277, + "92": 3.875659942626953, + "93": 4.503223896026611, + "94": 3.666914224624634, + "95": 3.1708147525787354, + "96": 9.237199783325195, + "97": 3.3563873767852783, + "98": 9.311427116394043, + "99": 3.0541188716888428, + "100": 5.967686653137207, + "101": 2.543062210083008, + "102": 10.831096649169922, + "103": 8.295923233032227, + "104": 6.005552291870117, + "105": 3.4593918323516846, + "106": 4.312873840332031, + "107": 3.006953239440918 + }, + "loss": { + "54": 4.604720115661621, + "55": 4.238933086395264, + "56": 3.8177826404571533, + "57": 3.3083527088165283, + "58": 4.512346267700195, + "59": 3.6355762481689453, + "60": 4.134289741516113, + "61": 3.8532001972198486, + "62": 3.693216323852539, + "63": 4.743278503417969, + "64": 3.793639659881592, + "65": 5.383148193359375, + "66": 3.3732831478118896, + "67": 3.5797066688537598, + "68": 4.2783660888671875, + "69": 3.5555572509765625, + "70": 4.0370612144470215, + "71": 3.8692026138305664, + "72": 3.649075984954834, + "73": 5.1226959228515625, + "74": 3.494163751602173, + "75": 3.435516595840454, + "76": 4.286025047302246, + "77": 3.748389482498169, + "78": 3.6159849166870117, + "79": 3.966820240020752, + "80": 4.193337440490723, + "81": 3.1364057064056396, + "82": 4.423172950744629, + "83": 3.787367343902588, + "84": 3.4963293075561523, + "85": 3.8199548721313477, + "86": 4.016044616699219, + "87": 4.007709503173828, + "88": 3.229917049407959, + "89": 4.222854137420654, + "90": 3.899428129196167, + "91": 5.141974449157715, + "92": 3.5960309505462646, + "93": 3.7712361812591553, + "94": 3.76520037651062, + "95": 3.376819133758545, + "96": 4.155144214630127, + "97": 3.5652108192443848, + "98": 4.161769866943359, + "99": 3.2475061416625977, + "100": 4.360395908355713, + "101": 3.0919129848480225, + "102": 5.570976734161377, + "103": 4.026264190673828, + "104": 3.7307796478271484, + "105": 3.5170247554779053, + "106": 3.3530328273773193, + "107": 3.46724534034729 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.207505, + 0.103603, + 0.11182, + 0.236128, + 0.0404581, + 0.225296, + 0.0370922, + 0.0556762, + 0.224722, + 0.0560157, + 0.174702, + 0.0319872, + 0.351965, + 0.098225, + 0.135407, + 0.329651, + 0.124688, + 0.23573, + 0.157551, + 0.0397165, + 0.316042, + 0.288952, + 0.129245, + 0.115457, + 0.447536, + 0.122041, + 0.0915436, + 0.306036, + 0.093094, + 0.0974407, + 0.326381, + 0.335181, + 0.13637, + 0.172306, + 0.475085, + 0.192671, + 0.364617, + 0.0437447, + 0.239404, + 0.185967, + 0.280018, + 0.335866, + 0.0486974, + 0.316476, + 0.0480005, + 0.348159, + 0.122437, + 0.478094, + 0.0474883, + 0.0585023, + 0.103441, + 0.293884, + 0.180262, + 0.38347 + ], + "train_epoch_time": 5.0525500774383545, + "train_loss": 4.562017924611161, + "train_score": 0.18486145980645458, + "val_loss": 4.528683683765468, + "val_score": 0.1867689794500441 + }, + { + "epoch": 2, + "grad_norm": 2.1464927196502686, + "learning_rate": 0.464, + "model_norm": 87.26619720458984, + "step_logs": { + "grad_norm": { + "108": 8.169489860534668, + "109": 4.130614280700684, + "110": 5.649086952209473, + "111": 2.59594988822937, + "112": 4.144364356994629, + "113": 2.533385992050171, + "114": 3.6351304054260254, + "115": 3.638923406600952, + "116": 5.83383846282959, + "117": 2.0029103755950928, + "118": 3.1203951835632324, + "119": 7.157638072967529, + "120": 2.707301139831543, + "121": 8.3097505569458, + "122": 2.2818338871002197, + "123": 3.313976526260376, + "124": 3.3985631465911865, + "125": 2.8124184608459473, + "126": 5.011419773101807, + "127": 3.0676419734954834, + "128": 5.723419666290283, + "129": 2.9939911365509033, + "130": 2.884540557861328, + "131": 3.2379729747772217, + "132": 4.2943267822265625, + "133": 2.302279233932495, + "134": 4.502897262573242, + "135": 2.9996230602264404, + "136": 3.02902889251709, + "137": 4.490320682525635, + "138": 2.1783552169799805, + "139": 3.811655282974243, + "140": 2.590667247772217, + "141": 3.2014966011047363, + "142": 6.4773993492126465, + "143": 2.298563003540039, + "144": 5.1533002853393555, + "145": 1.9343879222869873, + "146": 2.8834545612335205, + "147": 2.548436403274536, + "148": 3.1128015518188477, + "149": 2.6498889923095703, + "150": 3.1814181804656982, + "151": 2.720179557800293, + "152": 10.12575626373291, + "153": 2.546569585800171, + "154": 5.204450607299805, + "155": 3.207531452178955, + "156": 2.602947950363159, + "157": 2.8123319149017334, + "158": 2.6033408641815186, + "159": 2.2779171466827393, + "160": 2.269800901412964, + "161": 2.1464927196502686 + }, + "loss": { + "108": 4.580419540405273, + "109": 3.487710952758789, + "110": 3.9428746700286865, + "111": 3.545706033706665, + "112": 3.683506488800049, + "113": 3.107248306274414, + "114": 3.8686742782592773, + "115": 3.341705799102783, + "116": 4.002868175506592, + "117": 3.345228672027588, + "118": 3.464733839035034, + "119": 4.078404426574707, + "120": 3.628619909286499, + "121": 4.028453826904297, + "122": 3.2160584926605225, + "123": 3.4308042526245117, + "124": 3.641704559326172, + "125": 3.5471291542053223, + "126": 3.857940196990967, + "127": 3.4921181201934814, + "128": 4.167585372924805, + "129": 3.240133762359619, + "130": 3.1896281242370605, + "131": 3.7312583923339844, + "132": 3.6839218139648438, + "133": 3.2522432804107666, + "134": 3.948781967163086, + "135": 3.5439393520355225, + "136": 3.437025547027588, + "137": 3.670748710632324, + "138": 3.453272819519043, + "139": 3.491819381713867, + "140": 3.388313055038452, + "141": 3.403357982635498, + "142": 3.9253406524658203, + "143": 3.3482656478881836, + "144": 3.880375862121582, + "145": 3.125821113586426, + "146": 3.3218133449554443, + "147": 3.2574081420898438, + "148": 3.747436761856079, + "149": 3.327773094177246, + "150": 3.937485456466675, + "151": 3.4126486778259277, + "152": 4.637243270874023, + "153": 3.2354354858398438, + "154": 4.095819473266602, + "155": 3.3196070194244385, + "156": 3.292613983154297, + "157": 3.6738102436065674, + "158": 3.430298328399658, + "159": 3.264312744140625, + "160": 3.2965221405029297, + "161": 3.218731164932251 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.0686302, + 0.204414, + 0.123554, + 0.526151, + 0.21446, + 0.484143, + 0.292767, + 0.252361, + 0.117615, + 0.833878, + 0.355837, + 0.0796069, + 0.495072, + 0.0583395, + 0.617669, + 0.31239, + 0.315293, + 0.448453, + 0.153615, + 0.37109, + 0.127225, + 0.361461, + 0.383342, + 0.355884, + 0.199765, + 0.613574, + 0.194751, + 0.39387, + 0.374607, + 0.182054, + 0.727735, + 0.240339, + 0.504848, + 0.332049, + 0.0935569, + 0.633734, + 0.146118, + 0.835366, + 0.39953, + 0.501562, + 0.386751, + 0.473913, + 0.389025, + 0.461208, + 0.0452277, + 0.498909, + 0.151214, + 0.32266, + 0.48597, + 0.464498, + 0.506139, + 0.629095, + 0.639853, + 0.698596 + ], + "train_epoch_time": 5.051715135574341, + "train_loss": 3.2515053442595168, + "train_score": 0.15544409075033033, + "val_loss": 3.2648093703146235, + "val_score": 0.1537699128162163 + }, + { + "epoch": 3, + "grad_norm": 0.7859781384468079, + "learning_rate": 0.464, + "model_norm": 87.29029846191406, + "step_logs": { + "grad_norm": { + "162": 2.9996471405029297, + "163": 1.7292149066925049, + "164": 1.6292093992233276, + "165": 1.736675500869751, + "166": 1.515742540359497, + "167": 1.1343401670455933, + "168": 1.131856918334961, + "169": 1.1080681085586548, + "170": 1.2317570447921753, + "171": 1.1229251623153687, + "172": 1.0073604583740234, + "173": 1.0934464931488037, + "174": 1.1346526145935059, + "175": 0.8990886807441711, + "176": 0.9201200008392334, + "177": 1.358350157737732, + "178": 1.1711368560791016, + "179": 0.750200092792511, + "180": 0.6113574504852295, + "181": 0.6565418243408203, + "182": 0.7366262078285217, + "183": 1.0335490703582764, + "184": 1.047764539718628, + "185": 1.078670620918274, + "186": 1.0676854848861694, + "187": 0.9073778986930847, + "188": 1.008515477180481, + "189": 1.1846050024032593, + "190": 0.9940149188041687, + "191": 0.7675840854644775, + "192": 0.8644276857376099, + "193": 1.1324049234390259, + "194": 0.9839430451393127, + "195": 0.6516187787055969, + "196": 0.7045209407806396, + "197": 1.100314974784851, + "198": 1.2486379146575928, + "199": 0.9664126634597778, + "200": 0.8384034633636475, + "201": 0.8335612416267395, + "202": 0.8664008975028992, + "203": 0.9978999495506287, + "204": 0.954193651676178, + "205": 0.8018909692764282, + "206": 0.7958046793937683, + "207": 0.9081897139549255, + "208": 0.9873657822608948, + "209": 1.0146435499191284, + "210": 0.9648124575614929, + "211": 0.8621169328689575, + "212": 0.820178747177124, + "213": 0.8205053806304932, + "214": 0.8169720768928528, + "215": 0.7859781384468079 + }, + "loss": { + "162": 3.2603979110717773, + "163": 3.1458051204681396, + "164": 3.2391433715820312, + "165": 2.882030963897705, + "166": 3.206644058227539, + "167": 2.762540340423584, + "168": 2.8386037349700928, + "169": 2.7174291610717773, + "170": 2.8106064796447754, + "171": 2.7681338787078857, + "172": 2.7565886974334717, + "173": 2.6932692527770996, + "174": 2.7876548767089844, + "175": 2.6847944259643555, + "176": 2.6747021675109863, + "177": 2.700319528579712, + "178": 2.8367528915405273, + "179": 2.682593822479248, + "180": 2.5992677211761475, + "181": 2.624390125274658, + "182": 2.5951452255249023, + "183": 2.6522347927093506, + "184": 2.749772071838379, + "185": 2.666172504425049, + "186": 2.729923725128174, + "187": 2.6606221199035645, + "188": 2.669532299041748, + "189": 2.6987133026123047, + "190": 2.765218734741211, + "191": 2.6127982139587402, + "192": 2.611755132675171, + "193": 2.6538138389587402, + "194": 2.712456703186035, + "195": 2.596526622772217, + "196": 2.5897269248962402, + "197": 2.6425256729125977, + "198": 2.7527050971984863, + "199": 2.6624763011932373, + "200": 2.655541181564331, + "201": 2.600940227508545, + "202": 2.646070718765259, + "203": 2.6062302589416504, + "204": 2.6927528381347656, + "205": 2.6068761348724365, + "206": 2.6212356090545654, + "207": 2.5866236686706543, + "208": 2.6545803546905518, + "209": 2.623156785964966, + "210": 2.655985116958618, + "211": 2.611877918243408, + "212": 2.6071112155914307, + "213": 2.5948870182037354, + "214": 2.625883102416992, + "215": 2.586777687072754 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 0.362352, + 1.05204, + 1.22033, + 0.955567, + 1.39573, + 2.14695, + 2.21575, + 2.21322, + 1.85246, + 2.19526, + 2.71645, + 2.2526, + 2.16528, + 3.32128, + 3.15927, + 1.4635, + 2.06827, + 4.76651, + 6.95441, + 6.08841, + 4.78264, + 2.48285, + 2.50478, + 2.29145, + 2.39477, + 3.23152, + 2.62464, + 1.92313, + 2.79862, + 4.43459, + 3.49522, + 2.06951, + 2.80171, + 6.11513, + 5.21754, + 2.18266, + 1.76558, + 2.85076, + 3.77787, + 3.74331, + 3.52504, + 2.61721, + 2.95749, + 4.05406, + 4.13898, + 3.13603, + 2.72295, + 2.54799, + 2.85325, + 3.51415, + 3.87563, + 3.85439, + 3.93424, + 4.18734 + ], + "train_epoch_time": 5.054364204406738, + "train_loss": 2.599224467941131, + "train_score": 0.23703707853382938, + "val_loss": 2.6324636979984497, + "val_score": 0.22956282321265317 + }, + { + "epoch": 4, + "grad_norm": 0.7366073727607727, + "learning_rate": 0.464, + "model_norm": 87.323486328125, + "step_logs": { + "grad_norm": { + "216": 0.7751213312149048, + "217": 0.8250643610954285, + "218": 0.8395969271659851, + "219": 0.8602483868598938, + "220": 0.9384766817092896, + "221": 0.9298914670944214, + "222": 0.8511123657226562, + "223": 0.7777748703956604, + "224": 0.7834213972091675, + "225": 0.8141236305236816, + "226": 0.7789618372917175, + "227": 0.7421006560325623, + "228": 0.7813637852668762, + "229": 0.8658016920089722, + "230": 0.8998185396194458, + "231": 0.8682911992073059, + "232": 0.777773380279541, + "233": 0.769269585609436, + "234": 0.8563226461410522, + "235": 0.8227230906486511, + "236": 0.8421036005020142, + "237": 0.9573317170143127, + "238": 0.9203121662139893, + "239": 0.8030405044555664, + "240": 0.7765429615974426, + "241": 0.772923469543457, + "242": 0.7743852138519287, + "243": 0.8424039483070374, + "244": 0.8016358017921448, + "245": 0.7169095873832703, + "246": 0.7090617418289185, + "247": 0.705998957157135, + "248": 0.7767693996429443, + "249": 0.8481795787811279, + "250": 0.8440188765525818, + "251": 0.7840527296066284, + "252": 0.7568413615226746, + "253": 0.7544783353805542, + "254": 0.8051284551620483, + "255": 0.8347389101982117, + "256": 0.8167509436607361, + "257": 0.7878116369247437, + "258": 0.7775871753692627, + "259": 0.7840420007705688, + "260": 0.8864841461181641, + "261": 0.8494069576263428, + "262": 0.712838888168335, + "263": 0.6982460618019104, + "264": 0.6957581639289856, + "265": 0.7341987490653992, + "266": 0.8037201166152954, + "267": 0.9084228873252869, + "268": 0.8392064571380615, + "269": 0.7366073727607727 + }, + "loss": { + "216": 2.611741542816162, + "217": 2.5883846282958984, + "218": 2.6176917552948, + "219": 2.5711255073547363, + "220": 2.6345291137695312, + "221": 2.6057045459747314, + "222": 2.6083571910858154, + "223": 2.5951972007751465, + "224": 2.587825059890747, + "225": 2.572554111480713, + "226": 2.6032943725585938, + "227": 2.5650768280029297, + "228": 2.5970864295959473, + "229": 2.57080340385437, + "230": 2.601482391357422, + "231": 2.600564956665039, + "232": 2.6188318729400635, + "233": 2.552961826324463, + "234": 2.6209750175476074, + "235": 2.5775938034057617, + "236": 2.574636459350586, + "237": 2.5973219871520996, + "238": 2.6412911415100098, + "239": 2.5575971603393555, + "240": 2.5757863521575928, + "241": 2.561328411102295, + "242": 2.5778720378875732, + "243": 2.558349132537842, + "244": 2.600175380706787, + "245": 2.5513784885406494, + "246": 2.551772117614746, + "247": 2.5409252643585205, + "248": 2.5703318119049072, + "249": 2.5635628700256348, + "250": 2.6016480922698975, + "251": 2.5579848289489746, + "252": 2.562375545501709, + "253": 2.5555100440979004, + "254": 2.5685741901397705, + "255": 2.5679335594177246, + "256": 2.5884275436401367, + "257": 2.56412410736084, + "258": 2.5438132286071777, + "259": 2.5496699810028076, + "260": 2.5670204162597656, + "261": 2.595435857772827, + "262": 2.565859794616699, + "263": 2.5291213989257812, + "264": 2.532837390899658, + "265": 2.527437925338745, + "266": 2.552936553955078, + "267": 2.534872055053711, + "268": 2.5888028144836426, + "269": 2.5330333709716797 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 4.34701, + 3.80236, + 3.71344, + 3.47437, + 2.99127, + 3.01343, + 3.60075, + 4.29005, + 4.21642, + 3.88136, + 4.29033, + 4.65773, + 4.25383, + 3.42951, + 3.213, + 3.44935, + 4.32914, + 4.31407, + 3.57428, + 3.80809, + 3.63065, + 2.83401, + 3.1185, + 3.96604, + 4.27148, + 4.28738, + 4.2988, + 3.60511, + 4.04621, + 4.96416, + 5.07544, + 5.09781, + 4.25995, + 3.56343, + 3.65211, + 4.16109, + 4.47335, + 4.48936, + 3.96243, + 3.68538, + 3.88022, + 4.13137, + 4.20714, + 4.14768, + 3.26654, + 3.59732, + 5.04952, + 5.18744, + 5.23228, + 4.68871, + 3.95212, + 3.07171, + 3.67588, + 4.66841 + ], + "train_epoch_time": 5.053727149963379, + "train_loss": 2.5606365554130917, + "train_score": 0.23850990851093057, + "val_loss": 2.5998535243843506, + "val_score": 0.23252278278944277 + }, + { + "epoch": 5, + "grad_norm": 0.7789791822433472, + "learning_rate": 0.464, + "model_norm": 87.36167907714844, + "step_logs": { + "grad_norm": { + "270": 0.7879934310913086, + "271": 0.8151271343231201, + "272": 0.8245845437049866, + "273": 0.7975809574127197, + "274": 0.826008141040802, + "275": 0.8231139779090881, + "276": 0.7795182466506958, + "277": 0.7362702488899231, + "278": 0.7211339473724365, + "279": 0.7161098718643188, + "280": 0.7130730152130127, + "281": 0.7160906195640564, + "282": 0.7397487759590149, + "283": 0.7289568185806274, + "284": 0.6959989070892334, + "285": 0.7171751856803894, + "286": 0.7552029490470886, + "287": 0.875968337059021, + "288": 0.9072176814079285, + "289": 0.7873736619949341, + "290": 0.7143775224685669, + "291": 0.7785099744796753, + "292": 0.8036198019981384, + "293": 0.7516964673995972, + "294": 0.6911051869392395, + "295": 0.7114232778549194, + "296": 0.7727879285812378, + "297": 0.756477415561676, + "298": 0.8052388429641724, + "299": 0.8475075364112854, + "300": 0.953319251537323, + "301": 0.8589737415313721, + "302": 0.6984808444976807, + "303": 0.5516240000724792, + "304": 0.5574950575828552, + "305": 0.5726935863494873, + "306": 0.6276155710220337, + "307": 0.6959097385406494, + "308": 0.709283709526062, + "309": 0.655981719493866, + "310": 0.6116015315055847, + "311": 0.6479474306106567, + "312": 0.6818233132362366, + "313": 0.7761996388435364, + "314": 0.823245108127594, + "315": 0.8815235495567322, + "316": 0.8640546202659607, + "317": 0.7447691559791565, + "318": 0.7237235903739929, + "319": 0.7106289267539978, + "320": 0.7508794665336609, + "321": 0.8465685248374939, + "322": 0.8888311982154846, + "323": 0.7789791822433472 + }, + "loss": { + "270": 2.5435686111450195, + "271": 2.5349855422973633, + "272": 2.587954044342041, + "273": 2.548557758331299, + "274": 2.5630228519439697, + "275": 2.544992446899414, + "276": 2.570162296295166, + "277": 2.5358524322509766, + "278": 2.5405116081237793, + "279": 2.540127754211426, + "280": 2.5498757362365723, + "281": 2.5221893787384033, + "282": 2.542597532272339, + "283": 2.5179834365844727, + "284": 2.5406641960144043, + "285": 2.5211777687072754, + "286": 2.544454574584961, + "287": 2.530001163482666, + "288": 2.5910000801086426, + "289": 2.5405683517456055, + "290": 2.533108711242676, + "291": 2.5320348739624023, + "292": 2.537235736846924, + "293": 2.5382938385009766, + "294": 2.510328769683838, + "295": 2.529064655303955, + "296": 2.5307998657226562, + "297": 2.5230109691619873, + "298": 2.5292470455169678, + "299": 2.5291075706481934, + "300": 2.5657010078430176, + "301": 2.5819036960601807, + "302": 2.546079158782959, + "303": 2.495604991912842, + "304": 2.499277114868164, + "305": 2.488581895828247, + "306": 2.485379457473755, + "307": 2.5085036754608154, + "308": 2.52060604095459, + "309": 2.4949469566345215, + "310": 2.4939627647399902, + "311": 2.5050272941589355, + "312": 2.4954237937927246, + "313": 2.4973912239074707, + "314": 2.5299363136291504, + "315": 2.5229904651641846, + "316": 2.5666661262512207, + "317": 2.5081264972686768, + "318": 2.510794162750244, + "319": 2.504138946533203, + "320": 2.492478847503662, + "321": 2.51399827003479, + "322": 2.5444583892822266, + "323": 2.520963430404663 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 4.09636, + 3.81527, + 3.80615, + 4.00631, + 3.7565, + 3.75635, + 4.22968, + 4.67788, + 4.88528, + 4.95332, + 5.01477, + 4.9186, + 4.64632, + 4.73859, + 5.24481, + 4.90177, + 4.46136, + 3.29719, + 3.14807, + 4.09797, + 4.96362, + 4.17774, + 3.9288, + 4.49218, + 5.25584, + 4.99694, + 4.23777, + 4.40887, + 3.90069, + 3.52111, + 2.82312, + 3.49929, + 5.21871, + 8.20143, + 8.04141, + 7.58765, + 6.30965, + 5.17975, + 5.01031, + 5.798, + 6.66734, + 5.96668, + 5.36785, + 4.14514, + 3.73294, + 3.24674, + 3.43785, + 4.52174, + 4.79364, + 4.95876, + 4.4207, + 3.50785, + 3.22075, + 4.15446 + ], + "train_epoch_time": 5.054446697235107, + "train_loss": 2.4865550700013914, + "train_score": 0.2752634057783157, + "val_loss": 2.5258698274566442, + "val_score": 0.26641432295984296 + }, + { + "epoch": 6, + "grad_norm": 0.7137139439582825, + "learning_rate": 0.464, + "model_norm": 87.4074478149414, + "step_logs": { + "grad_norm": { + "324": 0.6271835565567017, + "325": 0.5162777900695801, + "326": 0.45463573932647705, + "327": 0.48428601026535034, + "328": 0.5417293310165405, + "329": 0.5998635292053223, + "330": 0.7049420475959778, + "331": 1.0166071653366089, + "332": 0.9228875041007996, + "333": 0.691796600818634, + "334": 0.6814365386962891, + "335": 0.6703628897666931, + "336": 0.7252383232116699, + "337": 0.8871849179267883, + "338": 0.879819929599762, + "339": 0.757430911064148, + "340": 0.6660841107368469, + "341": 0.6151532530784607, + "342": 0.670251727104187, + "343": 0.7393510937690735, + "344": 0.7295030355453491, + "345": 0.7082820534706116, + "346": 0.7068021297454834, + "347": 0.6874851584434509, + "348": 0.7131623029708862, + "349": 0.8159708380699158, + "350": 0.8107249736785889, + "351": 0.7199427485466003, + "352": 0.7256243228912354, + "353": 0.6989573240280151, + "354": 0.7428707480430603, + "355": 0.7518157362937927, + "356": 0.6944369077682495, + "357": 0.709665834903717, + "358": 0.8446700572967529, + "359": 0.766253650188446, + "360": 0.6400922536849976, + "361": 0.6063502430915833, + "362": 0.6322147250175476, + "363": 0.7301038503646851, + "364": 0.935541033744812, + "365": 0.9347594976425171, + "366": 0.8508215546607971, + "367": 1.0017222166061401, + "368": 0.8770847916603088, + "369": 0.8094735741615295, + "370": 0.8130273818969727, + "371": 0.8326973915100098, + "372": 0.805537760257721, + "373": 0.7913177609443665, + "374": 0.7684414386749268, + "375": 0.7492300868034363, + "376": 0.8499711751937866, + "377": 0.7137139439582825 + }, + "loss": { + "324": 2.4609580039978027, + "325": 2.486044406890869, + "326": 2.4552671909332275, + "327": 2.456817865371704, + "328": 2.468639612197876, + "329": 2.439281940460205, + "330": 2.4610490798950195, + "331": 2.5160906314849854, + "332": 2.5812013149261475, + "333": 2.487825393676758, + "334": 2.4659054279327393, + "335": 2.457551956176758, + "336": 2.4987754821777344, + "337": 2.497960090637207, + "338": 2.5343010425567627, + "339": 2.4975650310516357, + "340": 2.482572317123413, + "341": 2.4568405151367188, + "342": 2.476325035095215, + "343": 2.4544246196746826, + "344": 2.4693198204040527, + "345": 2.4664196968078613, + "346": 2.4847562313079834, + "347": 2.475238084793091, + "348": 2.4955506324768066, + "349": 2.4817051887512207, + "350": 2.5186798572540283, + "351": 2.4716553688049316, + "352": 2.469747543334961, + "353": 2.458035707473755, + "354": 2.455958127975464, + "355": 2.457535743713379, + "356": 2.4468395709991455, + "357": 2.435042381286621, + "358": 2.4795143604278564, + "359": 2.466731071472168, + "360": 2.4289627075195312, + "361": 2.4095346927642822, + "362": 2.4092440605163574, + "363": 2.430191993713379, + "364": 2.440250873565674, + "365": 2.496913194656372, + "366": 2.4840247631073, + "367": 2.447493553161621, + "368": 2.5323054790496826, + "369": 2.428744316101074, + "370": 2.4415202140808105, + "371": 2.4556589126586914, + "372": 2.4467263221740723, + "373": 2.421767234802246, + "374": 2.431647777557373, + "375": 2.406076669692993, + "376": 2.4108290672302246, + "377": 2.4239096641540527 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 6.25626, + 9.327, + 11.8788, + 10.4754, + 8.41188, + 6.77887, + 4.95237, + 2.43456, + 3.03057, + 5.19832, + 5.31038, + 5.46868, + 4.75079, + 3.17364, + 3.27394, + 4.35342, + 5.59556, + 6.49248, + 5.51229, + 4.49002, + 4.64005, + 4.91648, + 4.9738, + 5.23709, + 4.9067, + 3.72736, + 3.832, + 4.76861, + 4.69061, + 5.03138, + 4.45035, + 4.34787, + 5.07388, + 4.83503, + 3.4753, + 4.20123, + 5.92838, + 6.55369, + 6.0277, + 4.55902, + 2.7881, + 2.85761, + 3.43146, + 2.43909, + 3.2918, + 3.70661, + 3.6936, + 3.54155, + 3.77063, + 3.8675, + 4.11793, + 4.28627, + 3.33701, + 4.75848 + ], + "train_epoch_time": 5.054785490036011, + "train_loss": 2.387372040919628, + "train_score": 0.287844108603093, + "val_loss": 2.4200390195463335, + "val_score": 0.28176126547850644 + }, + { + "epoch": 7, + "grad_norm": 0.7057314515113831, + "learning_rate": 0.464, + "model_norm": 87.4571304321289, + "step_logs": { + "grad_norm": { + "378": 0.6375489234924316, + "379": 0.8574041724205017, + "380": 0.9480672478675842, + "381": 1.0023635625839233, + "382": 0.8999370336532593, + "383": 0.7423200607299805, + "384": 0.7463111877441406, + "385": 0.8375042676925659, + "386": 0.9662033319473267, + "387": 0.8519492745399475, + "388": 0.7708650231361389, + "389": 0.8508585691452026, + "390": 0.7910328507423401, + "391": 0.8769818544387817, + "392": 0.7972829937934875, + "393": 0.649968147277832, + "394": 0.6651025414466858, + "395": 0.7307669520378113, + "396": 0.879423975944519, + "397": 0.9184058308601379, + "398": 1.015202283859253, + "399": 0.9219325184822083, + "400": 0.8425905704498291, + "401": 0.9114477038383484, + "402": 0.8427307605743408, + "403": 0.6758178472518921, + "404": 0.6432573795318604, + "405": 0.6658771634101868, + "406": 0.7152196168899536, + "407": 0.7890692353248596, + "408": 1.0599589347839355, + "409": 0.8242237567901611, + "410": 0.9556509852409363, + "411": 1.124696969985962, + "412": 1.0104082822799683, + "413": 0.8460323214530945, + "414": 0.7274619936943054, + "415": 0.8518417477607727, + "416": 1.1824973821640015, + "417": 1.078783631324768, + "418": 0.7930795550346375, + "419": 0.6923499703407288, + "420": 0.7985798716545105, + "421": 0.7141899466514587, + "422": 0.5975543856620789, + "423": 0.6306990385055542, + "424": 0.803292989730835, + "425": 0.9265243411064148, + "426": 1.0233440399169922, + "427": 0.7907835245132446, + "428": 0.5657371282577515, + "429": 0.5865322947502136, + "430": 0.6819055676460266, + "431": 0.7057314515113831 + }, + "loss": { + "378": 2.388603687286377, + "379": 2.380326271057129, + "380": 2.4764041900634766, + "381": 2.406127691268921, + "382": 2.4708681106567383, + "383": 2.38834810256958, + "384": 2.3860855102539062, + "385": 2.380411148071289, + "386": 2.4289848804473877, + "387": 2.437363386154175, + "388": 2.4080100059509277, + "389": 2.3924927711486816, + "390": 2.4215683937072754, + "391": 2.381993293762207, + "392": 2.427151679992676, + "393": 2.3666839599609375, + "394": 2.3613901138305664, + "395": 2.342604398727417, + "396": 2.4052934646606445, + "397": 2.409193992614746, + "398": 2.4350767135620117, + "399": 2.414005756378174, + "400": 2.4030091762542725, + "401": 2.3974595069885254, + "402": 2.4292213916778564, + "403": 2.3631086349487305, + "404": 2.3518929481506348, + "405": 2.3557896614074707, + "406": 2.370086193084717, + "407": 2.3623900413513184, + "408": 2.3709654808044434, + "409": 2.4475955963134766, + "410": 2.4127299785614014, + "411": 2.4374356269836426, + "412": 2.445082187652588, + "413": 2.4067864418029785, + "414": 2.368389129638672, + "415": 2.3836231231689453, + "416": 2.405555248260498, + "417": 2.504934787750244, + "418": 2.368527889251709, + "419": 2.375561237335205, + "420": 2.331961154937744, + "421": 2.3814916610717773, + "422": 2.328709363937378, + "423": 2.329042673110962, + "424": 2.345515251159668, + "425": 2.390723705291748, + "426": 2.384331703186035, + "427": 2.4061336517333984, + "428": 2.3169169425964355, + "429": 2.314420700073242, + "430": 2.3164196014404297, + "431": 2.3172173500061035 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 5.87648, + 3.23791, + 2.75514, + 2.39479, + 3.05088, + 4.33426, + 4.28397, + 3.39373, + 2.60188, + 3.35809, + 4.0523, + 3.30473, + 3.86997, + 3.09713, + 3.81832, + 5.60217, + 5.33815, + 4.38673, + 3.11008, + 2.85629, + 2.36269, + 2.84014, + 3.38472, + 2.88594, + 3.4205, + 5.17398, + 5.68392, + 5.3131, + 4.63325, + 3.79421, + 2.11031, + 3.60288, + 2.64186, + 1.92691, + 2.39497, + 3.36251, + 4.47541, + 3.28488, + 1.72034, + 2.15242, + 3.76569, + 4.95581, + 3.65666, + 4.66898, + 6.52169, + 5.85508, + 3.63488, + 2.78494, + 2.27679, + 3.84773, + 7.23904, + 6.72757, + 4.9816, + 4.65252 + ], + "train_epoch_time": 5.0608069896698, + "train_loss": 2.3236168134161184, + "train_score": 0.3147282997368056, + "val_loss": 2.370218958564522, + "val_score": 0.30544991440532127 + }, + { + "epoch": 8, + "grad_norm": 0.7709482312202454, + "learning_rate": 0.464, + "model_norm": 87.51288604736328, + "step_logs": { + "grad_norm": { + "432": 0.7742019891738892, + "433": 0.8048451542854309, + "434": 0.8584392070770264, + "435": 0.9014288783073425, + "436": 0.9329814910888672, + "437": 0.7818324565887451, + "438": 0.6735311150550842, + "439": 0.7207943797111511, + "440": 0.8359708189964294, + "441": 1.0091593265533447, + "442": 0.9774025082588196, + "443": 0.9620204567909241, + "444": 1.1691254377365112, + "445": 0.9814362525939941, + "446": 0.8438099026679993, + "447": 0.676818311214447, + "448": 0.754360556602478, + "449": 0.7340006232261658, + "450": 0.734951376914978, + "451": 0.7800947427749634, + "452": 0.7486482262611389, + "453": 0.7244609594345093, + "454": 0.7954311966896057, + "455": 0.8835601210594177, + "456": 0.8508532047271729, + "457": 0.8095985651016235, + "458": 0.855453610420227, + "459": 0.8926812410354614, + "460": 0.8820421099662781, + "461": 0.820753276348114, + "462": 0.7099539637565613, + "463": 0.6721706390380859, + "464": 0.7528849840164185, + "465": 0.8814135193824768, + "466": 0.8815662860870361, + "467": 0.7790881395339966, + "468": 0.7338931560516357, + "469": 0.7409322261810303, + "470": 0.7974900603294373, + "471": 0.8311624526977539, + "472": 0.8229005336761475, + "473": 0.8401023745536804, + "474": 0.8360876441001892, + "475": 0.8216034173965454, + "476": 0.8300091624259949, + "477": 0.8260737657546997, + "478": 1.0198419094085693, + "479": 0.8955154418945312, + "480": 0.8397411108016968, + "481": 1.089308738708496, + "482": 0.9894914627075195, + "483": 0.8322555422782898, + "484": 0.8540805578231812, + "485": 0.7709482312202454 + }, + "loss": { + "432": 2.3349180221557617, + "433": 2.3573460578918457, + "434": 2.3331007957458496, + "435": 2.385509967803955, + "436": 2.362790107727051, + "437": 2.3727855682373047, + "438": 2.3391332626342773, + "439": 2.3124806880950928, + "440": 2.353797674179077, + "441": 2.3675384521484375, + "442": 2.386098861694336, + "443": 2.3554799556732178, + "444": 2.3905320167541504, + "445": 2.422302007675171, + "446": 2.3350419998168945, + "447": 2.3125033378601074, + "448": 2.2823407649993896, + "449": 2.3381686210632324, + "450": 2.318347930908203, + "451": 2.322920083999634, + "452": 2.290309429168701, + "453": 2.3206136226654053, + "454": 2.3229427337646484, + "455": 2.345212459564209, + "456": 2.3333492279052734, + "457": 2.3499529361724854, + "458": 2.3212950229644775, + "459": 2.3592188358306885, + "460": 2.3115272521972656, + "461": 2.342606544494629, + "462": 2.287214994430542, + "463": 2.299330949783325, + "464": 2.286092758178711, + "465": 2.3428871631622314, + "466": 2.3253884315490723, + "467": 2.3341774940490723, + "468": 2.284053087234497, + "469": 2.3118135929107666, + "470": 2.2840709686279297, + "471": 2.315932512283325, + "472": 2.3071727752685547, + "473": 2.3133482933044434, + "474": 2.297257900238037, + "475": 2.2959394454956055, + "476": 2.2795228958129883, + "477": 2.307526111602783, + "478": 2.3175907135009766, + "479": 2.36483097076416, + "480": 2.3247885704040527, + "481": 2.3374457359313965, + "482": 2.3501124382019043, + "483": 2.3068530559539795, + "484": 2.3033838272094727, + "485": 2.3385672569274902 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 3.8955, + 3.63914, + 3.16603, + 2.93574, + 2.71443, + 3.88178, + 5.15632, + 4.45098, + 3.36811, + 2.32476, + 2.49771, + 2.54513, + 1.74893, + 2.5148, + 3.27948, + 5.04822, + 4.01072, + 4.33993, + 4.29202, + 3.81715, + 4.08638, + 4.42153, + 3.67141, + 3.00407, + 3.22308, + 3.58525, + 3.17203, + 2.96057, + 2.97112, + 3.47756, + 4.53781, + 5.08912, + 4.03308, + 3.01573, + 2.99217, + 3.84557, + 4.24073, + 4.2111, + 3.59136, + 3.35239, + 3.4071, + 3.27776, + 3.28629, + 3.40123, + 3.30886, + 3.3815, + 2.22829, + 2.94886, + 3.2968, + 1.96988, + 2.40029, + 3.33048, + 3.15768, + 3.93459 + ], + "train_epoch_time": 5.0537073612213135, + "train_loss": 2.280266422520751, + "train_score": 0.33841911751878484, + "val_loss": 2.3427316233703928, + "val_score": 0.324007965393373 + }, + { + "epoch": 9, + "grad_norm": 1.2686938047409058, + "learning_rate": 0.464, + "model_norm": 87.5705795288086, + "step_logs": { + "grad_norm": { + "486": 0.7851659059524536, + "487": 0.8902822732925415, + "488": 0.9662135243415833, + "489": 0.9832495450973511, + "490": 0.8648109436035156, + "491": 0.7941068410873413, + "492": 0.7695399522781372, + "493": 0.7362545132637024, + "494": 0.7355393171310425, + "495": 0.8485167622566223, + "496": 0.8725716471672058, + "497": 0.7784942984580994, + "498": 0.7393256425857544, + "499": 0.7618834376335144, + "500": 0.7379514575004578, + "501": 0.763367235660553, + "502": 0.7525189518928528, + "503": 0.6701223850250244, + "504": 0.721518874168396, + "505": 0.7934356331825256, + "506": 0.8603341579437256, + "507": 0.9990874528884888, + "508": 1.0042060613632202, + "509": 0.8474621772766113, + "510": 0.749873161315918, + "511": 0.6934238076210022, + "512": 0.7034761309623718, + "513": 0.8546044826507568, + "514": 0.8088599443435669, + "515": 0.6858404874801636, + "516": 0.6820484399795532, + "517": 0.7325760722160339, + "518": 0.8107496500015259, + "519": 0.8114433884620667, + "520": 0.7677037715911865, + "521": 0.7214632034301758, + "522": 0.6353814005851746, + "523": 0.7185224294662476, + "524": 0.8090643286705017, + "525": 0.7631515860557556, + "526": 0.7186344861984253, + "527": 0.8085775375366211, + "528": 0.9327294826507568, + "529": 0.9190691709518433, + "530": 0.9887126088142395, + "531": 0.9865207672119141, + "532": 0.9222756028175354, + "533": 0.8012776374816895, + "534": 0.6898269653320312, + "535": 0.6873668432235718, + "536": 0.7523880004882812, + "537": 0.8484282493591309, + "538": 1.0232796669006348, + "539": 1.2686938047409058 + }, + "loss": { + "486": 2.2811126708984375, + "487": 2.3104922771453857, + "488": 2.311701536178589, + "489": 2.3457655906677246, + "490": 2.2997727394104004, + "491": 2.3074018955230713, + "492": 2.2888741493225098, + "493": 2.26469087600708, + "494": 2.2830967903137207, + "495": 2.292107582092285, + "496": 2.310253620147705, + "497": 2.282397985458374, + "498": 2.2857651710510254, + "499": 2.224403142929077, + "500": 2.279804229736328, + "501": 2.271515130996704, + "502": 2.291240930557251, + "503": 2.265056848526001, + "504": 2.2835335731506348, + "505": 2.2513251304626465, + "506": 2.279625177383423, + "507": 2.294560432434082, + "508": 2.319828510284424, + "509": 2.2903213500976562, + "510": 2.287503719329834, + "511": 2.233738899230957, + "512": 2.2568113803863525, + "513": 2.2923905849456787, + "514": 2.3041679859161377, + "515": 2.2597708702087402, + "516": 2.233388900756836, + "517": 2.2521467208862305, + "518": 2.2564029693603516, + "519": 2.276836395263672, + "520": 2.2647783756256104, + "521": 2.255136251449585, + "522": 2.237454891204834, + "523": 2.2669026851654053, + "524": 2.258902072906494, + "525": 2.272812843322754, + "526": 2.254998207092285, + "527": 2.226747989654541, + "528": 2.261782169342041, + "529": 2.2746810913085938, + "530": 2.3026933670043945, + "531": 2.3066163063049316, + "532": 2.287996292114258, + "533": 2.289806842803955, + "534": 2.2100539207458496, + "535": 2.2068634033203125, + "536": 2.2271270751953125, + "537": 2.24106502532959, + "538": 2.2470970153808594, + "539": 2.2986319065093994 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 3.70019, + 2.91507, + 2.4762, + 2.42637, + 3.07498, + 3.65902, + 3.86509, + 4.17785, + 4.22, + 3.18357, + 3.03429, + 3.766, + 4.18177, + 3.8321, + 4.18641, + 3.89806, + 4.04609, + 5.04395, + 4.38644, + 3.57614, + 3.07985, + 2.29875, + 2.30044, + 3.18901, + 4.06805, + 4.64553, + 4.56033, + 3.13876, + 3.52182, + 4.80417, + 4.80102, + 4.19654, + 3.43276, + 3.45792, + 3.84272, + 4.33256, + 5.54224, + 4.39089, + 3.45089, + 3.90249, + 4.36647, + 3.40587, + 2.5998, + 2.69292, + 2.35557, + 2.37008, + 2.68989, + 3.56642, + 4.64432, + 4.67088, + 3.93424, + 3.11332, + 2.14602, + 1.42809 + ], + "train_epoch_time": 5.053997039794922, + "train_loss": 2.3445621073844616, + "train_score": 0.32579582139443464, + "val_loss": 2.4186753881785132, + "val_score": 0.3054364599398713 + }, + { + "epoch": 10, + "grad_norm": 0.9750382900238037, + "learning_rate": 0.464, + "model_norm": 87.63328552246094, + "step_logs": { + "grad_norm": { + "540": 1.1835196018218994, + "541": 0.94350665807724, + "542": 0.9540534019470215, + "543": 1.0197839736938477, + "544": 1.2302792072296143, + "545": 1.2041865587234497, + "546": 1.120396375656128, + "547": 0.8945721983909607, + "548": 0.7712628245353699, + "549": 0.7412620782852173, + "550": 0.7946042418479919, + "551": 0.8093903064727783, + "552": 0.7556217908859253, + "553": 0.7436901926994324, + "554": 0.7568050026893616, + "555": 0.7321219444274902, + "556": 0.7226343750953674, + "557": 0.7480760216712952, + "558": 0.7279438972473145, + "559": 0.7623870372772217, + "560": 0.8186307549476624, + "561": 0.7554833889007568, + "562": 0.7386659383773804, + "563": 0.7012243866920471, + "564": 0.6587784290313721, + "565": 0.7076345086097717, + "566": 0.7385809421539307, + "567": 0.7746321558952332, + "568": 0.7839486002922058, + "569": 0.7919442653656006, + "570": 0.7336534261703491, + "571": 0.6945879459381104, + "572": 0.7922754287719727, + "573": 0.8719609975814819, + "574": 0.8545740246772766, + "575": 0.8933283090591431, + "576": 0.9756830334663391, + "577": 0.9009069204330444, + "578": 0.8197300434112549, + "579": 0.7380638122558594, + "580": 0.6490558981895447, + "581": 0.6385520100593567, + "582": 0.7874158620834351, + "583": 0.8764427304267883, + "584": 0.8372117877006531, + "585": 0.7458502650260925, + "586": 0.7517497539520264, + "587": 0.7369568347930908, + "588": 0.7382214069366455, + "589": 0.7276107668876648, + "590": 0.7043472528457642, + "591": 0.766448974609375, + "592": 0.8357090353965759, + "593": 0.9750382900238037 + }, + "loss": { + "540": 2.3539493083953857, + "541": 2.2981619834899902, + "542": 2.2958292961120605, + "543": 2.322713613510132, + "544": 2.2743759155273438, + "545": 2.34297776222229, + "546": 2.334174156188965, + "547": 2.297511100769043, + "548": 2.248453140258789, + "549": 2.232665538787842, + "550": 2.2525105476379395, + "551": 2.2676198482513428, + "552": 2.2209653854370117, + "553": 2.232907772064209, + "554": 2.2223243713378906, + "555": 2.2527878284454346, + "556": 2.2135627269744873, + "557": 2.2180428504943848, + "558": 2.1899607181549072, + "559": 2.2403485774993896, + "560": 2.1900129318237305, + "561": 2.2425734996795654, + "562": 2.2281856536865234, + "563": 2.2081387042999268, + "564": 2.2237088680267334, + "565": 2.1757054328918457, + "566": 2.1926674842834473, + "567": 2.2095744609832764, + "568": 2.2260971069335938, + "569": 2.2344472408294678, + "570": 2.2290022373199463, + "571": 2.2139875888824463, + "572": 2.2216098308563232, + "573": 2.253282308578491, + "574": 2.2164113521575928, + "575": 2.213837146759033, + "576": 2.2379891872406006, + "577": 2.257227897644043, + "578": 2.2344202995300293, + "579": 2.2295289039611816, + "580": 2.1947102546691895, + "581": 2.185819625854492, + "582": 2.213284492492676, + "583": 2.2145490646362305, + "584": 2.2354774475097656, + "585": 2.1978392601013184, + "586": 2.1795501708984375, + "587": 2.1730775833129883, + "588": 2.193183183670044, + "589": 2.1779942512512207, + "590": 2.195094585418701, + "591": 2.188117504119873, + "592": 2.2057876586914062, + "593": 2.2147738933563232 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 1.68053, + 2.58161, + 2.52229, + 2.23347, + 1.50264, + 1.61577, + 1.85947, + 2.87096, + 3.77989, + 4.06331, + 3.56751, + 3.46142, + 3.88985, + 4.03726, + 3.88007, + 4.20294, + 4.23891, + 3.9635, + 4.13276, + 3.85447, + 3.26791, + 3.92913, + 4.08371, + 4.49068, + 5.12388, + 4.34492, + 4.01954, + 3.68229, + 3.62217, + 3.56271, + 4.14122, + 4.58903, + 3.53928, + 2.96361, + 3.03495, + 2.77411, + 2.35093, + 2.78109, + 3.32524, + 4.09284, + 5.2097, + 5.3607, + 3.56968, + 2.88296, + 3.18933, + 3.95087, + 3.85674, + 4.00121, + 4.0244, + 4.11395, + 4.42466, + 3.72481, + 3.1583, + 2.32963 + ], + "train_epoch_time": 5.053994417190552, + "train_loss": 2.26130206978304, + "train_score": 0.32888495330317974, + "val_loss": 2.342782899514953, + "val_score": 0.30547233848018845 + }, + { + "epoch": 11, + "grad_norm": 0.8273022174835205, + "learning_rate": 0.464, + "model_norm": 87.70013427734375, + "step_logs": { + "grad_norm": { + "594": 1.0164622068405151, + "595": 0.978763222694397, + "596": 0.9974223971366882, + "597": 0.9824926257133484, + "598": 0.9369617700576782, + "599": 0.9179539680480957, + "600": 0.9198263883590698, + "601": 0.9800050854682922, + "602": 0.9910167455673218, + "603": 1.0632083415985107, + "604": 0.9760513305664062, + "605": 0.9206082224845886, + "606": 0.9623233079910278, + "607": 0.944540798664093, + "608": 0.7439101338386536, + "609": 0.679442822933197, + "610": 0.7358267903327942, + "611": 0.8010754585266113, + "612": 0.7818032503128052, + "613": 0.7321988344192505, + "614": 0.8337776064872742, + "615": 0.9823498129844666, + "616": 0.9329684972763062, + "617": 0.975547194480896, + "618": 0.9363654851913452, + "619": 0.7973529696464539, + "620": 0.7889009714126587, + "621": 0.8207709789276123, + "622": 0.7937414646148682, + "623": 0.7389844059944153, + "624": 0.7226306200027466, + "625": 0.7109134197235107, + "626": 0.7178105711936951, + "627": 0.7838050127029419, + "628": 0.8162763118743896, + "629": 0.775202751159668, + "630": 0.6732639074325562, + "631": 0.6817336082458496, + "632": 0.7645159363746643, + "633": 0.8073098063468933, + "634": 0.931305468082428, + "635": 0.9899008274078369, + "636": 0.9269702434539795, + "637": 0.8649294972419739, + "638": 0.7406699657440186, + "639": 0.7121018767356873, + "640": 0.7555528879165649, + "641": 0.777006208896637, + "642": 0.7744395732879639, + "643": 0.8030886054039001, + "644": 0.8008630275726318, + "645": 0.7854599952697754, + "646": 0.805654764175415, + "647": 0.8273022174835205 + }, + "loss": { + "594": 2.2600948810577393, + "595": 2.260560989379883, + "596": 2.2460503578186035, + "597": 2.2132647037506104, + "598": 2.213008403778076, + "599": 2.1883063316345215, + "600": 2.2109174728393555, + "601": 2.2285168170928955, + "602": 2.251828670501709, + "603": 2.2240641117095947, + "604": 2.242880344390869, + "605": 2.2348902225494385, + "606": 2.2197818756103516, + "607": 2.222428560256958, + "608": 2.182300567626953, + "609": 2.184536933898926, + "610": 2.1794681549072266, + "611": 2.1533432006835938, + "612": 2.201977491378784, + "613": 2.161073684692383, + "614": 2.1864712238311768, + "615": 2.2045974731445312, + "616": 2.236830234527588, + "617": 2.22475528717041, + "618": 2.2685189247131348, + "619": 2.211355686187744, + "620": 2.182935953140259, + "621": 2.196930408477783, + "622": 2.1796860694885254, + "623": 2.153675079345703, + "624": 2.172628402709961, + "625": 2.1341538429260254, + "626": 2.159029483795166, + "627": 2.189436197280884, + "628": 2.1797616481781006, + "629": 2.1593220233917236, + "630": 2.1281826496124268, + "631": 2.1284921169281006, + "632": 2.1576924324035645, + "633": 2.1742377281188965, + "634": 2.159294605255127, + "635": 2.198235034942627, + "636": 2.2072763442993164, + "637": 2.197878837585449, + "638": 2.1755714416503906, + "639": 2.115135669708252, + "640": 2.150275707244873, + "641": 2.1557679176330566, + "642": 2.154815673828125, + "643": 2.165224552154541, + "644": 2.148066759109497, + "645": 2.156641960144043, + "646": 2.171790838241577, + "647": 2.1523656845092773 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 2.18748, + 2.35972, + 2.25767, + 2.29285, + 2.52081, + 2.59697, + 2.61313, + 2.32038, + 2.29284, + 1.96748, + 2.35429, + 2.63698, + 2.397, + 2.49107, + 3.94343, + 4.7321, + 4.02531, + 3.35557, + 3.60262, + 4.03099, + 3.14516, + 2.28453, + 2.5698, + 2.33768, + 2.58733, + 3.47822, + 3.50749, + 3.26116, + 3.45968, + 3.94375, + 4.16057, + 4.22272, + 4.19024, + 3.56382, + 3.27141, + 3.59324, + 4.69503, + 4.57976, + 3.69162, + 3.336, + 2.48959, + 2.24332, + 2.56877, + 2.93794, + 3.96574, + 4.17113, + 3.76673, + 3.5707, + 3.59282, + 3.35719, + 3.34912, + 3.49567, + 3.34595, + 3.14476 + ], + "train_epoch_time": 5.0541229248046875, + "train_loss": 2.1574668594207105, + "train_score": 0.3588986280487805, + "val_loss": 2.25482671898624, + "val_score": 0.33570877648248465 + }, + { + "epoch": 12, + "grad_norm": 0.5213214159011841, + "learning_rate": 0.464, + "model_norm": 87.76070404052734, + "step_logs": { + "grad_norm": { + "648": 0.7569950819015503, + "649": 0.7735424637794495, + "650": 0.792700469493866, + "651": 0.768092930316925, + "652": 0.7337979078292847, + "653": 0.6889377236366272, + "654": 0.714954674243927, + "655": 0.7085017561912537, + "656": 0.6625272631645203, + "657": 0.5941329002380371, + "658": 0.5833716988563538, + "659": 0.6013264656066895, + "660": 0.6253089308738708, + "661": 0.6521468162536621, + "662": 0.6336640119552612, + "663": 0.6292108297348022, + "664": 0.6899257898330688, + "665": 0.7523852586746216, + "666": 0.7956793308258057, + "667": 0.7607225179672241, + "668": 0.7084807753562927, + "669": 0.6430822610855103, + "670": 0.6105523705482483, + "671": 0.6632283926010132, + "672": 0.6920416355133057, + "673": 0.6887260675430298, + "674": 0.6674964427947998, + "675": 0.6178245544433594, + "676": 0.6420121192932129, + "677": 0.7029278874397278, + "678": 0.6824374794960022, + "679": 0.6836779713630676, + "680": 0.6397414207458496, + "681": 0.5898133516311646, + "682": 0.6011072993278503, + "683": 0.6327524185180664, + "684": 0.6570475697517395, + "685": 0.653169572353363, + "686": 0.5740808248519897, + "687": 0.5248444676399231, + "688": 0.52183598279953, + "689": 0.5088307857513428, + "690": 0.5654743313789368, + "691": 0.5853663682937622, + "692": 0.5990533828735352, + "693": 0.596141517162323, + "694": 0.6164954304695129, + "695": 0.6716930866241455, + "696": 0.6152611374855042, + "697": 0.5288187861442566, + "698": 0.5166124701499939, + "699": 0.5450300574302673, + "700": 0.5482425689697266, + "701": 0.5213214159011841 + }, + "loss": { + "648": 2.1507115364074707, + "649": 2.142016887664795, + "650": 2.1239571571350098, + "651": 2.1406073570251465, + "652": 2.1432909965515137, + "653": 2.1183032989501953, + "654": 2.1332972049713135, + "655": 2.136540412902832, + "656": 2.130563735961914, + "657": 2.1043756008148193, + "658": 2.102400779724121, + "659": 2.1108832359313965, + "660": 2.112490177154541, + "661": 2.0898590087890625, + "662": 2.1311960220336914, + "663": 2.1345791816711426, + "664": 2.114157199859619, + "665": 2.1389048099517822, + "666": 2.105990409851074, + "667": 2.086348056793213, + "668": 2.120601177215576, + "669": 2.1081249713897705, + "670": 2.0916836261749268, + "671": 2.1078715324401855, + "672": 2.1157338619232178, + "673": 2.07753324508667, + "674": 2.1002273559570312, + "675": 2.0919601917266846, + "676": 2.053832530975342, + "677": 2.0911052227020264, + "678": 2.0771384239196777, + "679": 2.106577157974243, + "680": 2.091855525970459, + "681": 2.091642379760742, + "682": 2.0712103843688965, + "683": 2.0842442512512207, + "684": 2.0857014656066895, + "685": 2.081791400909424, + "686": 2.0966548919677734, + "687": 2.110111713409424, + "688": 2.063055992126465, + "689": 2.052750587463379, + "690": 2.0683670043945312, + "691": 2.07271146774292, + "692": 2.038562297821045, + "693": 2.030576229095459, + "694": 2.0623843669891357, + "695": 2.085425853729248, + "696": 2.0528316497802734, + "697": 2.0866293907165527, + "698": 2.0487313270568848, + "699": 2.0785648822784424, + "700": 2.0680387020111084, + "701": 2.0458428859710693 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 3.75315, + 3.57977, + 3.38008, + 3.62835, + 3.98041, + 4.46301, + 4.17344, + 4.25627, + 4.85386, + 5.96151, + 6.17767, + 5.83772, + 5.40263, + 4.9139, + 5.30769, + 5.39163, + 4.44153, + 3.77843, + 3.32644, + 3.60524, + 4.22477, + 5.09757, + 5.61113, + 4.79201, + 4.4177, + 4.37981, + 4.71377, + 5.48054, + 4.98286, + 4.23208, + 4.46005, + 4.50686, + 5.1112, + 6.01254, + 5.73219, + 5.20572, + 4.83124, + 4.87961, + 6.36181, + 7.66027, + 7.57605, + 7.92847, + 6.46847, + 6.049, + 5.68058, + 5.71374, + 5.42638, + 4.62225, + 5.42293, + 7.46159, + 7.67636, + 6.99717, + 6.88039, + 7.52768 + ], + "train_epoch_time": 5.060282945632935, + "train_loss": 2.0488108048651106, + "train_score": 0.3966710007173601, + "val_loss": 2.156653976057208, + "val_score": 0.36659102342022 + }, + { + "epoch": 13, + "grad_norm": 0.3689773678779602, + "learning_rate": 0.3093333333333334, + "model_norm": 87.79572296142578, + "step_logs": { + "grad_norm": { + "702": 0.5364833474159241, + "703": 0.5259773135185242, + "704": 0.5002303719520569, + "705": 0.4888346791267395, + "706": 0.5394003391265869, + "707": 0.5595197081565857, + "708": 0.5669735074043274, + "709": 0.5195918679237366, + "710": 0.47284796833992004, + "711": 0.43037548661231995, + "712": 0.4000774323940277, + "713": 0.4237956404685974, + "714": 0.436426043510437, + "715": 0.46994078159332275, + "716": 0.43116238713264465, + "717": 0.4097455143928528, + "718": 0.3929866850376129, + "719": 0.39943069219589233, + "720": 0.3972277343273163, + "721": 0.38414469361305237, + "722": 0.41446948051452637, + "723": 0.4122876822948456, + "724": 0.3981236517429352, + "725": 0.43609535694122314, + "726": 0.4336734414100647, + "727": 0.42594480514526367, + "728": 0.4029075503349304, + "729": 0.3956875503063202, + "730": 0.3929194509983063, + "731": 0.409662663936615, + "732": 0.42363542318344116, + "733": 0.4122909605503082, + "734": 0.4097675085067749, + "735": 0.3712646961212158, + "736": 0.36579084396362305, + "737": 0.3617793917655945, + "738": 0.3636852204799652, + "739": 0.39375123381614685, + "740": 0.34693223237991333, + "741": 0.36366233229637146, + "742": 0.3651773929595947, + "743": 0.3666030466556549, + "744": 0.39372679591178894, + "745": 0.38121268153190613, + "746": 0.3903146982192993, + "747": 0.3828539252281189, + "748": 0.4013423025608063, + "749": 0.3672022223472595, + "750": 0.3636907935142517, + "751": 0.35521090030670166, + "752": 0.40065255761146545, + "753": 0.35991808772087097, + "754": 0.3769945502281189, + "755": 0.3689773678779602 + }, + "loss": { + "702": 2.034242868423462, + "703": 2.041623115539551, + "704": 2.015681266784668, + "705": 2.019620418548584, + "706": 2.045684576034546, + "707": 2.0498275756835938, + "708": 2.0647382736206055, + "709": 2.036111831665039, + "710": 2.041393280029297, + "711": 2.04914927482605, + "712": 2.036518096923828, + "713": 2.046377658843994, + "714": 2.0181939601898193, + "715": 2.0533552169799805, + "716": 2.0251524448394775, + "717": 2.048600196838379, + "718": 2.0302419662475586, + "719": 2.028761863708496, + "720": 2.031588554382324, + "721": 2.032290458679199, + "722": 2.0233216285705566, + "723": 2.0069386959075928, + "724": 2.020716905593872, + "725": 2.009347915649414, + "726": 2.027273178100586, + "727": 2.0027408599853516, + "728": 2.023124933242798, + "729": 2.033870220184326, + "730": 2.041031837463379, + "731": 1.9998836517333984, + "732": 2.045276403427124, + "733": 2.0177018642425537, + "734": 2.018979072570801, + "735": 2.0090126991271973, + "736": 2.0201165676116943, + "737": 1.996307373046875, + "738": 2.008244276046753, + "739": 2.030090093612671, + "740": 2.0360255241394043, + "741": 2.0037169456481934, + "742": 2.0351085662841797, + "743": 2.038628578186035, + "744": 2.0207865238189697, + "745": 2.034184455871582, + "746": 2.0122597217559814, + "747": 2.012249231338501, + "748": 2.008070468902588, + "749": 1.9660768508911133, + "750": 1.9985742568969727, + "751": 1.9919071197509766, + "752": 1.9824224710464478, + "753": 1.9904437065124512, + "754": 1.9913536310195923, + "755": 1.9934518337249756 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 7.0679, + 7.37975, + 8.0553, + 8.45173, + 7.03099, + 6.54767, + 6.42302, + 7.54183, + 9.13027, + 11.0631, + 12.7233, + 11.3939, + 10.596, + 9.29775, + 10.8937, + 12.2019, + 13.146, + 12.7159, + 12.8753, + 13.772, + 11.7782, + 11.8068, + 12.7488, + 10.5656, + 10.7792, + 11.0387, + 12.4627, + 12.9903, + 13.2203, + 11.9166, + 11.3964, + 11.87, + 12.0242, + 14.5752, + 15.0977, + 15.2525, + 15.1833, + 13.094, + 16.9159, + 15.1509, + 15.2609, + 15.1686, + 13.0356, + 13.9977, + 13.2085, + 13.7283, + 12.4666, + 14.5811, + 15.1097, + 15.7869, + 12.3498, + 15.3654, + 14.0113, + 14.6422 + ], + "train_epoch_time": 5.053859710693359, + "train_loss": 2.000632954191101, + "train_score": 0.4104801827985553, + "val_loss": 2.117544219907198, + "val_score": 0.3790273394020773 + }, + { + "epoch": 14, + "grad_norm": 0.3107752501964569, + "learning_rate": 0.1546666666666667, + "model_norm": 87.80721282958984, + "step_logs": { + "grad_norm": { + "756": 0.36391544342041016, + "757": 0.34061774611473083, + "758": 0.3373683989048004, + "759": 0.368813157081604, + "760": 0.37175610661506653, + "761": 0.3224388062953949, + "762": 0.30784475803375244, + "763": 0.3142504096031189, + "764": 0.3739141821861267, + "765": 0.3637954890727997, + "766": 0.34604036808013916, + "767": 0.3342316150665283, + "768": 0.35025209188461304, + "769": 0.33368217945098877, + "770": 0.35340237617492676, + "771": 0.3525720536708832, + "772": 0.3336862325668335, + "773": 0.3644021153450012, + "774": 0.320532888174057, + "775": 0.3363337516784668, + "776": 0.33224961161613464, + "777": 0.33082783222198486, + "778": 0.3233364522457123, + "779": 0.33373716473579407, + "780": 0.34446826577186584, + "781": 0.33136895298957825, + "782": 0.3461458683013916, + "783": 0.3803730607032776, + "784": 0.3377346694469452, + "785": 0.3358157277107239, + "786": 0.32283443212509155, + "787": 0.3645469546318054, + "788": 0.3507978618144989, + "789": 0.3115021884441376, + "790": 0.31787246465682983, + "791": 0.3238396644592285, + "792": 0.3129032850265503, + "793": 0.3185950517654419, + "794": 0.34452077746391296, + "795": 0.329192191362381, + "796": 0.3147062063217163, + "797": 0.3022286295890808, + "798": 0.3186566233634949, + "799": 0.3093697130680084, + "800": 0.3028464913368225, + "801": 0.30358558893203735, + "802": 0.3344113230705261, + "803": 0.29959699511528015, + "804": 0.3260193467140198, + "805": 0.34442007541656494, + "806": 0.3120967447757721, + "807": 0.3306928873062134, + "808": 0.34221121668815613, + "809": 0.3107752501964569 + }, + "loss": { + "756": 2.008708953857422, + "757": 1.992264986038208, + "758": 1.9933116436004639, + "759": 1.99192476272583, + "760": 1.9767273664474487, + "761": 2.0058794021606445, + "762": 2.0074713230133057, + "763": 1.9907655715942383, + "764": 1.9953372478485107, + "765": 1.9932351112365723, + "766": 2.018752098083496, + "767": 1.9948519468307495, + "768": 2.0240020751953125, + "769": 1.9772720336914062, + "770": 1.9919686317443848, + "771": 1.9680653810501099, + "772": 2.0073580741882324, + "773": 1.99609375, + "774": 2.01682710647583, + "775": 2.000037670135498, + "776": 1.9889997243881226, + "777": 1.9887484312057495, + "778": 2.012557029724121, + "779": 1.9965174198150635, + "780": 2.014468193054199, + "781": 1.9837143421173096, + "782": 1.9620920419692993, + "783": 2.006413221359253, + "784": 1.994457721710205, + "785": 2.009927272796631, + "786": 1.964850902557373, + "787": 1.9998902082443237, + "788": 2.0035266876220703, + "789": 2.0017571449279785, + "790": 1.9732887744903564, + "791": 1.9986650943756104, + "792": 1.9936249256134033, + "793": 2.0048770904541016, + "794": 2.004767894744873, + "795": 1.9931751489639282, + "796": 1.9738640785217285, + "797": 1.9997490644454956, + "798": 1.9952969551086426, + "799": 1.9724063873291016, + "800": 1.9803149700164795, + "801": 1.9830210208892822, + "802": 1.9854111671447754, + "803": 1.9689775705337524, + "804": 1.9684094190597534, + "805": 1.9876070022583008, + "806": 1.9952058792114258, + "807": 1.9629908800125122, + "808": 2.017622470855713, + "809": 2.006425380706787 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 15.1676, + 17.1717, + 17.5132, + 14.644, + 14.3031, + 19.2935, + 21.1829, + 20.159, + 14.2716, + 15.0607, + 16.8589, + 17.8573, + 16.4987, + 17.7583, + 15.9494, + 15.8323, + 18.028, + 15.0321, + 19.6301, + 17.6806, + 18.018, + 18.1709, + 19.2504, + 17.9252, + 16.977, + 18.0657, + 16.3757, + 13.8676, + 17.4853, + 17.8229, + 18.8525, + 15.0487, + 16.281, + 20.6295, + 19.5292, + 19.0581, + 20.3621, + 19.7519, + 16.8901, + 18.3927, + 19.93, + 21.893, + 19.65, + 20.6082, + 21.5918, + 21.5162, + 17.7537, + 21.9364, + 18.5195, + 16.7554, + 20.4837, + 17.9502, + 17.2287, + 20.7745 + ], + "train_epoch_time": 5.053775787353516, + "train_loss": 1.9869832968609233, + "train_score": 0.41368252341682293, + "val_loss": 2.108280881811639, + "val_score": 0.38268692625513306 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:28:56.414613", + "final_model_norm": 87.80721282958984, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:27:11.463805", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 9.828468322753906, + "learning_rate": 4.64e-11, + "model_norm": 87.36962890625, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.540498733520508, + "3": 8.915801048278809, + "4": 14.781766891479492, + "5": 5.608191013336182, + "6": 3.5889790058135986, + "7": 5.249990463256836, + "8": 3.8745484352111816, + "9": 4.497948169708252, + "10": 4.748721122741699, + "11": 4.648350715637207, + "12": 20.810983657836914, + "13": 6.429323673248291, + "14": 21.983104705810547, + "15": 3.9209253787994385, + "16": 6.288517951965332, + "17": 13.581155776977539, + "18": 5.620336055755615, + "19": 14.86163330078125, + "20": 9.477866172790527, + "21": 5.467764854431152, + "22": 3.5305988788604736, + "23": 10.763609886169434, + "24": 8.13704776763916, + "25": 5.859228610992432, + "26": 16.31954574584961, + "27": 7.764303207397461, + "28": 8.32188892364502, + "29": 10.810586929321289, + "30": 4.629059314727783, + "31": 7.128204345703125, + "32": 7.402172565460205, + "33": 8.087552070617676, + "34": 10.075281143188477, + "35": 3.894308090209961, + "36": 10.271742820739746, + "37": 4.768221378326416, + "38": 12.201920509338379, + "39": 7.188667297363281, + "40": 6.573385238647461, + "41": 12.998823165893555, + "42": 3.9970970153808594, + "43": 9.127283096313477, + "44": 10.654000282287598, + "45": 4.676833152770996, + "46": 12.401044845581055, + "47": 3.9859025478363037, + "48": 9.84560775756836, + "49": 7.28375768661499, + "50": 4.3989996910095215, + "51": 14.074361801147461, + "52": 5.319989204406738, + "53": 9.828468322753906 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.8002490997314453, + "3": 3.9038262367248535, + "4": 4.260810852050781, + "5": 4.260765075683594, + "6": 3.5228896141052246, + "7": 3.688533306121826, + "8": 4.044903755187988, + "9": 3.5027456283569336, + "10": 3.9566173553466797, + "11": 4.078937530517578, + "12": 4.282462120056152, + "13": 7.085672855377197, + "14": 4.148512840270996, + "15": 3.8898236751556396, + "16": 3.8796591758728027, + "17": 4.587935447692871, + "18": 3.8093762397766113, + "19": 4.920986652374268, + "20": 5.559451580047607, + "21": 4.693474292755127, + "22": 3.4429643154144287, + "23": 4.915096282958984, + "24": 5.12191915512085, + "25": 3.9754562377929688, + "26": 5.782840251922607, + "27": 4.330013275146484, + "28": 4.404635906219482, + "29": 4.462398052215576, + "30": 5.823443412780762, + "31": 4.089975357055664, + "32": 3.8508737087249756, + "33": 5.038377285003662, + "34": 4.2024688720703125, + "35": 3.9904799461364746, + "36": 6.611223220825195, + "37": 5.6425323486328125, + "38": 5.7696852684021, + "39": 4.86868953704834, + "40": 4.1282758712768555, + "41": 5.179817199707031, + "42": 4.502457618713379, + "43": 4.382997989654541, + "44": 5.242732524871826, + "45": 4.001960754394531, + "46": 6.762143135070801, + "47": 4.60350227355957, + "48": 6.042437553405762, + "49": 5.121825695037842, + "50": 4.212311744689941, + "51": 5.92720890045166, + "52": 4.657595157623291, + "53": 5.204258918762207 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 0.00869338, + 0.00874607, + 0.0888363, + 0.04911, + 0.0195002, + 0.13547, + 0.2735, + 0.133825, + 0.269442, + 0.173133, + 0.175457, + 0.188777, + 0.009888, + 0.171416, + 0.00858449, + 0.253019, + 0.0981063, + 0.0248739, + 0.120595, + 0.0222802, + 0.0618886, + 0.156991, + 0.276208, + 0.0424244, + 0.0773569, + 0.115799, + 0.0217133, + 0.0718264, + 0.0636013, + 0.038183, + 0.271766, + 0.0804934, + 0.0702815, + 0.0770294, + 0.041399, + 0.263126, + 0.0626605, + 0.248177, + 0.0387521, + 0.094214, + 0.0955412, + 0.0306554, + 0.281812, + 0.0526124, + 0.0461883, + 0.182965, + 0.0439711, + 0.289758, + 0.0623343, + 0.0965414, + 0.217677, + 0.0299222, + 0.164566, + 0.053875 + ], + "train_epoch_time": 5.054558753967285, + "train_loss": 4.656336928576958, + "train_score": 0.17824381281176804, + "val_loss": 4.700132458683544, + "val_score": 0.17385727603386245 + }, + { + "epoch": 1, + "grad_norm": 1.9219194650650024, + "learning_rate": 0.464, + "model_norm": 87.32701873779297, + "step_logs": { + "grad_norm": { + "54": 8.648448944091797, + "55": 4.578888416290283, + "56": 9.155689239501953, + "57": 4.598766326904297, + "58": 11.530369758605957, + "59": 11.030740737915039, + "60": 4.289649963378906, + "61": 10.072020530700684, + "62": 4.655407905578613, + "63": 6.099062442779541, + "64": 4.3996148109436035, + "65": 5.014732837677002, + "66": 6.065854549407959, + "67": 3.0276389122009277, + "68": 7.581371784210205, + "69": 2.718651533126831, + "70": 12.671808242797852, + "71": 5.8091840744018555, + "72": 5.4534735679626465, + "73": 2.60016131401062, + "74": 8.829605102539062, + "75": 4.44790506362915, + "76": 4.387836933135986, + "77": 5.095001697540283, + "78": 3.3428914546966553, + "79": 6.636009216308594, + "80": 7.214669704437256, + "81": 2.0348479747772217, + "82": 6.88657808303833, + "83": 2.659146785736084, + "84": 5.854611396789551, + "85": 3.974867105484009, + "86": 2.5644941329956055, + "87": 8.266683578491211, + "88": 4.98799467086792, + "89": 6.879557132720947, + "90": 2.684969902038574, + "91": 9.80031681060791, + "92": 7.215325832366943, + "93": 5.047442436218262, + "94": 5.031023025512695, + "95": 4.495306491851807, + "96": 4.4241228103637695, + "97": 3.1164910793304443, + "98": 9.714051246643066, + "99": 3.9311654567718506, + "100": 3.965953826904297, + "101": 3.0835049152374268, + "102": 6.171586990356445, + "103": 2.359495162963867, + "104": 1.8651447296142578, + "105": 2.323331832885742, + "106": 3.661193609237671, + "107": 1.9219194650650024 + }, + "loss": { + "54": 4.683562278747559, + "55": 4.135055065155029, + "56": 4.433767318725586, + "57": 3.9660756587982178, + "58": 6.08804988861084, + "59": 5.184176921844482, + "60": 3.7943224906921387, + "61": 4.71868896484375, + "62": 4.050958633422852, + "63": 4.403265476226807, + "64": 4.339849948883057, + "65": 3.511442184448242, + "66": 4.623457908630371, + "67": 3.6430869102478027, + "68": 4.774730682373047, + "69": 3.583538055419922, + "70": 6.695191383361816, + "71": 4.642904281616211, + "72": 3.9379208087921143, + "73": 4.109834671020508, + "74": 4.738090515136719, + "75": 3.473706007003784, + "76": 3.4843969345092773, + "77": 4.260203838348389, + "78": 3.5664355754852295, + "79": 4.1052961349487305, + "80": 5.061912536621094, + "81": 3.204319715499878, + "82": 4.00563383102417, + "83": 3.421499252319336, + "84": 4.289734840393066, + "85": 3.433037519454956, + "86": 3.4934334754943848, + "87": 4.548451900482178, + "88": 4.164238929748535, + "89": 3.838240623474121, + "90": 3.443056583404541, + "91": 4.547194480895996, + "92": 4.2838826179504395, + "93": 3.8870186805725098, + "94": 4.154099464416504, + "95": 3.80397629737854, + "96": 3.806995153427124, + "97": 3.9605796337127686, + "98": 4.141651153564453, + "99": 3.4924166202545166, + "100": 3.4166598320007324, + "101": 3.3425891399383545, + "102": 4.16901159286499, + "103": 3.720499277114868, + "104": 3.306309223175049, + "105": 3.304769277572632, + "106": 3.4789741039276123, + "107": 3.6573867797851562 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.0626181, + 0.197225, + 0.0528921, + 0.187533, + 0.0457922, + 0.042606, + 0.206201, + 0.0465145, + 0.186914, + 0.118372, + 0.224205, + 0.139634, + 0.125656, + 0.397431, + 0.0830717, + 0.484847, + 0.0416952, + 0.137581, + 0.13241, + 0.607888, + 0.0607744, + 0.175583, + 0.180978, + 0.164113, + 0.319146, + 0.0932246, + 0.0972482, + 0.773877, + 0.0844626, + 0.483873, + 0.125151, + 0.217287, + 0.531189, + 0.0665581, + 0.167372, + 0.0810982, + 0.477601, + 0.0473438, + 0.0822859, + 0.152572, + 0.164121, + 0.188243, + 0.194504, + 0.407781, + 0.0438907, + 0.225987, + 0.217223, + 0.351555, + 0.109456, + 0.668287, + 0.950426, + 0.612236, + 0.259541, + 0.990149 + ], + "train_epoch_time": 5.054613828659058, + "train_loss": 3.2655267616939683, + "train_score": 0.1486504662840746, + "val_loss": 3.273803014295383, + "val_score": 0.14700685266991298 + }, + { + "epoch": 2, + "grad_norm": 0.8656755089759827, + "learning_rate": 0.464, + "model_norm": 87.37055206298828, + "step_logs": { + "grad_norm": { + "108": 2.224764823913574, + "109": 2.4785454273223877, + "110": 1.7006232738494873, + "111": 2.4705331325531006, + "112": 2.2004644870758057, + "113": 1.3159208297729492, + "114": 1.4564001560211182, + "115": 1.6065329313278198, + "116": 1.8932063579559326, + "117": 1.3055025339126587, + "118": 0.9097657799720764, + "119": 1.0402640104293823, + "120": 1.3806099891662598, + "121": 2.1265838146209717, + "122": 1.08148193359375, + "123": 1.3173999786376953, + "124": 1.15640389919281, + "125": 0.6935372948646545, + "126": 0.6353476643562317, + "127": 0.8891350626945496, + "128": 2.159102201461792, + "129": 1.2500455379486084, + "130": 1.0583934783935547, + "131": 0.719110369682312, + "132": 1.1054965257644653, + "133": 1.209761381149292, + "134": 1.1139384508132935, + "135": 1.0394201278686523, + "136": 1.0191543102264404, + "137": 1.1278642416000366, + "138": 1.42232346534729, + "139": 1.1458137035369873, + "140": 0.4841899275779724, + "141": 0.37479013204574585, + "142": 0.43170198798179626, + "143": 0.5522638559341431, + "144": 0.9335963129997253, + "145": 1.1546589136123657, + "146": 1.5090208053588867, + "147": 1.112670660018921, + "148": 0.7939635515213013, + "149": 0.8073285818099976, + "150": 0.9377206563949585, + "151": 1.3339279890060425, + "152": 1.0855871438980103, + "153": 0.6903332471847534, + "154": 0.5326964259147644, + "155": 0.5710505247116089, + "156": 0.6821045875549316, + "157": 0.9781208038330078, + "158": 1.0031172037124634, + "159": 1.0019264221191406, + "160": 0.9743958711624146, + "161": 0.8656755089759827 + }, + "loss": { + "108": 3.2651774883270264, + "109": 3.5514795780181885, + "110": 3.45906925201416, + "111": 3.0252509117126465, + "112": 3.6742281913757324, + "113": 3.1361379623413086, + "114": 2.958592176437378, + "115": 3.0104382038116455, + "116": 2.9714958667755127, + "117": 3.229057788848877, + "118": 2.8667311668395996, + "119": 2.767115831375122, + "120": 2.8680105209350586, + "121": 2.9593679904937744, + "122": 3.189466953277588, + "123": 2.9810893535614014, + "124": 2.914921522140503, + "125": 2.728799343109131, + "126": 2.6786763668060303, + "127": 2.713738441467285, + "128": 2.8392176628112793, + "129": 3.252112627029419, + "130": 2.979386329650879, + "131": 2.6869688034057617, + "132": 2.6942973136901855, + "133": 2.8581767082214355, + "134": 2.738443613052368, + "135": 2.77777099609375, + "136": 2.6899850368499756, + "137": 2.7787766456604004, + "138": 2.7383408546447754, + "139": 2.908726215362549, + "140": 2.6322412490844727, + "141": 2.595581531524658, + "142": 2.5955400466918945, + "143": 2.6038382053375244, + "144": 2.6158382892608643, + "145": 2.7760329246520996, + "146": 2.745797634124756, + "147": 2.921092987060547, + "148": 2.6332855224609375, + "149": 2.639986038208008, + "150": 2.6682796478271484, + "151": 2.692105770111084, + "152": 2.8223981857299805, + "153": 2.6438698768615723, + "154": 2.5921244621276855, + "155": 2.597104787826538, + "156": 2.5918169021606445, + "157": 2.620973825454712, + "158": 2.7008495330810547, + "159": 2.6395912170410156, + "160": 2.702662467956543, + "161": 2.632098436355591 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.659688, + 0.578117, + 1.19603, + 0.495656, + 0.758818, + 1.81107, + 1.39484, + 1.16641, + 0.829047, + 1.89461, + 3.4636, + 2.55706, + 1.50466, + 0.654386, + 2.72696, + 1.71767, + 2.17976, + 5.67325, + 6.63586, + 3.43267, + 0.609049, + 2.0812, + 2.6597, + 5.19603, + 2.2046, + 1.95294, + 2.20689, + 2.57107, + 2.58982, + 2.18444, + 1.3536, + 2.21552, + 11.2278, + 18.4781, + 13.9271, + 8.5373, + 3.00118, + 2.08217, + 1.20581, + 2.35946, + 4.17731, + 4.05043, + 3.03448, + 1.51296, + 2.39491, + 5.54782, + 9.13474, + 7.96417, + 5.57061, + 2.73954, + 2.68409, + 2.62945, + 2.84656, + 3.5123 + ], + "train_epoch_time": 5.060021638870239, + "train_loss": 2.664649004436805, + "train_score": 0.23297278515492142, + "val_loss": 2.700881416022983, + "val_score": 0.2264369263577817 + }, + { + "epoch": 3, + "grad_norm": 0.8106290698051453, + "learning_rate": 0.464, + "model_norm": 87.40774536132812, + "step_logs": { + "grad_norm": { + "162": 0.8817408084869385, + "163": 1.0190868377685547, + "164": 1.0005944967269897, + "165": 0.8770706057548523, + "166": 0.9105992317199707, + "167": 1.1062794923782349, + "168": 1.010089635848999, + "169": 0.6871556639671326, + "170": 0.6717459559440613, + "171": 0.8958699703216553, + "172": 0.9576413035392761, + "173": 1.0030708312988281, + "174": 0.9145611524581909, + "175": 0.7132887244224548, + "176": 0.8106444478034973, + "177": 1.098758578300476, + "178": 1.0777448415756226, + "179": 0.8357540369033813, + "180": 0.7871129512786865, + "181": 0.8443882465362549, + "182": 0.9341241717338562, + "183": 1.0236402750015259, + "184": 0.8844297528266907, + "185": 0.6667125821113586, + "186": 0.7151057720184326, + "187": 0.9944509863853455, + "188": 0.9487119913101196, + "189": 0.7194960713386536, + "190": 0.7447572946548462, + "191": 0.9531127214431763, + "192": 1.022182583808899, + "193": 0.890994668006897, + "194": 0.8254366517066956, + "195": 0.8376187086105347, + "196": 0.8385922908782959, + "197": 0.786694347858429, + "198": 0.8559815287590027, + "199": 1.051024317741394, + "200": 1.0123729705810547, + "201": 0.9012372493743896, + "202": 0.9190186858177185, + "203": 1.050588607788086, + "204": 1.076776385307312, + "205": 0.9972522854804993, + "206": 0.898023247718811, + "207": 0.8465771079063416, + "208": 0.8197812438011169, + "209": 0.7887733578681946, + "210": 0.8892205357551575, + "211": 1.0172501802444458, + "212": 0.9609780311584473, + "213": 0.813028872013092, + "214": 0.7615267038345337, + "215": 0.8106290698051453 + }, + "loss": { + "162": 2.6643261909484863, + "163": 2.629546642303467, + "164": 2.7244205474853516, + "165": 2.6418588161468506, + "166": 2.6596574783325195, + "167": 2.610051155090332, + "168": 2.7604384422302246, + "169": 2.614164352416992, + "170": 2.6069884300231934, + "171": 2.591151237487793, + "172": 2.671731948852539, + "173": 2.6373350620269775, + "174": 2.697756767272949, + "175": 2.591090440750122, + "176": 2.5971221923828125, + "177": 2.642165422439575, + "178": 2.7219491004943848, + "179": 2.636932849884033, + "180": 2.6107211112976074, + "181": 2.6221394538879395, + "182": 2.638202667236328, + "183": 2.628596782684326, + "184": 2.6915245056152344, + "185": 2.5791103839874268, + "186": 2.585062026977539, + "187": 2.5909202098846436, + "188": 2.6705780029296875, + "189": 2.5907747745513916, + "190": 2.5837063789367676, + "191": 2.5773487091064453, + "192": 2.6718831062316895, + "193": 2.611485481262207, + "194": 2.6175007820129395, + "195": 2.5951855182647705, + "196": 2.620110511779785, + "197": 2.575089454650879, + "198": 2.6082868576049805, + "199": 2.6181271076202393, + "200": 2.6842989921569824, + "201": 2.5998175144195557, + "202": 2.6240413188934326, + "203": 2.6005783081054688, + "204": 2.672299385070801, + "205": 2.602001667022705, + "206": 2.6349525451660156, + "207": 2.5763492584228516, + "208": 2.617816925048828, + "209": 2.5412867069244385, + "210": 2.6208558082580566, + "211": 2.60310697555542, + "212": 2.64933180809021, + "213": 2.567422389984131, + "214": 2.581874370574951, + "215": 2.564702033996582 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 3.42693, + 2.53197, + 2.72118, + 3.43432, + 3.20753, + 2.13265, + 2.70557, + 5.53634, + 5.77735, + 3.22852, + 2.91331, + 2.62121, + 3.22535, + 5.09274, + 3.95213, + 2.18854, + 2.34341, + 3.77522, + 4.21392, + 3.67766, + 3.02342, + 2.50859, + 3.4409, + 5.8022, + 5.05511, + 2.61992, + 2.96713, + 5.00464, + 4.65815, + 2.83717, + 2.55718, + 3.28956, + 3.84166, + 3.69893, + 3.72579, + 4.16083, + 3.55981, + 2.37009, + 2.61909, + 3.20084, + 3.10686, + 2.35616, + 2.3048, + 2.61636, + 3.26736, + 3.59477, + 3.89532, + 4.0846, + 3.31455, + 2.51557, + 2.86886, + 3.88406, + 4.4521, + 3.90295 + ], + "train_epoch_time": 5.0556724071502686, + "train_loss": 2.600386273467558, + "train_score": 0.23735876973595477, + "val_loss": 2.641591264514507, + "val_score": 0.23300265490871894 + }, + { + "epoch": 4, + "grad_norm": 0.715764045715332, + "learning_rate": 0.464, + "model_norm": 87.44450378417969, + "step_logs": { + "grad_norm": { + "216": 0.8147035241127014, + "217": 0.7814739942550659, + "218": 0.8018308877944946, + "219": 0.8745168447494507, + "220": 0.848491907119751, + "221": 0.7085331082344055, + "222": 0.6883143186569214, + "223": 0.795415461063385, + "224": 0.8790597319602966, + "225": 0.9082624316215515, + "226": 0.822569727897644, + "227": 0.697720468044281, + "228": 0.7037535309791565, + "229": 0.8077332973480225, + "230": 0.8776323795318604, + "231": 0.9197269082069397, + "232": 0.914128839969635, + "233": 0.8946424126625061, + "234": 0.8728997707366943, + "235": 0.9009494781494141, + "236": 0.8744539022445679, + "237": 0.7535949349403381, + "238": 0.7415274381637573, + "239": 0.8407319784164429, + "240": 0.8422963619232178, + "241": 0.8557510375976562, + "242": 0.902815043926239, + "243": 0.8630161285400391, + "244": 0.7883111238479614, + "245": 0.6763781309127808, + "246": 0.6961337327957153, + "247": 0.7964216470718384, + "248": 0.7997011542320251, + "249": 0.7394702434539795, + "250": 0.7804990410804749, + "251": 0.8331275582313538, + "252": 0.8206280469894409, + "253": 0.7827075719833374, + "254": 0.7920497059822083, + "255": 0.8238981366157532, + "256": 0.7816749811172485, + "257": 0.7070029377937317, + "258": 0.6945733428001404, + "259": 0.7669711709022522, + "260": 0.8645933866500854, + "261": 0.9310718178749084, + "262": 0.9162704944610596, + "263": 0.9239433407783508, + "264": 0.9411354064941406, + "265": 0.9254506230354309, + "266": 0.8275494575500488, + "267": 0.7672973871231079, + "268": 0.7667270302772522, + "269": 0.715764045715332 + }, + "loss": { + "216": 2.6052346229553223, + "217": 2.555436372756958, + "218": 2.601337194442749, + "219": 2.5790903568267822, + "220": 2.6068601608276367, + "221": 2.5617146492004395, + "222": 2.5569586753845215, + "223": 2.555171489715576, + "224": 2.617776870727539, + "225": 2.58357310295105, + "226": 2.6194419860839844, + "227": 2.5379624366760254, + "228": 2.5633058547973633, + "229": 2.555050849914551, + "230": 2.6009440422058105, + "231": 2.5609793663024902, + "232": 2.626481056213379, + "233": 2.584388256072998, + "234": 2.57894229888916, + "235": 2.56315016746521, + "236": 2.6240882873535156, + "237": 2.549525737762451, + "238": 2.5621232986450195, + "239": 2.5661332607269287, + "240": 2.5973246097564697, + "241": 2.5558292865753174, + "242": 2.62863826751709, + "243": 2.5682787895202637, + "244": 2.5877485275268555, + "245": 2.541343927383423, + "246": 2.536112070083618, + "247": 2.5410687923431396, + "248": 2.568793535232544, + "249": 2.5299339294433594, + "250": 2.550713062286377, + "251": 2.5351595878601074, + "252": 2.567039966583252, + "253": 2.542923927307129, + "254": 2.560943603515625, + "255": 2.5632314682006836, + "256": 2.5640065670013428, + "257": 2.5342507362365723, + "258": 2.544525146484375, + "259": 2.5233750343322754, + "260": 2.568404197692871, + "261": 2.573936939239502, + "262": 2.599677324295044, + "263": 2.5530166625976562, + "264": 2.5886425971984863, + "265": 2.5661191940307617, + "266": 2.5861520767211914, + "267": 2.5155277252197266, + "268": 2.5421180725097656, + "269": 2.521008014678955 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 3.92507, + 4.18443, + 4.04605, + 3.37233, + 3.62095, + 5.10282, + 5.39697, + 4.03861, + 3.38763, + 3.13183, + 3.87136, + 5.21341, + 5.17558, + 3.91619, + 3.3768, + 3.02753, + 3.14311, + 3.22893, + 3.38464, + 3.15772, + 3.43166, + 4.48935, + 4.65957, + 3.63048, + 3.66097, + 3.49009, + 3.22503, + 3.44829, + 4.16415, + 5.555, + 5.23339, + 4.00618, + 4.01674, + 4.62666, + 4.18713, + 3.65243, + 3.81189, + 4.15082, + 4.08221, + 3.77608, + 4.1963, + 5.06999, + 5.27437, + 4.28967, + 3.43589, + 2.96915, + 3.09651, + 2.99063, + 2.92259, + 2.9962, + 3.7763, + 4.27269, + 4.32428, + 4.92079 + ], + "train_epoch_time": 5.055287599563599, + "train_loss": 2.541071497384924, + "train_score": 0.25027797703164895, + "val_loss": 2.5712752596792754, + "val_score": 0.24682925523490776 + }, + { + "epoch": 5, + "grad_norm": 0.8155317306518555, + "learning_rate": 0.464, + "model_norm": 87.49127197265625, + "step_logs": { + "grad_norm": { + "270": 0.709923267364502, + "271": 0.7353038787841797, + "272": 0.7703292369842529, + "273": 0.8287577629089355, + "274": 0.8209397792816162, + "275": 0.7596935629844666, + "276": 0.7621415853500366, + "277": 0.822248637676239, + "278": 0.8194732069969177, + "279": 0.794235348701477, + "280": 0.8863086700439453, + "281": 0.9842506647109985, + "282": 1.0085747241973877, + "283": 0.7977074384689331, + "284": 0.6794868111610413, + "285": 0.7212539911270142, + "286": 0.9283609390258789, + "287": 1.05335533618927, + "288": 1.0210649967193604, + "289": 0.8998480439186096, + "290": 0.8394947052001953, + "291": 0.8239033818244934, + "292": 0.8653450608253479, + "293": 0.91135573387146, + "294": 0.8895570039749146, + "295": 0.8021307587623596, + "296": 0.8022594451904297, + "297": 1.8957005739212036, + "298": 0.7364959716796875, + "299": 0.48802316188812256, + "300": 0.4164507985115051, + "301": 0.500688910484314, + "302": 0.6880447864532471, + "303": 0.986847996711731, + "304": 0.9654015302658081, + "305": 0.966780960559845, + "306": 0.9232786893844604, + "307": 0.7876191735267639, + "308": 0.7683576941490173, + "309": 0.8035704493522644, + "310": 0.8007235527038574, + "311": 0.8699837923049927, + "312": 0.8615723848342896, + "313": 0.7858172655105591, + "314": 0.7602621912956238, + "315": 0.7625511884689331, + "316": 0.7627614736557007, + "317": 0.8088719844818115, + "318": 0.8202798962593079, + "319": 0.7959516644477844, + "320": 0.7554711699485779, + "321": 0.7299551367759705, + "322": 0.7404099106788635, + "323": 0.8155317306518555 + }, + "loss": { + "270": 2.530697822570801, + "271": 2.520373821258545, + "272": 2.543203592300415, + "273": 2.5499415397644043, + "274": 2.5715713500976562, + "275": 2.5144195556640625, + "276": 2.5348362922668457, + "277": 2.524979591369629, + "278": 2.5430073738098145, + "279": 2.5175833702087402, + "280": 2.5496034622192383, + "281": 2.5477521419525146, + "282": 2.5816190242767334, + "283": 2.5505175590515137, + "284": 2.4904980659484863, + "285": 2.4714856147766113, + "286": 2.5006422996520996, + "287": 2.530031204223633, + "288": 2.5561769008636475, + "289": 2.517416477203369, + "290": 2.5242419242858887, + "291": 2.4763975143432617, + "292": 2.4675889015197754, + "293": 2.4743995666503906, + "294": 2.525834798812866, + "295": 2.482401132583618, + "296": 2.475626230239868, + "297": 2.535186767578125, + "298": 2.597148895263672, + "299": 2.498443126678467, + "300": 2.4709739685058594, + "301": 2.4736552238464355, + "302": 2.4779701232910156, + "303": 2.548572540283203, + "304": 2.567087173461914, + "305": 2.536877155303955, + "306": 2.5445680618286133, + "307": 2.5344479084014893, + "308": 2.533520221710205, + "309": 2.478400468826294, + "310": 2.520012855529785, + "311": 2.512167453765869, + "312": 2.542781352996826, + "313": 2.4930758476257324, + "314": 2.4958527088165283, + "315": 2.4869534969329834, + "316": 2.51597261428833, + "317": 2.495067834854126, + "318": 2.5288801193237305, + "319": 2.4858336448669434, + "320": 2.509807825088501, + "321": 2.469442367553711, + "322": 2.5032758712768555, + "323": 2.4647891521453857 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 5.02131, + 4.66156, + 4.28577, + 3.71257, + 3.81572, + 4.35673, + 4.36394, + 3.73466, + 3.78685, + 3.99103, + 3.24566, + 2.62994, + 2.53791, + 4.00812, + 5.39416, + 4.75096, + 2.90147, + 2.28022, + 2.45179, + 3.10897, + 3.58175, + 3.64811, + 3.29529, + 2.97916, + 3.19196, + 3.85817, + 3.84641, + 0.705457, + 4.78802, + 10.4903, + 14.2476, + 9.86741, + 5.23435, + 2.61696, + 2.75438, + 2.71421, + 2.98503, + 4.08555, + 4.29138, + 3.83816, + 3.93041, + 3.31915, + 3.42551, + 4.03731, + 4.31809, + 4.27691, + 4.32443, + 3.81349, + 3.75841, + 3.92373, + 4.39749, + 4.63454, + 4.5663, + 3.70594 + ], + "train_epoch_time": 5.05573034286499, + "train_loss": 2.5106289301917406, + "train_score": 0.25232021160840645, + "val_loss": 2.563950307607377, + "val_score": 0.24603544820600481 + }, + { + "epoch": 6, + "grad_norm": 0.6872848868370056, + "learning_rate": 0.464, + "model_norm": 87.54857635498047, + "step_logs": { + "grad_norm": { + "324": 0.7839581966400146, + "325": 0.7095580697059631, + "326": 0.862248420715332, + "327": 0.9032272696495056, + "328": 0.7961395978927612, + "329": 0.7738818526268005, + "330": 0.7535048723220825, + "331": 0.7769403457641602, + "332": 0.8828176259994507, + "333": 0.9689627885818481, + "334": 0.8665558695793152, + "335": 0.7570440173149109, + "336": 1.123897910118103, + "337": 1.06027090549469, + "338": 0.8520289063453674, + "339": 0.5979045033454895, + "340": 0.6060411334037781, + "341": 0.7240143418312073, + "342": 1.0907390117645264, + "343": 1.1220327615737915, + "344": 1.0307424068450928, + "345": 1.0251318216323853, + "346": 0.6972977519035339, + "347": 0.7023191452026367, + "348": 0.8285085558891296, + "349": 0.8545849323272705, + "350": 0.9272441267967224, + "351": 1.012359380722046, + "352": 0.9842120409011841, + "353": 0.8615912795066833, + "354": 0.9104143977165222, + "355": 0.9002649188041687, + "356": 0.9262411594390869, + "357": 1.001621961593628, + "358": 0.9617332816123962, + "359": 0.8877976536750793, + "360": 0.9657832980155945, + "361": 0.8201555609703064, + "362": 1.131272554397583, + "363": 0.6465250253677368, + "364": 0.6325067281723022, + "365": 0.8593429327011108, + "366": 0.898367702960968, + "367": 1.1018083095550537, + "368": 0.8325973749160767, + "369": 0.8152550458908081, + "370": 0.7561051845550537, + "371": 0.7377316355705261, + "372": 0.9109453558921814, + "373": 0.9643517732620239, + "374": 0.8522351384162903, + "375": 0.9295061826705933, + "376": 0.881537914276123, + "377": 0.6872848868370056 + }, + "loss": { + "324": 2.505286693572998, + "325": 2.456019163131714, + "326": 2.4898881912231445, + "327": 2.5054454803466797, + "328": 2.479649066925049, + "329": 2.4650940895080566, + "330": 2.4800806045532227, + "331": 2.4616641998291016, + "332": 2.467437267303467, + "333": 2.4682998657226562, + "334": 2.493536949157715, + "335": 2.4131741523742676, + "336": 2.4347915649414062, + "337": 2.5208096504211426, + "338": 2.513828754425049, + "339": 2.4165737628936768, + "340": 2.3917222023010254, + "341": 2.4062037467956543, + "342": 2.431318759918213, + "343": 2.5053200721740723, + "344": 2.531595230102539, + "345": 2.4476327896118164, + "346": 2.4766273498535156, + "347": 2.413266181945801, + "348": 2.41045880317688, + "349": 2.40478515625, + "350": 2.4176554679870605, + "351": 2.454700469970703, + "352": 2.4540939331054688, + "353": 2.399731159210205, + "354": 2.444430112838745, + "355": 2.416689872741699, + "356": 2.416731834411621, + "357": 2.4256677627563477, + "358": 2.442284107208252, + "359": 2.4102227687835693, + "360": 2.402315616607666, + "361": 2.425611734390259, + "362": 2.4030792713165283, + "363": 2.4333086013793945, + "364": 2.3604183197021484, + "365": 2.3863296508789062, + "366": 2.4027938842773438, + "367": 2.4116382598876953, + "368": 2.4453606605529785, + "369": 2.3839893341064453, + "370": 2.3941221237182617, + "371": 2.340608596801758, + "372": 2.380875825881958, + "373": 2.3923251628875732, + "374": 2.3657093048095703, + "375": 2.398134708404541, + "376": 2.4324960708618164, + "377": 2.365286350250244 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 4.07635, + 4.87816, + 3.349, + 3.07108, + 3.91212, + 4.11608, + 4.36811, + 4.07806, + 3.16595, + 2.62896, + 3.32065, + 4.21062, + 1.92756, + 2.24237, + 3.4628, + 6.75984, + 6.51188, + 4.59027, + 2.04362, + 1.99, + 2.38284, + 2.32909, + 5.09359, + 4.89256, + 3.51161, + 3.2928, + 2.81194, + 2.39513, + 2.53346, + 3.23266, + 2.94917, + 2.98181, + 2.81696, + 2.41782, + 2.6405, + 3.05794, + 2.57555, + 3.60603, + 1.87773, + 5.82139, + 5.90009, + 3.23145, + 2.9772, + 1.98655, + 3.52755, + 3.58688, + 4.18776, + 4.30063, + 2.86914, + 2.57246, + 3.25719, + 2.77568, + 3.13019, + 5.00738 + ], + "train_epoch_time": 5.054721355438232, + "train_loss": 2.3425577487972924, + "train_score": 0.31036137009795806, + "val_loss": 2.399173461473905, + "val_score": 0.29665972998988066 + }, + { + "epoch": 7, + "grad_norm": 0.8393017053604126, + "learning_rate": 0.464, + "model_norm": 87.60615539550781, + "step_logs": { + "grad_norm": { + "378": 0.7589331865310669, + "379": 0.8892024159431458, + "380": 0.858923614025116, + "381": 0.8288053274154663, + "382": 0.815409779548645, + "383": 0.8348818421363831, + "384": 0.922610342502594, + "385": 0.8301099538803101, + "386": 0.7437319755554199, + "387": 0.7968907356262207, + "388": 0.8912876844406128, + "389": 1.1475551128387451, + "390": 0.7989431619644165, + "391": 0.8346490859985352, + "392": 0.9870498776435852, + "393": 1.0443447828292847, + "394": 1.223189115524292, + "395": 0.9580093026161194, + "396": 0.9073383808135986, + "397": 1.0248693227767944, + "398": 0.8835127949714661, + "399": 0.8155155181884766, + "400": 0.8422682285308838, + "401": 1.074558973312378, + "402": 0.8556275963783264, + "403": 0.795551598072052, + "404": 0.8677669167518616, + "405": 0.9945087432861328, + "406": 0.9845489263534546, + "407": 0.9531922340393066, + "408": 0.7928701043128967, + "409": 0.7659369707107544, + "410": 0.8509597182273865, + "411": 0.867879331111908, + "412": 0.7661714553833008, + "413": 0.7206516265869141, + "414": 0.8043820858001709, + "415": 0.938575804233551, + "416": 0.875091016292572, + "417": 0.8536773324012756, + "418": 0.9054034352302551, + "419": 0.9733379483222961, + "420": 1.0094554424285889, + "421": 0.8578090071678162, + "422": 0.7676835060119629, + "423": 0.7866225838661194, + "424": 0.8058855533599854, + "425": 0.9008245468139648, + "426": 1.0388469696044922, + "427": 0.9630997776985168, + "428": 0.8510398864746094, + "429": 1.0077499151229858, + "430": 0.9395222663879395, + "431": 0.8393017053604126 + }, + "loss": { + "378": 2.353041172027588, + "379": 2.3699467182159424, + "380": 2.392122268676758, + "381": 2.3604695796966553, + "382": 2.3915934562683105, + "383": 2.339259147644043, + "384": 2.3710484504699707, + "385": 2.356905460357666, + "386": 2.3484816551208496, + "387": 2.345686435699463, + "388": 2.3691048622131348, + "389": 2.395533800125122, + "390": 2.4183292388916016, + "391": 2.3802669048309326, + "392": 2.394064426422119, + "393": 2.3875062465667725, + "394": 2.4428303241729736, + "395": 2.41601824760437, + "396": 2.409115791320801, + "397": 2.3533449172973633, + "398": 2.4201550483703613, + "399": 2.353846549987793, + "400": 2.3462610244750977, + "401": 2.3599376678466797, + "402": 2.4097695350646973, + "403": 2.3267579078674316, + "404": 2.314735174179077, + "405": 2.3632850646972656, + "406": 2.366142511367798, + "407": 2.3565785884857178, + "408": 2.3394815921783447, + "409": 2.307865619659424, + "410": 2.3455278873443604, + "411": 2.3363208770751953, + "412": 2.343714475631714, + "413": 2.285933256149292, + "414": 2.3379223346710205, + "415": 2.3197362422943115, + "416": 2.3655757904052734, + "417": 2.3053879737854004, + "418": 2.3367247581481934, + "419": 2.3189516067504883, + "420": 2.371898889541626, + "421": 2.322610378265381, + "422": 2.3212804794311523, + "423": 2.297940492630005, + "424": 2.3085403442382812, + "425": 2.3058664798736572, + "426": 2.348581314086914, + "427": 2.3665213584899902, + "428": 2.3129308223724365, + "429": 2.3490896224975586, + "430": 2.3440871238708496, + "431": 2.3069348335266113 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 4.08529, + 2.99735, + 3.24246, + 3.43632, + 3.59696, + 3.35605, + 2.7855, + 3.42035, + 4.24575, + 3.69379, + 2.98228, + 1.81909, + 3.78864, + 3.41679, + 2.4573, + 2.18906, + 1.6327, + 2.63245, + 2.9263, + 2.24052, + 3.1004, + 3.53927, + 3.30731, + 2.04381, + 3.29159, + 3.67633, + 3.07394, + 2.38946, + 2.44099, + 2.59371, + 3.72148, + 3.93391, + 3.23909, + 3.1018, + 3.99257, + 4.40162, + 3.61331, + 2.6333, + 3.08909, + 3.16342, + 2.85051, + 2.44773, + 2.32767, + 3.15642, + 3.93879, + 3.71369, + 3.5546, + 2.84154, + 2.17622, + 2.55134, + 3.19347, + 2.3131, + 2.65558, + 3.27491 + ], + "train_epoch_time": 5.060734033584595, + "train_loss": 2.322275898480518, + "train_score": 0.29503116021881487, + "val_loss": 2.3888586859478607, + "val_score": 0.2832053670174219 + }, + { + "epoch": 8, + "grad_norm": 0.8663375973701477, + "learning_rate": 0.464, + "model_norm": 87.67359924316406, + "step_logs": { + "grad_norm": { + "432": 0.9048224687576294, + "433": 0.8958725929260254, + "434": 0.9489478468894958, + "435": 0.9375250935554504, + "436": 1.1159147024154663, + "437": 1.3539495468139648, + "438": 0.9937126636505127, + "439": 0.7304781079292297, + "440": 1.2727736234664917, + "441": 0.839798092842102, + "442": 0.8061545491218567, + "443": 0.8553096055984497, + "444": 0.8286412358283997, + "445": 0.7482856512069702, + "446": 0.6412596106529236, + "447": 0.6080443859100342, + "448": 0.7059473991394043, + "449": 0.8625189661979675, + "450": 1.1521011590957642, + "451": 1.0409513711929321, + "452": 0.8599990010261536, + "453": 0.7692727446556091, + "454": 0.8178277611732483, + "455": 0.9262793660163879, + "456": 0.9845213294029236, + "457": 1.1887562274932861, + "458": 0.849265456199646, + "459": 0.7488734722137451, + "460": 0.7918029427528381, + "461": 0.8631037473678589, + "462": 0.9187257289886475, + "463": 0.9063173532485962, + "464": 0.8658668994903564, + "465": 0.7803764343261719, + "466": 0.7280052304267883, + "467": 0.7875689268112183, + "468": 0.8168123364448547, + "469": 0.8298878073692322, + "470": 0.8827573657035828, + "471": 0.9784128665924072, + "472": 1.0885601043701172, + "473": 2.0626463890075684, + "474": 1.383554220199585, + "475": 1.3349108695983887, + "476": 1.8164100646972656, + "477": 1.3956739902496338, + "478": 1.3808982372283936, + "479": 1.0982495546340942, + "480": 1.036338448524475, + "481": 0.9854855537414551, + "482": 1.0208631753921509, + "483": 0.891883373260498, + "484": 0.8291735649108887, + "485": 0.8663375973701477 + }, + "loss": { + "432": 2.3207361698150635, + "433": 2.32639479637146, + "434": 2.3160781860351562, + "435": 2.327807903289795, + "436": 2.3533363342285156, + "437": 2.3823373317718506, + "438": 2.404601573944092, + "439": 2.274229049682617, + "440": 2.32706356048584, + "441": 2.372982978820801, + "442": 2.3207733631134033, + "443": 2.2941932678222656, + "444": 2.334689140319824, + "445": 2.2608489990234375, + "446": 2.2635459899902344, + "447": 2.2262797355651855, + "448": 2.26047420501709, + "449": 2.294696807861328, + "450": 2.344208240509033, + "451": 2.3642759323120117, + "452": 2.319617748260498, + "453": 2.2785825729370117, + "454": 2.2757506370544434, + "455": 2.292905807495117, + "456": 2.2992310523986816, + "457": 2.312321662902832, + "458": 2.332876205444336, + "459": 2.274783134460449, + "460": 2.267345428466797, + "461": 2.290771961212158, + "462": 2.279402256011963, + "463": 2.311415910720825, + "464": 2.2693541049957275, + "465": 2.283268690109253, + "466": 2.2357559204101562, + "467": 2.2635698318481445, + "468": 2.2616844177246094, + "469": 2.2747926712036133, + "470": 2.255516529083252, + "471": 2.315216541290283, + "472": 2.3080945014953613, + "473": 2.3555655479431152, + "474": 2.46317195892334, + "475": 2.39725399017334, + "476": 2.5200319290161133, + "477": 2.5057151317596436, + "478": 2.527132034301758, + "479": 2.3812496662139893, + "480": 2.363093852996826, + "481": 2.3452019691467285, + "482": 2.360673427581787, + "483": 2.3743820190429688, + "484": 2.3033931255340576, + "485": 2.329716920852661 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 2.83465, + 2.89862, + 2.57199, + 2.64839, + 1.88983, + 1.29957, + 2.43513, + 4.26206, + 1.4365, + 3.36469, + 3.57105, + 3.13605, + 3.40013, + 4.03772, + 5.50455, + 6.02156, + 4.53581, + 3.08452, + 1.7661, + 2.18191, + 3.13632, + 3.85038, + 3.40252, + 2.6724, + 2.3721, + 1.6363, + 3.23448, + 4.05624, + 3.61646, + 3.07508, + 2.70053, + 2.81396, + 3.02691, + 3.74929, + 4.21847, + 3.64936, + 3.3899, + 3.30296, + 2.89443, + 2.41851, + 1.94782, + 0.553663, + 1.28677, + 1.34527, + 0.763798, + 1.28636, + 1.32527, + 1.97425, + 2.20028, + 2.41479, + 2.26517, + 2.98493, + 3.35025, + 3.10405 + ], + "train_epoch_time": 5.054119110107422, + "train_loss": 2.301459996566882, + "train_score": 0.3255570750667512, + "val_loss": 2.3690481073683904, + "val_score": 0.30634238727467755 + }, + { + "epoch": 9, + "grad_norm": 0.8091462850570679, + "learning_rate": 0.464, + "model_norm": 87.738525390625, + "step_logs": { + "grad_norm": { + "486": 0.9061972498893738, + "487": 0.8700083494186401, + "488": 0.8097296357154846, + "489": 0.8147047162055969, + "490": 0.9064461588859558, + "491": 1.026487112045288, + "492": 0.9785983562469482, + "493": 0.8480057120323181, + "494": 0.715952455997467, + "495": 0.7886447906494141, + "496": 0.9205501079559326, + "497": 0.9369004368782043, + "498": 0.7981026768684387, + "499": 0.6879714131355286, + "500": 0.6765692830085754, + "501": 0.7633191347122192, + "502": 0.8823603987693787, + "503": 0.949428379535675, + "504": 0.8830804824829102, + "505": 0.8267177939414978, + "506": 0.8351994752883911, + "507": 0.757067859172821, + "508": 0.7504280209541321, + "509": 0.7553586363792419, + "510": 0.8671025037765503, + "511": 0.8524565100669861, + "512": 0.8685594797134399, + "513": 1.0522680282592773, + "514": 0.9484105706214905, + "515": 0.7447003126144409, + "516": 0.6856025457382202, + "517": 0.7139851450920105, + "518": 0.746656060218811, + "519": 0.7791493535041809, + "520": 0.7601523399353027, + "521": 0.7545337080955505, + "522": 0.8574492931365967, + "523": 0.9257049560546875, + "524": 0.8977177143096924, + "525": 0.7736575603485107, + "526": 0.7302653789520264, + "527": 0.7500297427177429, + "528": 0.8104608654975891, + "529": 0.8192557096481323, + "530": 0.7449462413787842, + "531": 0.7800441980361938, + "532": 0.869362473487854, + "533": 0.9132375717163086, + "534": 0.849949300289154, + "535": 0.6947661638259888, + "536": 0.6462142467498779, + "537": 0.7439555525779724, + "538": 0.7711809277534485, + "539": 0.8091462850570679 + }, + "loss": { + "486": 2.309541702270508, + "487": 2.3110992908477783, + "488": 2.2999320030212402, + "489": 2.2903366088867188, + "490": 2.2813172340393066, + "491": 2.297905683517456, + "492": 2.3322479724884033, + "493": 2.307438373565674, + "494": 2.252199411392212, + "495": 2.2337327003479004, + "496": 2.2935547828674316, + "497": 2.2870070934295654, + "498": 2.266975164413452, + "499": 2.239179849624634, + "500": 2.239522695541382, + "501": 2.229620933532715, + "502": 2.241481304168701, + "503": 2.2858049869537354, + "504": 2.2830092906951904, + "505": 2.248178482055664, + "506": 2.262666940689087, + "507": 2.232067108154297, + "508": 2.225383996963501, + "509": 2.2340638637542725, + "510": 2.219205141067505, + "511": 2.273848295211792, + "512": 2.229863166809082, + "513": 2.2780256271362305, + "514": 2.3253731727600098, + "515": 2.2247302532196045, + "516": 2.1997780799865723, + "517": 2.1794025897979736, + "518": 2.2175052165985107, + "519": 2.2099719047546387, + "520": 2.229396343231201, + "521": 2.1843299865722656, + "522": 2.2053704261779785, + "523": 2.223071575164795, + "524": 2.2601327896118164, + "525": 2.2394065856933594, + "526": 2.1992015838623047, + "527": 2.188534736633301, + "528": 2.2334952354431152, + "529": 2.221254348754883, + "530": 2.22395396232605, + "531": 2.1958765983581543, + "532": 2.200474262237549, + "533": 2.217067241668701, + "534": 2.2371339797973633, + "535": 2.196822166442871, + "536": 2.1863207817077637, + "537": 2.212778091430664, + "538": 2.2034809589385986, + "539": 2.202427625656128 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 2.81242, + 3.05332, + 3.5078, + 3.45063, + 2.77653, + 2.18085, + 2.43537, + 3.20873, + 4.39378, + 3.59144, + 2.70654, + 2.60544, + 3.55901, + 4.73095, + 4.8925, + 3.82665, + 2.87901, + 2.5358, + 2.92757, + 3.2894, + 3.2437, + 3.89437, + 3.95173, + 3.91552, + 2.95159, + 3.12908, + 2.95583, + 2.05734, + 2.58523, + 4.01157, + 4.67987, + 4.27523, + 3.97762, + 3.64037, + 3.85821, + 3.83673, + 2.99961, + 2.59423, + 2.80449, + 3.74141, + 4.12386, + 3.89042, + 3.40033, + 3.30948, + 4.00752, + 3.60886, + 2.91148, + 2.65834, + 3.09675, + 4.55111, + 5.23553, + 3.99801, + 3.70507, + 3.36393 + ], + "train_epoch_time": 5.055047035217285, + "train_loss": 2.2124781825450093, + "train_score": 0.3428454538414434, + "val_loss": 2.2925207798297587, + "val_score": 0.3205412238254613 + }, + { + "epoch": 10, + "grad_norm": 0.839262068271637, + "learning_rate": 0.464, + "model_norm": 87.80653381347656, + "step_logs": { + "grad_norm": { + "540": 0.8846848011016846, + "541": 0.851793646812439, + "542": 0.8648220300674438, + "543": 0.875769317150116, + "544": 0.8684296607971191, + "545": 0.8440709710121155, + "546": 0.8757414817810059, + "547": 0.8696944713592529, + "548": 0.8052344918251038, + "549": 0.7640607357025146, + "550": 0.8160861730575562, + "551": 0.8356430530548096, + "552": 0.8475441336631775, + "553": 0.9411374926567078, + "554": 0.9904058575630188, + "555": 0.7932664155960083, + "556": 0.6735861301422119, + "557": 0.6476566195487976, + "558": 0.7187003493309021, + "559": 0.7167638540267944, + "560": 0.6879050135612488, + "561": 0.750463604927063, + "562": 0.8491277098655701, + "563": 0.9181175827980042, + "564": 0.9281700253486633, + "565": 0.7286429405212402, + "566": 0.7585279941558838, + "567": 0.7738417983055115, + "568": 0.7366043329238892, + "569": 0.7681720852851868, + "570": 0.810899555683136, + "571": 0.8764036893844604, + "572": 1.11243736743927, + "573": 1.004509449005127, + "574": 0.8296661972999573, + "575": 0.74354088306427, + "576": 0.6978038549423218, + "577": 0.7900229692459106, + "578": 0.930242121219635, + "579": 0.9561259150505066, + "580": 0.8252516984939575, + "581": 0.7146198153495789, + "582": 0.7405450344085693, + "583": 0.8092361688613892, + "584": 0.8283589482307434, + "585": 0.7919597625732422, + "586": 0.8006977438926697, + "587": 0.7785877585411072, + "588": 0.6945397853851318, + "589": 0.8264068365097046, + "590": 0.8366949558258057, + "591": 0.8228870630264282, + "592": 0.8261078596115112, + "593": 0.839262068271637 + }, + "loss": { + "540": 2.2191858291625977, + "541": 2.2137722969055176, + "542": 2.2008423805236816, + "543": 2.2153892517089844, + "544": 2.18567156791687, + "545": 2.222735643386841, + "546": 2.229074001312256, + "547": 2.2262442111968994, + "548": 2.1911351680755615, + "549": 2.196539878845215, + "550": 2.183676242828369, + "551": 2.227532386779785, + "552": 2.167851448059082, + "553": 2.2243947982788086, + "554": 2.2245230674743652, + "555": 2.199087619781494, + "556": 2.19425106048584, + "557": 2.153940200805664, + "558": 2.1462035179138184, + "559": 2.154965400695801, + "560": 2.16123628616333, + "561": 2.163560390472412, + "562": 2.1878600120544434, + "563": 2.2012462615966797, + "564": 2.1952459812164307, + "565": 2.189441442489624, + "566": 2.1669788360595703, + "567": 2.1989150047302246, + "568": 2.179996967315674, + "569": 2.1789050102233887, + "570": 2.1882901191711426, + "571": 2.1846516132354736, + "572": 2.2329823970794678, + "573": 2.24784517288208, + "574": 2.1880455017089844, + "575": 2.1801204681396484, + "576": 2.184971809387207, + "577": 2.1718716621398926, + "578": 2.1804022789001465, + "579": 2.230469226837158, + "580": 2.183898448944092, + "581": 2.194782257080078, + "582": 2.148127317428589, + "583": 2.1465179920196533, + "584": 2.1728463172912598, + "585": 2.185518980026245, + "586": 2.1895668506622314, + "587": 2.1469428539276123, + "588": 2.1203830242156982, + "589": 2.1771926879882812, + "590": 2.181893825531006, + "591": 2.1747307777404785, + "592": 2.1478943824768066, + "593": 2.171764850616455 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 2.83541, + 3.05115, + 2.94263, + 2.88849, + 2.89811, + 3.11982, + 2.90652, + 2.94333, + 3.37928, + 3.76256, + 3.27881, + 3.18994, + 3.0179, + 2.51134, + 2.26783, + 3.49466, + 4.83615, + 5.13504, + 4.15504, + 4.19458, + 4.56715, + 3.84158, + 3.0344, + 2.61139, + 2.54817, + 4.12386, + 3.76627, + 3.67201, + 4.01779, + 3.69251, + 3.3279, + 2.84429, + 1.80441, + 2.22771, + 3.1787, + 3.9434, + 4.48724, + 3.4798, + 2.51968, + 2.43987, + 3.20671, + 4.29775, + 3.91703, + 3.27781, + 3.16659, + 3.48456, + 3.41524, + 3.54165, + 4.39562, + 3.18793, + 3.11673, + 3.21163, + 3.14731, + 3.08331 + ], + "train_epoch_time": 5.054749250411987, + "train_loss": 2.1566254325029326, + "train_score": 0.3630301739256214, + "val_loss": 2.253623548796749, + "val_score": 0.3428081953566603 + }, + { + "epoch": 11, + "grad_norm": 0.7908388376235962, + "learning_rate": 0.464, + "model_norm": 87.87235260009766, + "step_logs": { + "grad_norm": { + "594": 0.8385294079780579, + "595": 0.9107412099838257, + "596": 0.9985735416412354, + "597": 1.0071958303451538, + "598": 0.9127932190895081, + "599": 0.8815768361091614, + "600": 0.8051580190658569, + "601": 0.7952074408531189, + "602": 0.7971633076667786, + "603": 0.8336962461471558, + "604": 0.8893314599990845, + "605": 0.9319782257080078, + "606": 0.9262791872024536, + "607": 0.9126294851303101, + "608": 0.7592926621437073, + "609": 0.6793693900108337, + "610": 0.7502461075782776, + "611": 0.8824211359024048, + "612": 0.8688129186630249, + "613": 0.7475083470344543, + "614": 0.672337532043457, + "615": 0.7291651964187622, + "616": 0.7945804595947266, + "617": 0.8353235125541687, + "618": 0.8443430066108704, + "619": 0.8979946970939636, + "620": 0.8763735890388489, + "621": 0.7830246686935425, + "622": 0.7525209188461304, + "623": 0.823521614074707, + "624": 0.9534501433372498, + "625": 0.885374128818512, + "626": 0.7447731494903564, + "627": 0.7996683120727539, + "628": 0.8268854022026062, + "629": 0.7702431082725525, + "630": 0.7293129563331604, + "631": 0.8059701323509216, + "632": 0.8450236320495605, + "633": 0.8778682351112366, + "634": 0.9468300342559814, + "635": 0.9203754663467407, + "636": 0.8117689490318298, + "637": 0.7357116341590881, + "638": 0.7154402732849121, + "639": 0.7058746218681335, + "640": 0.7272049188613892, + "641": 0.8203611969947815, + "642": 0.8053290843963623, + "643": 0.7895781397819519, + "644": 0.8198033571243286, + "645": 0.8081340789794922, + "646": 0.7271595597267151, + "647": 0.7908388376235962 + }, + "loss": { + "594": 2.18215274810791, + "595": 2.179840087890625, + "596": 2.1663570404052734, + "597": 2.2396609783172607, + "598": 2.1682393550872803, + "599": 2.1708712577819824, + "600": 2.176543712615967, + "601": 2.14573335647583, + "602": 2.1722970008850098, + "603": 2.1507678031921387, + "604": 2.1619625091552734, + "605": 2.186767339706421, + "606": 2.202868938446045, + "607": 2.196329355239868, + "608": 2.1574296951293945, + "609": 2.1475911140441895, + "610": 2.1136789321899414, + "611": 2.169053316116333, + "612": 2.1615538597106934, + "613": 2.1541175842285156, + "614": 2.1183393001556396, + "615": 2.0860562324523926, + "616": 2.1464667320251465, + "617": 2.1274566650390625, + "618": 2.136549711227417, + "619": 2.172416925430298, + "620": 2.166909694671631, + "621": 2.1530230045318604, + "622": 2.14167857170105, + "623": 2.1571555137634277, + "624": 2.156428337097168, + "625": 2.1737239360809326, + "626": 2.1162500381469727, + "627": 2.1566338539123535, + "628": 2.1405065059661865, + "629": 2.128350257873535, + "630": 2.086911201477051, + "631": 2.1206464767456055, + "632": 2.1384353637695312, + "633": 2.153104066848755, + "634": 2.1471080780029297, + "635": 2.156794548034668, + "636": 2.120319366455078, + "637": 2.1094422340393066, + "638": 2.1094212532043457, + "639": 2.1174988746643066, + "640": 2.1273560523986816, + "641": 2.1183457374572754, + "642": 2.151970386505127, + "643": 2.1000776290893555, + "644": 2.1109485626220703, + "645": 2.1427557468414307, + "646": 2.1026482582092285, + "647": 2.1007349491119385 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 3.10348, + 2.62806, + 2.17255, + 2.20777, + 2.60233, + 2.79328, + 3.35742, + 3.39324, + 3.41841, + 3.09441, + 2.73351, + 2.51762, + 2.56747, + 2.63699, + 3.74212, + 4.65307, + 3.75519, + 2.7856, + 2.86361, + 3.85511, + 4.6862, + 3.9235, + 3.39976, + 3.04896, + 2.99692, + 2.69399, + 2.82138, + 3.51154, + 3.78196, + 3.18076, + 2.37213, + 2.77301, + 3.81521, + 3.37254, + 3.13059, + 3.58746, + 3.92352, + 3.2646, + 2.99474, + 2.79387, + 2.39502, + 2.54612, + 3.21763, + 3.8972, + 4.12113, + 4.2498, + 4.02278, + 3.14765, + 3.3181, + 3.36857, + 3.14093, + 3.281, + 3.97656, + 3.35889 + ], + "train_epoch_time": 5.055201292037964, + "train_loss": 2.1195851120750393, + "train_score": 0.37360002684114313, + "val_loss": 2.222439574593107, + "val_score": 0.35174637588102425 + }, + { + "epoch": 12, + "grad_norm": 0.5173906087875366, + "learning_rate": 0.464, + "model_norm": 87.92743682861328, + "step_logs": { + "grad_norm": { + "648": 0.8069720268249512, + "649": 0.8512582182884216, + "650": 0.8990569114685059, + "651": 0.8587251305580139, + "652": 0.7868680357933044, + "653": 0.7565704584121704, + "654": 0.8317120671272278, + "655": 0.9230534434318542, + "656": 0.834385871887207, + "657": 0.7295063734054565, + "658": 0.721542477607727, + "659": 0.7463744878768921, + "660": 0.836582601070404, + "661": 0.8649083971977234, + "662": 0.7884148955345154, + "663": 0.698732316493988, + "664": 0.6344377994537354, + "665": 0.6611167192459106, + "666": 0.6664699912071228, + "667": 0.636164665222168, + "668": 0.589198887348175, + "669": 0.5984880924224854, + "670": 0.6751964688301086, + "671": 0.726650595664978, + "672": 0.7066138982772827, + "673": 0.6512463092803955, + "674": 0.6764916181564331, + "675": 0.7289139032363892, + "676": 0.7452622056007385, + "677": 0.720969021320343, + "678": 0.6540064811706543, + "679": 0.6373347640037537, + "680": 0.5958762764930725, + "681": 0.5619276762008667, + "682": 0.603219211101532, + "683": 0.6242123246192932, + "684": 0.6478551030158997, + "685": 0.6306843161582947, + "686": 0.5928675532341003, + "687": 0.5836822390556335, + "688": 0.6253156065940857, + "689": 0.6734954714775085, + "690": 0.7497913837432861, + "691": 0.7331075072288513, + "692": 0.7071674466133118, + "693": 0.6890398859977722, + "694": 0.6262747645378113, + "695": 0.6144706606864929, + "696": 0.6431475877761841, + "697": 0.6287190318107605, + "698": 0.5598568320274353, + "699": 0.5094016790390015, + "700": 0.5272648334503174, + "701": 0.5173906087875366 + }, + "loss": { + "648": 2.1383960247039795, + "649": 2.150754451751709, + "650": 2.1586318016052246, + "651": 2.155897378921509, + "652": 2.100804328918457, + "653": 2.0960984230041504, + "654": 2.1015677452087402, + "655": 2.1342787742614746, + "656": 2.138262987136841, + "657": 2.093071937561035, + "658": 2.1227643489837646, + "659": 2.0809314250946045, + "660": 2.0938491821289062, + "661": 2.1232917308807373, + "662": 2.106125831604004, + "663": 2.070392608642578, + "664": 2.0769429206848145, + "665": 2.086477041244507, + "666": 2.0766024589538574, + "667": 2.093046188354492, + "668": 2.058943748474121, + "669": 2.053786516189575, + "670": 2.085644245147705, + "671": 2.079237699508667, + "672": 2.0442051887512207, + "673": 2.042421579360962, + "674": 2.060612678527832, + "675": 2.0651750564575195, + "676": 2.0722737312316895, + "677": 2.077267646789551, + "678": 2.0851898193359375, + "679": 2.023092269897461, + "680": 2.027134895324707, + "681": 2.049607276916504, + "682": 2.08774995803833, + "683": 2.0501937866210938, + "684": 2.043001651763916, + "685": 2.05068302154541, + "686": 2.0486350059509277, + "687": 2.0478270053863525, + "688": 2.05025577545166, + "689": 2.0634207725524902, + "690": 2.020798683166504, + "691": 2.042860984802246, + "692": 2.0566587448120117, + "693": 2.061901330947876, + "694": 2.0499391555786133, + "695": 2.0715231895446777, + "696": 2.0470504760742188, + "697": 2.0321576595306396, + "698": 2.041411876678467, + "699": 2.0115394592285156, + "700": 2.0329654216766357, + "701": 2.036001205444336 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 3.28376, + 2.96803, + 2.67057, + 2.92361, + 3.39298, + 3.66195, + 3.03807, + 2.50494, + 3.07134, + 3.93302, + 4.07735, + 3.73546, + 2.99177, + 2.83837, + 3.38824, + 4.24064, + 5.15996, + 4.77372, + 4.67511, + 5.17178, + 5.9309, + 5.73382, + 4.57488, + 3.93779, + 4.09412, + 4.81565, + 4.50268, + 3.88691, + 3.73103, + 3.99631, + 4.87508, + 4.98059, + 5.70914, + 6.49097, + 5.73757, + 5.26175, + 4.86758, + 5.15554, + 5.8284, + 6.01091, + 5.24336, + 4.54903, + 3.59453, + 3.80105, + 4.11261, + 4.34289, + 5.2265, + 5.4864, + 4.94888, + 5.14096, + 6.51293, + 7.75189, + 7.31261, + 7.60573 + ], + "train_epoch_time": 5.059199810028076, + "train_loss": 2.0195599937712614, + "train_score": 0.4029725609328518, + "val_loss": 2.13833751689416, + "val_score": 0.37244815588955493 + }, + { + "epoch": 13, + "grad_norm": 0.36634698510169983, + "learning_rate": 0.3093333333333334, + "model_norm": 87.95773315429688, + "step_logs": { + "grad_norm": { + "702": 0.5229237675666809, + "703": 0.5588756799697876, + "704": 0.5312861800193787, + "705": 0.5209408402442932, + "706": 0.5288822650909424, + "707": 0.50210040807724, + "708": 0.5301359295845032, + "709": 0.5049073100090027, + "710": 0.5181417465209961, + "711": 0.5633845925331116, + "712": 0.5114988088607788, + "713": 0.4986511468887329, + "714": 0.486305832862854, + "715": 0.4631892144680023, + "716": 0.46501418948173523, + "717": 0.46146804094314575, + "718": 0.47717010974884033, + "719": 0.4686819016933441, + "720": 0.4504156708717346, + "721": 0.4145474135875702, + "722": 0.4249396324157715, + "723": 0.4134324789047241, + "724": 0.44097408652305603, + "725": 0.45682865381240845, + "726": 0.47396573424339294, + "727": 0.5203550457954407, + "728": 0.48601025342941284, + "729": 0.4258767068386078, + "730": 0.4721459746360779, + "731": 0.4485870897769928, + "732": 0.45657941699028015, + "733": 0.4614412784576416, + "734": 0.4318462014198303, + "735": 0.42194807529449463, + "736": 0.40368345379829407, + "737": 0.3855956494808197, + "738": 0.42440441250801086, + "739": 0.42616212368011475, + "740": 0.4026164412498474, + "741": 0.4223616123199463, + "742": 0.39871248602867126, + "743": 0.3541635274887085, + "744": 0.39917099475860596, + "745": 0.41964536905288696, + "746": 0.4138050675392151, + "747": 0.4086674451828003, + "748": 0.41194820404052734, + "749": 0.4213542640209198, + "750": 0.3916550576686859, + "751": 0.39856600761413574, + "752": 0.42884230613708496, + "753": 0.42071759700775146, + "754": 0.3940809965133667, + "755": 0.36634698510169983 + }, + "loss": { + "702": 2.0297257900238037, + "703": 2.030059337615967, + "704": 2.0213115215301514, + "705": 2.0130062103271484, + "706": 2.0177547931671143, + "707": 1.9900016784667969, + "708": 2.010685920715332, + "709": 1.9931070804595947, + "710": 2.01471209526062, + "711": 2.0091209411621094, + "712": 1.9739407300949097, + "713": 2.0124783515930176, + "714": 2.0259242057800293, + "715": 2.004572868347168, + "716": 2.015043258666992, + "717": 2.003054141998291, + "718": 2.010009765625, + "719": 2.0349745750427246, + "720": 2.0104012489318848, + "721": 1.9826961755752563, + "722": 2.0095744132995605, + "723": 2.0087170600891113, + "724": 2.0249760150909424, + "725": 1.9865930080413818, + "726": 1.9993476867675781, + "727": 2.002136707305908, + "728": 2.0181610584259033, + "729": 1.9923796653747559, + "730": 2.0079102516174316, + "731": 2.0172810554504395, + "732": 1.9989569187164307, + "733": 1.9876576662063599, + "734": 1.997286081314087, + "735": 2.011650800704956, + "736": 1.9871288537979126, + "737": 1.9906885623931885, + "738": 1.983586311340332, + "739": 2.0142931938171387, + "740": 1.9857032299041748, + "741": 1.9814748764038086, + "742": 2.005915641784668, + "743": 1.9845361709594727, + "744": 2.0007758140563965, + "745": 1.971826195716858, + "746": 1.9957859516143799, + "747": 1.9714488983154297, + "748": 1.9844257831573486, + "749": 1.9937916994094849, + "750": 1.99928879737854, + "751": 1.9672081470489502, + "752": 2.0039079189300537, + "753": 1.97898268699646, + "754": 1.9656238555908203, + "755": 1.9715864658355713 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 7.42268, + 6.49948, + 7.16104, + 7.41768, + 7.21357, + 7.89355, + 7.15434, + 7.81821, + 7.5044, + 6.32989, + 7.54475, + 8.09352, + 8.56652, + 9.3434, + 9.31863, + 9.4061, + 8.82778, + 9.26409, + 9.90959, + 11.5374, + 11.1288, + 11.7519, + 10.4134, + 9.51924, + 8.90009, + 7.39425, + 8.54407, + 10.9851, + 9.00724, + 10.0247, + 9.58894, + 9.33488, + 10.7098, + 11.2989, + 12.1939, + 13.3887, + 11.0126, + 11.0911, + 12.2499, + 11.1076, + 12.6181, + 15.8216, + 12.5568, + 11.1971, + 11.6553, + 11.8044, + 11.6936, + 11.2301, + 13.0337, + 12.3837, + 10.8964, + 11.1805, + 12.657, + 14.6903 + ], + "train_epoch_time": 5.056206703186035, + "train_loss": 1.9774697557925494, + "train_score": 0.41260199953014914, + "val_loss": 2.1022859659862845, + "val_score": 0.3795924221607081 + }, + { + "epoch": 14, + "grad_norm": 0.33583253622055054, + "learning_rate": 0.1546666666666667, + "model_norm": 87.96804809570312, + "step_logs": { + "grad_norm": { + "756": 0.3802046775817871, + "757": 0.3616058826446533, + "758": 0.3936336636543274, + "759": 0.3817594349384308, + "760": 0.37455689907073975, + "761": 0.3480111360549927, + "762": 0.3552054762840271, + "763": 0.39839914441108704, + "764": 0.4005875587463379, + "765": 0.35761916637420654, + "766": 0.3719208240509033, + "767": 0.37680840492248535, + "768": 0.3443419635295868, + "769": 0.3533635139465332, + "770": 0.36023086309432983, + "771": 0.38463178277015686, + "772": 0.3412850499153137, + "773": 0.3631596267223358, + "774": 0.3623299300670624, + "775": 0.3818444013595581, + "776": 0.3888358473777771, + "777": 0.3385566473007202, + "778": 0.3568721115589142, + "779": 0.35211560130119324, + "780": 0.36498215794563293, + "781": 0.35718590021133423, + "782": 0.3571947515010834, + "783": 0.36193642020225525, + "784": 0.3788422644138336, + "785": 0.3368608355522156, + "786": 0.36773037910461426, + "787": 0.31993165612220764, + "788": 0.3490808308124542, + "789": 0.3746925890445709, + "790": 0.36006924510002136, + "791": 0.38169974088668823, + "792": 0.3476676046848297, + "793": 0.3273763954639435, + "794": 0.36559224128723145, + "795": 0.35534340143203735, + "796": 0.3426799774169922, + "797": 0.36438578367233276, + "798": 0.3359687030315399, + "799": 0.3274863064289093, + "800": 0.36267516016960144, + "801": 0.35557401180267334, + "802": 0.3628932535648346, + "803": 0.32415181398391724, + "804": 0.34925830364227295, + "805": 0.3335937559604645, + "806": 0.31899911165237427, + "807": 0.35474878549575806, + "808": 0.3257681429386139, + "809": 0.33583253622055054 + }, + "loss": { + "756": 1.9527702331542969, + "757": 1.967621088027954, + "758": 1.9965500831604004, + "759": 1.9632370471954346, + "760": 1.946903944015503, + "761": 1.9913570880889893, + "762": 1.992281436920166, + "763": 1.9814878702163696, + "764": 1.989046573638916, + "765": 1.962976336479187, + "766": 1.9798290729522705, + "767": 1.9996309280395508, + "768": 1.9602906703948975, + "769": 1.9398014545440674, + "770": 1.99808931350708, + "771": 1.967024803161621, + "772": 1.9892666339874268, + "773": 1.9669721126556396, + "774": 1.9795551300048828, + "775": 1.9853723049163818, + "776": 1.9783544540405273, + "777": 1.9808259010314941, + "778": 1.94935941696167, + "779": 1.975574016571045, + "780": 1.9466999769210815, + "781": 1.955148696899414, + "782": 1.9818899631500244, + "783": 1.9680747985839844, + "784": 2.005983352661133, + "785": 1.954211711883545, + "786": 1.972951054573059, + "787": 1.9691839218139648, + "788": 2.0018749237060547, + "789": 1.9747917652130127, + "790": 1.9653880596160889, + "791": 1.9741907119750977, + "792": 1.9759174585342407, + "793": 1.97254478931427, + "794": 1.9574637413024902, + "795": 1.985666036605835, + "796": 1.9525442123413086, + "797": 1.9861587285995483, + "798": 1.964959740638733, + "799": 1.9495489597320557, + "800": 1.978536605834961, + "801": 1.9767224788665771, + "802": 1.9514148235321045, + "803": 1.954641580581665, + "804": 1.9631741046905518, + "805": 1.962278962135315, + "806": 1.9704556465148926, + "807": 1.972823977470398, + "808": 1.9327316284179688, + "809": 1.9612082242965698 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 13.5088, + 15.0477, + 12.8853, + 13.4708, + 13.8774, + 16.4423, + 15.7903, + 12.484, + 12.3951, + 15.3488, + 14.3129, + 14.0834, + 16.5326, + 15.5351, + 15.3976, + 13.296, + 17.0788, + 14.9143, + 15.0785, + 13.6166, + 13.0849, + 17.2816, + 15.3062, + 15.9339, + 14.6136, + 15.3247, + 15.5335, + 15.0237, + 13.9769, + 17.2215, + 14.5901, + 19.2385, + 16.428, + 14.066, + 15.1592, + 13.5502, + 16.3471, + 18.4048, + 14.6454, + 15.7257, + 16.6274, + 14.9586, + 17.4083, + 18.1781, + 15.0421, + 15.6346, + 14.8181, + 18.6025, + 16.094, + 17.6329, + 19.3637, + 15.6764, + 18.2119, + 17.3891 + ], + "train_epoch_time": 5.056036472320557, + "train_loss": 1.9639772283808572, + "train_score": 0.4159892844589403, + "val_loss": 2.092978860973354, + "val_score": 0.38209044923054225 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:30:41.397832", + "final_model_norm": 87.96804809570312, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:28:56.561042", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 12.606510162353516, + "learning_rate": 4.64e-11, + "model_norm": 87.41987609863281, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 6.298553466796875, + "3": 8.560247421264648, + "4": 17.110065460205078, + "5": 5.777316093444824, + "6": 3.025073289871216, + "7": 4.015471458435059, + "8": 7.4302473068237305, + "9": 4.279419422149658, + "10": 5.940033912658691, + "11": 7.862672805786133, + "12": 61.43458938598633, + "13": 6.147928237915039, + "14": 32.13908767700195, + "15": 4.184981822967529, + "16": 13.011384010314941, + "17": 14.165390014648438, + "18": 13.909735679626465, + "19": 8.554447174072266, + "20": 3.96052622795105, + "21": 18.339412689208984, + "22": 8.036703109741211, + "23": 8.141822814941406, + "24": 25.710947036743164, + "25": 3.3410255908966064, + "26": 12.816083908081055, + "27": 16.520401000976562, + "28": 3.8276913166046143, + "29": 6.947815895080566, + "30": 4.014272212982178, + "31": 4.714061737060547, + "32": 5.406624794006348, + "33": 15.096064567565918, + "34": 6.352755546569824, + "35": 12.0445556640625, + "36": 8.452657699584961, + "37": 7.5752058029174805, + "38": 3.7555744647979736, + "39": 17.629322052001953, + "40": 18.58986473083496, + "41": 3.2410812377929688, + "42": 7.705453872680664, + "43": 18.782581329345703, + "44": 8.247995376586914, + "45": 6.117463111877441, + "46": 3.738041639328003, + "47": 8.339092254638672, + "48": 3.7876646518707275, + "49": 7.20920467376709, + "50": 7.390130043029785, + "51": 6.423828601837158, + "52": 3.014044761657715, + "53": 12.606510162353516 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.7870278358459473, + "3": 3.843776226043701, + "4": 4.448698997497559, + "5": 4.182013988494873, + "6": 3.5125656127929688, + "7": 3.5778086185455322, + "8": 3.8151068687438965, + "9": 4.662734031677246, + "10": 3.905299186706543, + "11": 4.246630668640137, + "12": 4.94174337387085, + "13": 4.868429183959961, + "14": 6.734435081481934, + "15": 3.885507822036743, + "16": 4.395578861236572, + "17": 6.381930828094482, + "18": 4.0087666511535645, + "19": 3.788578987121582, + "20": 3.294801950454712, + "21": 3.505646228790283, + "22": 3.587235450744629, + "23": 3.910609006881714, + "24": 4.010260581970215, + "25": 3.143622398376465, + "26": 4.334235191345215, + "27": 6.767123222351074, + "28": 3.5167510509490967, + "29": 4.420467853546143, + "30": 3.5368971824645996, + "31": 4.082586288452148, + "32": 4.310230255126953, + "33": 6.0619401931762695, + "34": 4.213866233825684, + "35": 3.9019479751586914, + "36": 3.730398416519165, + "37": 4.163652420043945, + "38": 3.4271225929260254, + "39": 4.799983024597168, + "40": 5.101315498352051, + "41": 3.415433883666992, + "42": 4.737327575683594, + "43": 4.968088150024414, + "44": 3.970263957977295, + "45": 3.909759044647217, + "46": 3.664367198944092, + "47": 4.095044136047363, + "48": 3.687072277069092, + "49": 3.828706979751587, + "50": 3.560635566711426, + "51": 3.4306516647338867, + "52": 3.3406014442443848, + "53": 5.411184310913086 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 0.00874362, + 0.00824185, + 0.095459, + 0.0524548, + 0.015196, + 0.125295, + 0.383842, + 0.221893, + 0.0691035, + 0.254607, + 0.110682, + 0.0686917, + 0.00130935, + 0.128805, + 0.0065198, + 0.221851, + 0.0259639, + 0.031805, + 0.0207192, + 0.0517717, + 0.21005, + 0.0104231, + 0.0555398, + 0.0589931, + 0.00606648, + 0.281625, + 0.0263877, + 0.0247949, + 0.240031, + 0.0915739, + 0.219487, + 0.183715, + 0.147451, + 0.0266002, + 0.104413, + 0.0268968, + 0.0522118, + 0.072558, + 0.242984, + 0.0154443, + 0.0147615, + 0.325137, + 0.0797879, + 0.0140825, + 0.058361, + 0.104474, + 0.262247, + 0.0588872, + 0.257003, + 0.0736677, + 0.0651964, + 0.0831359, + 0.367727, + 0.0340489 + ], + "train_epoch_time": 5.057607889175415, + "train_loss": 3.488603684960342, + "train_score": 0.15680595404619466, + "val_loss": 3.5175707800106393, + "val_score": 0.15275635051056657 + }, + { + "epoch": 1, + "grad_norm": 2.086177110671997, + "learning_rate": 0.464, + "model_norm": 87.38078308105469, + "step_logs": { + "grad_norm": { + "54": 7.211445331573486, + "55": 5.573832988739014, + "56": 4.934818744659424, + "57": 2.683844566345215, + "58": 3.7610249519348145, + "59": 9.699939727783203, + "60": 6.837521076202393, + "61": 8.089282989501953, + "62": 5.719318389892578, + "63": 6.253697872161865, + "64": 3.220794916152954, + "65": 7.582128047943115, + "66": 11.214761734008789, + "67": 3.2234272956848145, + "68": 6.003808498382568, + "69": 2.8221869468688965, + "70": 5.528847694396973, + "71": 4.349385738372803, + "72": 5.863676071166992, + "73": 4.172426700592041, + "74": 9.085822105407715, + "75": 2.6077518463134766, + "76": 12.66690731048584, + "77": 3.5533547401428223, + "78": 3.201880693435669, + "79": 7.03429651260376, + "80": 4.152105331420898, + "81": 4.5799384117126465, + "82": 3.683461904525757, + "83": 19.63254737854004, + "84": 6.914511203765869, + "85": 2.5504748821258545, + "86": 8.619253158569336, + "87": 3.1781845092773438, + "88": 6.734860897064209, + "89": 2.6362171173095703, + "90": 7.103269100189209, + "91": 3.6764862537384033, + "92": 4.495292663574219, + "93": 2.966806411743164, + "94": 7.526096343994141, + "95": 2.2169010639190674, + "96": 9.367757797241211, + "97": 2.8542661666870117, + "98": 3.140660285949707, + "99": 3.3742315769195557, + "100": 4.749008655548096, + "101": 3.7978298664093018, + "102": 4.022359848022461, + "103": 2.3437440395355225, + "104": 3.0056989192962646, + "105": 5.0609869956970215, + "106": 3.900080442428589, + "107": 2.086177110671997 + }, + "loss": { + "54": 3.4821786880493164, + "55": 3.524817943572998, + "56": 3.4969308376312256, + "57": 2.9422590732574463, + "58": 3.396406888961792, + "59": 4.774542331695557, + "60": 4.1636199951171875, + "61": 5.306468963623047, + "62": 4.275067329406738, + "63": 3.852780342102051, + "64": 3.3348326683044434, + "65": 4.176594257354736, + "66": 4.1349711418151855, + "67": 3.3686180114746094, + "68": 4.030369281768799, + "69": 3.394491195678711, + "70": 4.017953395843506, + "71": 3.5812599658966064, + "72": 3.936899185180664, + "73": 3.600769281387329, + "74": 4.130125522613525, + "75": 3.312443733215332, + "76": 5.475100517272949, + "77": 3.4123334884643555, + "78": 3.7659895420074463, + "79": 4.2888641357421875, + "80": 3.751763343811035, + "81": 3.5230894088745117, + "82": 3.8292384147644043, + "83": 8.19796371459961, + "84": 4.1747541427612305, + "85": 3.5090599060058594, + "86": 4.884117126464844, + "87": 3.2893052101135254, + "88": 4.006354331970215, + "89": 3.4439144134521484, + "90": 4.2537126541137695, + "91": 3.4454574584960938, + "92": 3.654297351837158, + "93": 3.37580943107605, + "94": 5.030043601989746, + "95": 3.084895372390747, + "96": 5.440011978149414, + "97": 3.31675124168396, + "98": 3.6908421516418457, + "99": 3.5664219856262207, + "100": 4.165191650390625, + "101": 3.666332721710205, + "102": 3.603498697280884, + "103": 3.365772247314453, + "104": 3.3738274574279785, + "105": 3.901677370071411, + "106": 3.3689045906066895, + "107": 3.2488253116607666 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.0669586, + 0.113456, + 0.143597, + 0.408476, + 0.240108, + 0.0507451, + 0.0890582, + 0.0810934, + 0.130694, + 0.0985146, + 0.321476, + 0.0726507, + 0.032877, + 0.324202, + 0.111813, + 0.42619, + 0.131442, + 0.189313, + 0.114502, + 0.206832, + 0.0500305, + 0.487098, + 0.0341233, + 0.270255, + 0.367341, + 0.0866764, + 0.21762, + 0.167959, + 0.282228, + 0.0212693, + 0.0873188, + 0.539447, + 0.0657426, + 0.325646, + 0.0883267, + 0.495553, + 0.0843047, + 0.254907, + 0.180837, + 0.38353, + 0.0888039, + 0.627694, + 0.061991, + 0.407121, + 0.374183, + 0.313244, + 0.184684, + 0.254191, + 0.222722, + 0.612723, + 0.37345, + 0.152328, + 0.221484, + 0.74649 + ], + "train_epoch_time": 5.054611682891846, + "train_loss": 3.7866450488994934, + "train_score": 0.16468010223236795, + "val_loss": 3.796820852430762, + "val_score": 0.16142096041127543 + }, + { + "epoch": 2, + "grad_norm": 1.5617152452468872, + "learning_rate": 0.464, + "model_norm": 87.29520416259766, + "step_logs": { + "grad_norm": { + "108": 5.174774646759033, + "109": 4.12007999420166, + "110": 1.7303886413574219, + "111": 4.427254676818848, + "112": 2.8291821479797363, + "113": 4.789940357208252, + "114": 2.6759793758392334, + "115": 5.9315972328186035, + "116": 2.50423002243042, + "117": 2.4670097827911377, + "118": 2.942711353302002, + "119": 3.0502896308898926, + "120": 3.9882960319519043, + "121": 3.0690927505493164, + "122": 8.91759204864502, + "123": 2.70465350151062, + "124": 7.68349027633667, + "125": 4.860230922698975, + "126": 4.693564414978027, + "127": 2.9068589210510254, + "128": 3.3190314769744873, + "129": 3.3391661643981934, + "130": 3.1243960857391357, + "131": 3.5345096588134766, + "132": 2.4333200454711914, + "133": 3.634084463119507, + "134": 2.0293712615966797, + "135": 1.925835371017456, + "136": 2.5134263038635254, + "137": 2.383866786956787, + "138": 2.1264994144439697, + "139": 2.248568296432495, + "140": 3.6036765575408936, + "141": 2.16681170463562, + "142": 3.3844339847564697, + "143": 2.4891295433044434, + "144": 4.069764614105225, + "145": 4.275892734527588, + "146": 1.7504932880401611, + "147": 3.4661357402801514, + "148": 1.5262902975082397, + "149": 3.0400474071502686, + "150": 1.547815203666687, + "151": 1.8305917978286743, + "152": 1.9314154386520386, + "153": 2.2506613731384277, + "154": 3.288557291030884, + "155": 1.6452332735061646, + "156": 1.8329997062683105, + "157": 2.162513256072998, + "158": 1.7185066938400269, + "159": 2.794290542602539, + "160": 1.88178551197052, + "161": 1.5617152452468872 + }, + "loss": { + "108": 3.784679412841797, + "109": 3.881744146347046, + "110": 2.9859018325805664, + "111": 3.584927558898926, + "112": 3.4188284873962402, + "113": 3.917698383331299, + "114": 3.1544692516326904, + "115": 4.037147521972656, + "116": 3.2700865268707275, + "117": 3.2213311195373535, + "118": 3.755624771118164, + "119": 3.42659592628479, + "120": 3.9031686782836914, + "121": 3.338649272918701, + "122": 4.3641157150268555, + "123": 3.478846549987793, + "124": 4.221761703491211, + "125": 3.5050649642944336, + "126": 3.3685100078582764, + "127": 3.173963785171509, + "128": 3.6466803550720215, + "129": 3.533693552017212, + "130": 3.632357597351074, + "131": 3.740203380584717, + "132": 3.5694665908813477, + "133": 3.4836928844451904, + "134": 3.4337148666381836, + "135": 2.996690273284912, + "136": 3.208099842071533, + "137": 3.6351749897003174, + "138": 3.281034469604492, + "139": 3.291280746459961, + "140": 3.620832920074463, + "141": 3.2909412384033203, + "142": 3.167038917541504, + "143": 3.306180477142334, + "144": 3.371760845184326, + "145": 3.1871633529663086, + "146": 3.0126659870147705, + "147": 3.1859991550445557, + "148": 2.9950218200683594, + "149": 3.101545810699463, + "150": 3.1349854469299316, + "151": 2.9979517459869385, + "152": 3.1339712142944336, + "153": 3.026371955871582, + "154": 3.495938301086426, + "155": 3.0939064025878906, + "156": 2.956056594848633, + "157": 3.3256912231445312, + "158": 3.0154240131378174, + "159": 3.2236666679382324, + "160": 3.375075101852417, + "161": 2.961386203765869 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.141334, + 0.228673, + 0.997214, + 0.182899, + 0.427126, + 0.170754, + 0.440515, + 0.114744, + 0.521448, + 0.52929, + 0.433697, + 0.368282, + 0.245382, + 0.354447, + 0.0548784, + 0.475567, + 0.0715116, + 0.148382, + 0.152909, + 0.375625, + 0.331036, + 0.316922, + 0.372097, + 0.29939, + 0.602844, + 0.263785, + 0.83376, + 0.807985, + 0.507827, + 0.639678, + 0.725572, + 0.650958, + 0.278815, + 0.700935, + 0.276491, + 0.533619, + 0.203572, + 0.174321, + 0.983173, + 0.265188, + 1.28566, + 0.335597, + 1.30857, + 0.894626, + 0.840124, + 0.597451, + 0.323261, + 1.14302, + 0.879808, + 0.711155, + 1.02105, + 0.412864, + 0.95311, + 1.2142 + ], + "train_epoch_time": 5.058100938796997, + "train_loss": 2.9174963414070425, + "train_score": 0.2051638719255647, + "val_loss": 2.934233468107186, + "val_score": 0.20034891656085627 + }, + { + "epoch": 3, + "grad_norm": 0.8685116171836853, + "learning_rate": 0.464, + "model_norm": 87.30843353271484, + "step_logs": { + "grad_norm": { + "162": 1.3233016729354858, + "163": 1.3805890083312988, + "164": 1.3771088123321533, + "165": 1.619173288345337, + "166": 2.123507261276245, + "167": 1.580917239189148, + "168": 1.569885492324829, + "169": 1.4523284435272217, + "170": 1.5622698068618774, + "171": 1.8772118091583252, + "172": 1.3246896266937256, + "173": 1.1375446319580078, + "174": 1.2851296663284302, + "175": 1.437145709991455, + "176": 1.4072016477584839, + "177": 1.1651967763900757, + "178": 1.0694130659103394, + "179": 1.033494472503662, + "180": 1.160187840461731, + "181": 1.1136846542358398, + "182": 0.9990133047103882, + "183": 1.0658434629440308, + "184": 1.4890798330307007, + "185": 1.3053770065307617, + "186": 0.9689182639122009, + "187": 0.8400119543075562, + "188": 1.236419439315796, + "189": 1.0966224670410156, + "190": 0.8431130647659302, + "191": 0.8443288803100586, + "192": 1.1127458810806274, + "193": 0.9202256202697754, + "194": 0.6413537263870239, + "195": 0.7446667551994324, + "196": 0.9289188981056213, + "197": 1.1127568483352661, + "198": 1.0037789344787598, + "199": 0.8704903721809387, + "200": 0.8721373081207275, + "201": 0.9219102263450623, + "202": 0.8627682328224182, + "203": 0.8936599493026733, + "204": 0.9451618194580078, + "205": 1.1369582414627075, + "206": 0.9595654606819153, + "207": 0.6201716065406799, + "208": 0.6199973821640015, + "209": 0.748771071434021, + "210": 0.7995222210884094, + "211": 0.7926169037818909, + "212": 0.8660521507263184, + "213": 0.861849844455719, + "214": 0.9272751212120056, + "215": 0.8685116171836853 + }, + "loss": { + "162": 2.907710552215576, + "163": 2.7727720737457275, + "164": 2.9045190811157227, + "165": 2.775491237640381, + "166": 3.0231313705444336, + "167": 3.0611536502838135, + "168": 2.847963333129883, + "169": 2.873194694519043, + "170": 2.823009490966797, + "171": 2.9426791667938232, + "172": 2.990231513977051, + "173": 2.751009702682495, + "174": 2.726351737976074, + "175": 2.8155994415283203, + "176": 2.7724971771240234, + "177": 2.7551820278167725, + "178": 2.71229887008667, + "179": 2.6968798637390137, + "180": 2.657564878463745, + "181": 2.7476954460144043, + "182": 2.6545183658599854, + "183": 2.6813511848449707, + "184": 2.711733818054199, + "185": 2.845114231109619, + "186": 2.71380877494812, + "187": 2.6293625831604004, + "188": 2.6584177017211914, + "189": 2.7791028022766113, + "190": 2.6139655113220215, + "191": 2.6295032501220703, + "192": 2.6224162578582764, + "193": 2.7156434059143066, + "194": 2.561741352081299, + "195": 2.58410382270813, + "196": 2.5837948322296143, + "197": 2.663771867752075, + "198": 2.6909401416778564, + "199": 2.612114906311035, + "200": 2.613621711730957, + "201": 2.598762035369873, + "202": 2.6329920291900635, + "203": 2.597872257232666, + "204": 2.628574848175049, + "205": 2.6163954734802246, + "206": 2.721754312515259, + "207": 2.550924777984619, + "208": 2.541594982147217, + "209": 2.575082540512085, + "210": 2.5971484184265137, + "211": 2.5586647987365723, + "212": 2.586764097213745, + "213": 2.5999419689178467, + "214": 2.6009154319763184, + "215": 2.617593765258789 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 1.66048, + 1.45474, + 1.53157, + 1.05865, + 0.670424, + 1.2248, + 1.15558, + 1.36218, + 1.15665, + 0.835057, + 1.70403, + 2.12596, + 1.65078, + 1.36323, + 1.4001, + 2.02933, + 2.37163, + 2.52491, + 1.97436, + 2.21536, + 2.65976, + 2.3603, + 1.22296, + 1.66966, + 2.89071, + 3.72631, + 1.73897, + 2.31095, + 3.67729, + 3.68851, + 2.11792, + 3.20689, + 6.22788, + 4.66, + 2.99435, + 2.15128, + 2.67072, + 3.44718, + 3.43616, + 3.05766, + 3.53721, + 3.25292, + 2.94244, + 2.02402, + 2.95597, + 6.63245, + 6.61191, + 4.59296, + 4.0629, + 4.07274, + 3.44881, + 3.50026, + 3.02489, + 3.47017 + ], + "train_epoch_time": 5.055733919143677, + "train_loss": 2.5681115788743005, + "train_score": 0.253008428972797, + "val_loss": 2.6080097133886118, + "val_score": 0.24912098152766135 + }, + { + "epoch": 4, + "grad_norm": 0.6302034258842468, + "learning_rate": 0.464, + "model_norm": 87.34224700927734, + "step_logs": { + "grad_norm": { + "216": 0.8106901049613953, + "217": 0.8104560971260071, + "218": 1.0373423099517822, + "219": 1.0504659414291382, + "220": 0.9251309633255005, + "221": 0.8067808747291565, + "222": 0.7914109826087952, + "223": 0.853254497051239, + "224": 0.9180495142936707, + "225": 0.9719438552856445, + "226": 0.9364591836929321, + "227": 0.9152469038963318, + "228": 0.7902749180793762, + "229": 0.8063154816627502, + "230": 0.8938575387001038, + "231": 0.8603021502494812, + "232": 0.7564916014671326, + "233": 0.7629519701004028, + "234": 0.8521064519882202, + "235": 0.8194237947463989, + "236": 0.765617847442627, + "237": 0.7324422001838684, + "238": 0.7703807950019836, + "239": 0.826975405216217, + "240": 0.8410595059394836, + "241": 0.7533900141716003, + "242": 0.7005122303962708, + "243": 0.7487949728965759, + "244": 0.7345021963119507, + "245": 0.7100911140441895, + "246": 0.7654435634613037, + "247": 0.7817350029945374, + "248": 0.7169834971427917, + "249": 0.6669204235076904, + "250": 0.7327661514282227, + "251": 0.7401351928710938, + "252": 0.6776663064956665, + "253": 0.7443811297416687, + "254": 0.8012900948524475, + "255": 0.8181213736534119, + "256": 0.7857967019081116, + "257": 0.8760724663734436, + "258": 0.9442354440689087, + "259": 0.9672228097915649, + "260": 0.8491316437721252, + "261": 0.7369986176490784, + "262": 0.7399956583976746, + "263": 0.9814790487289429, + "264": 0.8665817379951477, + "265": 0.6729984879493713, + "266": 0.7656182050704956, + "267": 0.9237598180770874, + "268": 0.8459947109222412, + "269": 0.6302034258842468 + }, + "loss": { + "216": 2.5703470706939697, + "217": 2.5824482440948486, + "218": 2.554396867752075, + "219": 2.7196297645568848, + "220": 2.61134934425354, + "221": 2.6048154830932617, + "222": 2.568915367126465, + "223": 2.5954771041870117, + "224": 2.5981552600860596, + "225": 2.6149373054504395, + "226": 2.5968315601348877, + "227": 2.6212663650512695, + "228": 2.5800042152404785, + "229": 2.5684704780578613, + "230": 2.579496383666992, + "231": 2.6058897972106934, + "232": 2.5478274822235107, + "233": 2.5617589950561523, + "234": 2.5712289810180664, + "235": 2.5806498527526855, + "236": 2.5696377754211426, + "237": 2.5710458755493164, + "238": 2.544456720352173, + "239": 2.575880527496338, + "240": 2.568362236022949, + "241": 2.5800046920776367, + "242": 2.527122974395752, + "243": 2.558391809463501, + "244": 2.5342559814453125, + "245": 2.5380027294158936, + "246": 2.529360771179199, + "247": 2.554863929748535, + "248": 2.557806968688965, + "249": 2.543830394744873, + "250": 2.5246176719665527, + "251": 2.567251682281494, + "252": 2.5534110069274902, + "253": 2.5447916984558105, + "254": 2.54054594039917, + "255": 2.5529799461364746, + "256": 2.547912836074829, + "257": 2.5530014038085938, + "258": 2.5859453678131104, + "259": 2.5768985748291016, + "260": 2.582186698913574, + "261": 2.5333921909332275, + "262": 2.5380496978759766, + "263": 2.562659740447998, + "264": 2.633817672729492, + "265": 2.5268964767456055, + "266": 2.5306854248046875, + "267": 2.561985492706299, + "268": 2.577991008758545, + "269": 2.5246875286102295 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 3.91095, + 3.93163, + 2.3738, + 2.4646, + 3.05111, + 4.0019, + 4.10153, + 3.565, + 3.08271, + 2.76808, + 2.96119, + 3.12921, + 4.13108, + 3.95061, + 3.22848, + 3.5209, + 4.45207, + 4.40092, + 3.54122, + 3.84337, + 4.38377, + 4.79251, + 4.2873, + 3.76652, + 3.6308, + 4.54549, + 5.14985, + 4.5629, + 4.69747, + 5.03343, + 4.31702, + 4.1807, + 4.97565, + 5.71926, + 4.70181, + 4.68648, + 5.56018, + 4.59263, + 3.95683, + 3.81427, + 4.12633, + 3.32637, + 2.90041, + 2.75451, + 3.58127, + 4.66411, + 4.63491, + 2.66029, + 3.50725, + 5.57904, + 4.31731, + 3.00233, + 3.60202, + 6.35691 + ], + "train_epoch_time": 5.055838584899902, + "train_loss": 2.519261949407832, + "train_score": 0.2481068418306463, + "val_loss": 2.5630368206449, + "val_score": 0.2398733499916495 + }, + { + "epoch": 5, + "grad_norm": 0.6994946599006653, + "learning_rate": 0.464, + "model_norm": 87.3882827758789, + "step_logs": { + "grad_norm": { + "270": 0.6422821283340454, + "271": 0.7214401960372925, + "272": 0.693167507648468, + "273": 0.7043965458869934, + "274": 0.8024903535842896, + "275": 0.9068609476089478, + "276": 1.0002779960632324, + "277": 1.037976622581482, + "278": 0.9052839279174805, + "279": 0.6759456396102905, + "280": 0.547254204750061, + "281": 0.5854243636131287, + "282": 0.6983444690704346, + "283": 0.8034915328025818, + "284": 0.8466963768005371, + "285": 0.7880759239196777, + "286": 0.7148194909095764, + "287": 0.7774625420570374, + "288": 0.7428817749023438, + "289": 0.6522318124771118, + "290": 0.7207713723182678, + "291": 0.7975194454193115, + "292": 0.827538251876831, + "293": 0.7705461978912354, + "294": 0.7459602952003479, + "295": 0.7716484665870667, + "296": 0.7719955444335938, + "297": 0.8776944875717163, + "298": 0.8854125738143921, + "299": 0.7333412170410156, + "300": 0.6780964732170105, + "301": 0.6807690262794495, + "302": 0.7484579682350159, + "303": 0.8476479649543762, + "304": 0.823849618434906, + "305": 0.8078522086143494, + "306": 0.7772826552391052, + "307": 0.7639259696006775, + "308": 0.7532915472984314, + "309": 0.881880521774292, + "310": 0.8273184299468994, + "311": 0.7544909119606018, + "312": 0.8302048444747925, + "313": 1.113097071647644, + "314": 0.9231336712837219, + "315": 0.7493406534194946, + "316": 0.7301244139671326, + "317": 0.8729099631309509, + "318": 1.2465481758117676, + "319": 1.1864063739776611, + "320": 0.7233800888061523, + "321": 0.608873724937439, + "322": 0.6392090916633606, + "323": 0.6994946599006653 + }, + "loss": { + "270": 2.524271011352539, + "271": 2.520902156829834, + "272": 2.5403218269348145, + "273": 2.517536163330078, + "274": 2.538515090942383, + "275": 2.53365421295166, + "276": 2.586578845977783, + "277": 2.5857181549072266, + "278": 2.5882375240325928, + "279": 2.5453941822052, + "280": 2.4840569496154785, + "281": 2.501779079437256, + "282": 2.5191338062286377, + "283": 2.5484871864318848, + "284": 2.5336203575134277, + "285": 2.53934907913208, + "286": 2.531764030456543, + "287": 2.528679847717285, + "288": 2.536978006362915, + "289": 2.4981017112731934, + "290": 2.5091042518615723, + "291": 2.516425132751465, + "292": 2.519143581390381, + "293": 2.5180392265319824, + "294": 2.520934581756592, + "295": 2.5044312477111816, + "296": 2.5160160064697266, + "297": 2.5210094451904297, + "298": 2.557377338409424, + "299": 2.507169246673584, + "300": 2.495558977127075, + "301": 2.4843595027923584, + "302": 2.5111827850341797, + "303": 2.4824366569519043, + "304": 2.533926486968994, + "305": 2.4826643466949463, + "306": 2.5055675506591797, + "307": 2.4421491622924805, + "308": 2.5059823989868164, + "309": 2.4810914993286133, + "310": 2.5351505279541016, + "311": 2.4602432250976562, + "312": 2.486154317855835, + "313": 2.48165225982666, + "314": 2.5677247047424316, + "315": 2.4849119186401367, + "316": 2.4324758052825928, + "317": 2.478590726852417, + "318": 2.515430450439453, + "319": 2.5803279876708984, + "320": 2.528080940246582, + "321": 2.4399001598358154, + "322": 2.4211602210998535, + "323": 2.42375111579895 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 6.11905, + 4.84346, + 5.28704, + 5.07389, + 3.94185, + 3.08082, + 2.58514, + 2.39997, + 3.15816, + 5.57098, + 8.29437, + 7.29974, + 5.16549, + 3.94748, + 3.53416, + 4.08871, + 4.95485, + 4.18345, + 4.59703, + 5.87227, + 4.82974, + 3.95641, + 3.67855, + 4.24097, + 4.53033, + 4.20601, + 4.22167, + 3.27256, + 3.26215, + 4.66199, + 5.42731, + 5.36062, + 4.48274, + 3.45499, + 3.73334, + 3.80412, + 4.14713, + 4.18475, + 4.41623, + 3.19024, + 3.70389, + 4.32185, + 3.60709, + 2.00297, + 3.01314, + 4.4254, + 4.56305, + 3.25286, + 1.6188, + 1.83319, + 4.83123, + 6.58139, + 5.92567, + 4.95358 + ], + "train_epoch_time": 5.056430101394653, + "train_loss": 2.4404090834826957, + "train_score": 0.28100789106388174, + "val_loss": 2.4834488711045064, + "val_score": 0.27292623479522327 + }, + { + "epoch": 6, + "grad_norm": 0.785673975944519, + "learning_rate": 0.464, + "model_norm": 87.4376449584961, + "step_logs": { + "grad_norm": { + "324": 0.8277546763420105, + "325": 1.213570475578308, + "326": 1.0964643955230713, + "327": 0.9084147810935974, + "328": 0.7132125496864319, + "329": 0.6408405900001526, + "330": 0.6848652958869934, + "331": 0.9890743494033813, + "332": 0.9521641135215759, + "333": 1.1928578615188599, + "334": 1.180679202079773, + "335": 0.6317273378372192, + "336": 0.5312169194221497, + "337": 0.5352923274040222, + "338": 0.5947969555854797, + "339": 0.8144198060035706, + "340": 0.6884922981262207, + "341": 0.6703402996063232, + "342": 0.7199878692626953, + "343": 1.0107344388961792, + "344": 1.02140212059021, + "345": 0.8379251956939697, + "346": 0.817858099937439, + "347": 1.0277626514434814, + "348": 0.7688915133476257, + "349": 0.8017452359199524, + "350": 0.9028322100639343, + "351": 0.9798122048377991, + "352": 0.9224452376365662, + "353": 0.8207762837409973, + "354": 0.9562183618545532, + "355": 0.9558596611022949, + "356": 0.9506771564483643, + "357": 1.0603376626968384, + "358": 0.9694778919219971, + "359": 0.8912068605422974, + "360": 1.179460048675537, + "361": 0.6402373909950256, + "362": 0.626430332660675, + "363": 0.7695870995521545, + "364": 0.8449531197547913, + "365": 1.1801550388336182, + "366": 0.9892183542251587, + "367": 0.7648640275001526, + "368": 0.761760950088501, + "369": 0.8548610210418701, + "370": 0.8236075639724731, + "371": 0.8261203765869141, + "372": 0.7852128148078918, + "373": 0.7419406175613403, + "374": 0.8111518621444702, + "375": 0.8819378018379211, + "376": 0.8994942307472229, + "377": 0.785673975944519 + }, + "loss": { + "324": 2.442108392715454, + "325": 2.5207409858703613, + "326": 2.569349765777588, + "327": 2.5354785919189453, + "328": 2.456895351409912, + "329": 2.42048978805542, + "330": 2.42295503616333, + "331": 2.43660306930542, + "332": 2.505573272705078, + "333": 2.4801454544067383, + "334": 2.560189723968506, + "335": 2.490452766418457, + "336": 2.4108235836029053, + "337": 2.378988027572632, + "338": 2.434278964996338, + "339": 2.403108596801758, + "340": 2.4642446041107178, + "341": 2.401123523712158, + "342": 2.3979384899139404, + "343": 2.399630308151245, + "344": 2.5165722370147705, + "345": 2.410550594329834, + "346": 2.409177780151367, + "347": 2.420248031616211, + "348": 2.4806342124938965, + "349": 2.4206464290618896, + "350": 2.42093825340271, + "351": 2.45137882232666, + "352": 2.461182117462158, + "353": 2.4123477935791016, + "354": 2.421826124191284, + "355": 2.464864730834961, + "356": 2.4547338485717773, + "357": 2.45949125289917, + "358": 2.4851531982421875, + "359": 2.4057514667510986, + "360": 2.4261910915374756, + "361": 2.434016227722168, + "362": 2.4205543994903564, + "363": 2.4001364707946777, + "364": 2.421926975250244, + "365": 2.4282093048095703, + "366": 2.5044355392456055, + "367": 2.409379482269287, + "368": 2.3911569118499756, + "369": 2.3991599082946777, + "370": 2.4428553581237793, + "371": 2.378005266189575, + "372": 2.4198951721191406, + "373": 2.378296136856079, + "374": 2.3974616527557373, + "375": 2.3770086765289307, + "376": 2.4335920810699463, + "377": 2.395308494567871 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 3.5642, + 1.71158, + 2.13715, + 3.0725, + 4.83002, + 5.89391, + 5.16577, + 2.49073, + 2.76365, + 1.74301, + 1.83657, + 6.24049, + 8.54322, + 8.30253, + 6.8807, + 3.62307, + 5.19859, + 5.34348, + 4.62581, + 2.34893, + 2.41221, + 3.43325, + 3.60174, + 2.29126, + 4.19597, + 3.76581, + 2.97009, + 2.55343, + 2.89243, + 3.58088, + 2.64868, + 2.69777, + 2.71605, + 2.18754, + 2.6441, + 3.02896, + 1.74405, + 5.93802, + 6.16835, + 4.05248, + 3.39231, + 1.74344, + 2.55933, + 4.11848, + 4.1207, + 3.28298, + 3.60128, + 3.48439, + 3.92484, + 4.32044, + 3.64374, + 3.05601, + 3.00781, + 3.8804 + ], + "train_epoch_time": 5.060168266296387, + "train_loss": 2.364733493276785, + "train_score": 0.2980855452278936, + "val_loss": 2.402719494943367, + "val_score": 0.2901478194205003 + }, + { + "epoch": 7, + "grad_norm": 0.7446780800819397, + "learning_rate": 0.464, + "model_norm": 87.49325561523438, + "step_logs": { + "grad_norm": { + "378": 0.7412540912628174, + "379": 0.6758706569671631, + "380": 0.6203067898750305, + "381": 0.7059546709060669, + "382": 0.8634834289550781, + "383": 1.4178122282028198, + "384": 0.8562566637992859, + "385": 0.9920222759246826, + "386": 0.8814330697059631, + "387": 0.8397749066352844, + "388": 0.8395906686782837, + "389": 0.9221208095550537, + "390": 1.078867793083191, + "391": 0.937557578086853, + "392": 0.906349778175354, + "393": 0.9906315803527832, + "394": 0.9654544591903687, + "395": 1.0302199125289917, + "396": 0.9993744492530823, + "397": 0.7764483094215393, + "398": 0.7187926173210144, + "399": 0.835242748260498, + "400": 0.7996591925621033, + "401": 0.7218107581138611, + "402": 0.7053585052490234, + "403": 0.6843826174736023, + "404": 0.7178877592086792, + "405": 0.8363547921180725, + "406": 0.8745712041854858, + "407": 1.0696005821228027, + "408": 0.8717305660247803, + "409": 0.7696921229362488, + "410": 0.7357020378112793, + "411": 0.762376606464386, + "412": 0.7743775248527527, + "413": 0.7805383801460266, + "414": 0.7597991824150085, + "415": 0.8127756118774414, + "416": 0.8563180565834045, + "417": 0.8782772421836853, + "418": 1.390513300895691, + "419": 0.7838559150695801, + "420": 0.9620813727378845, + "421": 1.0983083248138428, + "422": 1.0849192142486572, + "423": 0.8763146996498108, + "424": 0.8500553965568542, + "425": 0.8986727595329285, + "426": 0.9238671064376831, + "427": 0.9122262001037598, + "428": 0.8475171327590942, + "429": 0.7676783204078674, + "430": 0.7072240114212036, + "431": 0.7446780800819397 + }, + "loss": { + "378": 2.3820009231567383, + "379": 2.3586697578430176, + "380": 2.3406410217285156, + "381": 2.327718496322632, + "382": 2.3691883087158203, + "383": 2.4289052486419678, + "384": 2.467930793762207, + "385": 2.4489784240722656, + "386": 2.4793336391448975, + "387": 2.3886806964874268, + "388": 2.3632330894470215, + "389": 2.3746049404144287, + "390": 2.461280345916748, + "391": 2.40838623046875, + "392": 2.376873016357422, + "393": 2.3735203742980957, + "394": 2.41024112701416, + "395": 2.407980442047119, + "396": 2.4014384746551514, + "397": 2.386277675628662, + "398": 2.3386921882629395, + "399": 2.3585050106048584, + "400": 2.3627965450286865, + "401": 2.3423075675964355, + "402": 2.3450348377227783, + "403": 2.3341221809387207, + "404": 2.3485474586486816, + "405": 2.317923069000244, + "406": 2.37357497215271, + "407": 2.362143039703369, + "408": 2.440795660018921, + "409": 2.328968048095703, + "410": 2.3197102546691895, + "411": 2.3233275413513184, + "412": 2.3452911376953125, + "413": 2.3078713417053223, + "414": 2.3342909812927246, + "415": 2.317479372024536, + "416": 2.360175609588623, + "417": 2.350813865661621, + "418": 2.350499391555786, + "419": 2.428170919418335, + "420": 2.407823085784912, + "421": 2.434412956237793, + "422": 2.4489293098449707, + "423": 2.3792316913604736, + "424": 2.3394975662231445, + "425": 2.3434176445007324, + "426": 2.340834140777588, + "427": 2.367696762084961, + "428": 2.3365559577941895, + "429": 2.338250160217285, + "430": 2.304725408554077, + "431": 2.3104665279388428 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 4.33519, + 5.16345, + 6.08306, + 4.67064, + 3.17754, + 1.2083, + 3.36608, + 2.48853, + 3.19122, + 3.38713, + 3.35252, + 2.79264, + 2.11458, + 2.73987, + 2.89344, + 2.41863, + 2.58581, + 2.26878, + 2.40445, + 3.95818, + 4.52653, + 3.38074, + 3.69502, + 4.4957, + 4.71335, + 4.9834, + 4.55708, + 3.31374, + 3.10322, + 2.06473, + 3.21194, + 3.93124, + 4.28578, + 3.99734, + 3.91103, + 3.78811, + 4.0435, + 3.50812, + 3.21865, + 3.04758, + 1.21565, + 3.95191, + 2.60136, + 2.01811, + 2.08057, + 3.09825, + 3.23764, + 2.90166, + 2.74253, + 2.84525, + 3.25296, + 3.96764, + 4.60792, + 4.16642 + ], + "train_epoch_time": 5.05716609954834, + "train_loss": 2.3124927835450797, + "train_score": 0.3259079089636782, + "val_loss": 2.3690079454713246, + "val_score": 0.31208291471894106 + }, + { + "epoch": 8, + "grad_norm": 0.6871085166931152, + "learning_rate": 0.464, + "model_norm": 87.5499496459961, + "step_logs": { + "grad_norm": { + "432": 0.8406437039375305, + "433": 0.9106219410896301, + "434": 0.8679929375648499, + "435": 0.828254759311676, + "436": 0.8065989017486572, + "437": 0.7938262820243835, + "438": 0.7247353792190552, + "439": 0.70723956823349, + "440": 0.6874766945838928, + "441": 0.7215283513069153, + "442": 0.7163143157958984, + "443": 0.6770814061164856, + "444": 0.6427270174026489, + "445": 0.7105298638343811, + "446": 1.0835726261138916, + "447": 0.8117125034332275, + "448": 0.7321748733520508, + "449": 0.7523022890090942, + "450": 0.858869731426239, + "451": 0.8754204511642456, + "452": 0.8608382940292358, + "453": 0.8122220039367676, + "454": 0.6952657103538513, + "455": 0.6551951766014099, + "456": 0.7092158198356628, + "457": 0.7777563333511353, + "458": 0.8984978795051575, + "459": 0.9228598475456238, + "460": 0.8857734799385071, + "461": 0.8402599692344666, + "462": 0.8428342342376709, + "463": 0.9065468311309814, + "464": 0.8301165699958801, + "465": 0.7544244527816772, + "466": 0.7236720323562622, + "467": 0.7239236235618591, + "468": 0.8174001574516296, + "469": 1.2561126947402954, + "470": 1.3807685375213623, + "471": 1.4568482637405396, + "472": 1.2447764873504639, + "473": 1.0912258625030518, + "474": 1.0666472911834717, + "475": 1.0848174095153809, + "476": 1.3703484535217285, + "477": 1.0738643407821655, + "478": 0.8555451035499573, + "479": 0.817487895488739, + "480": 0.7386631965637207, + "481": 0.6398859620094299, + "482": 0.6407454013824463, + "483": 0.6218218803405762, + "484": 0.6573324203491211, + "485": 0.6871085166931152 + }, + "loss": { + "432": 2.3365721702575684, + "433": 2.3326334953308105, + "434": 2.3443045616149902, + "435": 2.3600823879241943, + "436": 2.309640407562256, + "437": 2.3429317474365234, + "438": 2.257517099380493, + "439": 2.299229621887207, + "440": 2.279670000076294, + "441": 2.2828431129455566, + "442": 2.309788703918457, + "443": 2.2910618782043457, + "444": 2.296117067337036, + "445": 2.292524814605713, + "446": 2.2985317707061768, + "447": 2.385573387145996, + "448": 2.2994182109832764, + "449": 2.3080403804779053, + "450": 2.2937612533569336, + "451": 2.316946029663086, + "452": 2.3000221252441406, + "453": 2.3059005737304688, + "454": 2.2904603481292725, + "455": 2.2879676818847656, + "456": 2.2558422088623047, + "457": 2.3028252124786377, + "458": 2.27492094039917, + "459": 2.3308212757110596, + "460": 2.2834296226501465, + "461": 2.319279670715332, + "462": 2.2933809757232666, + "463": 2.2964305877685547, + "464": 2.2942891120910645, + "465": 2.2665939331054688, + "466": 2.2507266998291016, + "467": 2.286299228668213, + "468": 2.2815849781036377, + "469": 2.3181087970733643, + "470": 2.3836045265197754, + "471": 2.4310758113861084, + "472": 2.3726487159729004, + "473": 2.4063267707824707, + "474": 2.370751142501831, + "475": 2.3533759117126465, + "476": 2.3754220008850098, + "477": 2.390937089920044, + "478": 2.3499724864959717, + "479": 2.28918194770813, + "480": 2.3082470893859863, + "481": 2.231879711151123, + "482": 2.269561529159546, + "483": 2.2287118434906006, + "484": 2.2464981079101562, + "485": 2.24623441696167 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 3.3064, + 2.813, + 3.11158, + 3.44032, + 3.55001, + 3.71799, + 4.29806, + 4.59673, + 4.82343, + 4.385, + 4.50158, + 4.99753, + 5.55829, + 4.54098, + 1.95765, + 3.62066, + 4.28932, + 4.07811, + 3.10952, + 3.02331, + 3.10377, + 3.49535, + 4.73828, + 5.32977, + 4.48489, + 3.80692, + 2.81794, + 2.73676, + 2.91033, + 3.28493, + 3.22843, + 2.7943, + 3.32943, + 3.98238, + 4.29773, + 4.36262, + 3.41482, + 1.46919, + 1.25024, + 1.14543, + 1.53127, + 2.02081, + 2.08374, + 1.99976, + 1.26497, + 2.07333, + 3.21053, + 3.42545, + 4.23048, + 5.45087, + 5.52804, + 5.76397, + 5.19919, + 4.75778 + ], + "train_epoch_time": 5.055660724639893, + "train_loss": 2.25958987075937, + "train_score": 0.3259067880286377, + "val_loss": 2.3159942429867733, + "val_score": 0.31041905906663014 + }, + { + "epoch": 9, + "grad_norm": 0.907282829284668, + "learning_rate": 0.464, + "model_norm": 87.61580657958984, + "step_logs": { + "grad_norm": { + "486": 0.712562620639801, + "487": 0.7331148386001587, + "488": 0.7125338315963745, + "489": 0.7049823999404907, + "490": 0.7442678809165955, + "491": 0.7896958589553833, + "492": 0.8199194073677063, + "493": 0.9763243198394775, + "494": 0.8745664954185486, + "495": 0.7932464480400085, + "496": 0.8348462581634521, + "497": 0.8294863700866699, + "498": 0.8065939545631409, + "499": 0.7944414019584656, + "500": 0.9245107173919678, + "501": 1.0332345962524414, + "502": 1.1609129905700684, + "503": 1.1859999895095825, + "504": 1.1004586219787598, + "505": 1.0198930501937866, + "506": 0.8427152037620544, + "507": 0.6482487320899963, + "508": 0.5940559506416321, + "509": 0.6011734008789062, + "510": 0.654750406742096, + "511": 0.6831381916999817, + "512": 0.686622142791748, + "513": 0.71522456407547, + "514": 0.7784596681594849, + "515": 0.7902299761772156, + "516": 0.8374746441841125, + "517": 0.8683791756629944, + "518": 0.8480587601661682, + "519": 0.8314867615699768, + "520": 0.7016273736953735, + "521": 0.6524192690849304, + "522": 0.6798352003097534, + "523": 0.72560054063797, + "524": 0.7582122087478638, + "525": 0.7725090980529785, + "526": 0.9910992383956909, + "527": 0.9848704934120178, + "528": 0.7651165723800659, + "529": 0.7244158983230591, + "530": 0.8009435534477234, + "531": 0.8574590086936951, + "532": 0.8259586095809937, + "533": 0.7582616209983826, + "534": 0.7343605756759644, + "535": 0.7997363209724426, + "536": 0.8006985783576965, + "537": 0.7473467588424683, + "538": 0.8718458414077759, + "539": 0.907282829284668 + }, + "loss": { + "486": 2.2771244049072266, + "487": 2.2598960399627686, + "488": 2.2551674842834473, + "489": 2.2272088527679443, + "490": 2.2353591918945312, + "491": 2.2536027431488037, + "492": 2.285149097442627, + "493": 2.2854652404785156, + "494": 2.2932679653167725, + "495": 2.249964714050293, + "496": 2.278869152069092, + "497": 2.2636592388153076, + "498": 2.2679247856140137, + "499": 2.2530012130737305, + "500": 2.2624945640563965, + "501": 2.3036861419677734, + "502": 2.3021528720855713, + "503": 2.3263773918151855, + "504": 2.309734344482422, + "505": 2.3139700889587402, + "506": 2.2929272651672363, + "507": 2.2095446586608887, + "508": 2.235565185546875, + "509": 2.2293193340301514, + "510": 2.205256938934326, + "511": 2.229058027267456, + "512": 2.2393813133239746, + "513": 2.2362594604492188, + "514": 2.2351841926574707, + "515": 2.2357192039489746, + "516": 2.2410929203033447, + "517": 2.2527952194213867, + "518": 2.262061834335327, + "519": 2.2516250610351562, + "520": 2.218244791030884, + "521": 2.2390406131744385, + "522": 2.2223055362701416, + "523": 2.2392568588256836, + "524": 2.246436357498169, + "525": 2.2348380088806152, + "526": 2.2441329956054688, + "527": 2.296755313873291, + "528": 2.2151851654052734, + "529": 2.2095046043395996, + "530": 2.2269046306610107, + "531": 2.2316040992736816, + "532": 2.2452497482299805, + "533": 2.225214958190918, + "534": 2.2368764877319336, + "535": 2.2347657680511475, + "536": 2.2565245628356934, + "537": 2.2124009132385254, + "538": 2.201300621032715, + "539": 2.284648895263672 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 4.48478, + 4.20479, + 4.44189, + 4.4813, + 4.03542, + 3.61375, + 3.39916, + 2.39765, + 2.99826, + 3.57569, + 3.26969, + 3.28997, + 3.48593, + 3.56975, + 2.64706, + 2.15787, + 1.70818, + 1.65391, + 1.90728, + 2.22458, + 3.22871, + 5.25799, + 6.3348, + 6.1684, + 5.14408, + 4.77644, + 4.74999, + 4.37157, + 3.68843, + 3.58022, + 3.19534, + 2.98747, + 3.14523, + 3.25676, + 4.50605, + 5.26027, + 4.80835, + 4.25313, + 3.90762, + 3.74489, + 2.28462, + 2.36786, + 3.78403, + 4.21036, + 3.47135, + 3.03522, + 3.29115, + 3.8702, + 4.14785, + 3.49412, + 3.51967, + 3.96113, + 2.89601, + 2.77545 + ], + "train_epoch_time": 5.056230306625366, + "train_loss": 2.2159057038415284, + "train_score": 0.35210164095271096, + "val_loss": 2.2818633776444655, + "val_score": 0.33475351634720846 + }, + { + "epoch": 10, + "grad_norm": 0.6942535042762756, + "learning_rate": 0.464, + "model_norm": 87.67610931396484, + "step_logs": { + "grad_norm": { + "540": 0.7585775852203369, + "541": 0.5961969494819641, + "542": 0.5493056178092957, + "543": 0.657780110836029, + "544": 0.7750731110572815, + "545": 0.7865927815437317, + "546": 0.7444025874137878, + "547": 0.6932305693626404, + "548": 0.647156834602356, + "549": 0.708666205406189, + "550": 0.7742327451705933, + "551": 0.8068590760231018, + "552": 0.8598763346672058, + "553": 0.8082388043403625, + "554": 0.7034367322921753, + "555": 0.7649155259132385, + "556": 0.8748824000358582, + "557": 0.8523411750793457, + "558": 0.7931309938430786, + "559": 0.8179984092712402, + "560": 0.7930600047111511, + "561": 0.7689656019210815, + "562": 0.7945387959480286, + "563": 0.8083291053771973, + "564": 0.9241283535957336, + "565": 0.9414832592010498, + "566": 0.7653954029083252, + "567": 0.6973149180412292, + "568": 0.6581932306289673, + "569": 0.6959071755409241, + "570": 0.7117392420768738, + "571": 0.7333704233169556, + "572": 0.7796891331672668, + "573": 0.8865215182304382, + "574": 0.9076765775680542, + "575": 0.8328969478607178, + "576": 0.829623818397522, + "577": 0.8538678884506226, + "578": 0.8449520468711853, + "579": 0.7578330039978027, + "580": 0.7410953044891357, + "581": 0.7811304926872253, + "582": 0.8032299280166626, + "583": 0.7698058485984802, + "584": 0.812824010848999, + "585": 0.7754165530204773, + "586": 0.7512738108634949, + "587": 0.7116155624389648, + "588": 0.7505306601524353, + "589": 0.8330626487731934, + "590": 0.8475196957588196, + "591": 0.8164493441581726, + "592": 0.7601915001869202, + "593": 0.6942535042762756 + }, + "loss": { + "540": 2.2231364250183105, + "541": 2.2061266899108887, + "542": 2.1627893447875977, + "543": 2.194805860519409, + "544": 2.194080114364624, + "545": 2.2225894927978516, + "546": 2.2229509353637695, + "547": 2.2039008140563965, + "548": 2.1704890727996826, + "549": 2.1939315795898438, + "550": 2.202841281890869, + "551": 2.201186418533325, + "552": 2.221682548522949, + "553": 2.238283157348633, + "554": 2.2009692192077637, + "555": 2.2087900638580322, + "556": 2.2394473552703857, + "557": 2.219580888748169, + "558": 2.183645009994507, + "559": 2.2109286785125732, + "560": 2.205620765686035, + "561": 2.192681074142456, + "562": 2.22249698638916, + "563": 2.2442140579223633, + "564": 2.2055916786193848, + "565": 2.2416820526123047, + "566": 2.2224881649017334, + "567": 2.184573173522949, + "568": 2.1638331413269043, + "569": 2.1898579597473145, + "570": 2.212275743484497, + "571": 2.184046745300293, + "572": 2.1922593116760254, + "573": 2.1798362731933594, + "574": 2.224644184112549, + "575": 2.2149720191955566, + "576": 2.1777524948120117, + "577": 2.199397087097168, + "578": 2.1964449882507324, + "579": 2.196183681488037, + "580": 2.1861789226531982, + "581": 2.192002296447754, + "582": 2.170109272003174, + "583": 2.167527675628662, + "584": 2.200174331665039, + "585": 2.1844873428344727, + "586": 2.1879754066467285, + "587": 2.2079055309295654, + "588": 2.1804027557373047, + "589": 2.193552017211914, + "590": 2.2093706130981445, + "591": 2.196868896484375, + "592": 2.1808855533599854, + "593": 2.157546281814575 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 3.86337, + 6.20656, + 7.1678, + 5.07265, + 3.65231, + 3.59219, + 4.01157, + 4.58603, + 5.18249, + 4.36857, + 3.67485, + 3.38113, + 3.00476, + 3.42638, + 4.44799, + 3.77509, + 2.92578, + 3.05523, + 3.4713, + 3.30423, + 3.50686, + 3.70819, + 3.52055, + 3.43469, + 2.58262, + 2.529, + 3.79374, + 4.49271, + 4.99479, + 4.52182, + 4.36714, + 4.06083, + 3.60619, + 2.77361, + 2.70021, + 3.1929, + 3.16407, + 3.01663, + 3.07649, + 3.82403, + 3.9805, + 3.59248, + 3.36358, + 3.65765, + 3.33015, + 3.63312, + 3.87655, + 4.36003, + 3.87079, + 3.16077, + 3.07588, + 3.29568, + 3.77387, + 4.47635 + ], + "train_epoch_time": 5.055509328842163, + "train_loss": 2.1632014810953457, + "train_score": 0.3650062770036235, + "val_loss": 2.2497894342402778, + "val_score": 0.3419964485272474 + }, + { + "epoch": 11, + "grad_norm": 0.7919039130210876, + "learning_rate": 0.464, + "model_norm": 87.73299407958984, + "step_logs": { + "grad_norm": { + "594": 0.6798537373542786, + "595": 0.6490585207939148, + "596": 0.5988166928291321, + "597": 0.6148913502693176, + "598": 0.6750938892364502, + "599": 0.8076092004776001, + "600": 0.9198513627052307, + "601": 0.9107973575592041, + "602": 0.8270330429077148, + "603": 0.7594625949859619, + "604": 0.6789271831512451, + "605": 0.7532307505607605, + "606": 0.8689937591552734, + "607": 0.8897367119789124, + "608": 0.85323566198349, + "609": 0.7796239852905273, + "610": 0.7001063227653503, + "611": 0.6852161884307861, + "612": 0.6914094090461731, + "613": 0.712799608707428, + "614": 0.7351668477058411, + "615": 0.738058865070343, + "616": 0.7064772248268127, + "617": 0.6869327425956726, + "618": 0.7838695645332336, + "619": 0.7908899784088135, + "620": 0.7245451807975769, + "621": 0.7042706608772278, + "622": 0.7856767177581787, + "623": 0.924270510673523, + "624": 2.20139217376709, + "625": 1.6957099437713623, + "626": 1.303147792816162, + "627": 1.4234451055526733, + "628": 1.6826438903808594, + "629": 1.663301706314087, + "630": 1.6491668224334717, + "631": 1.7331079244613647, + "632": 1.1122770309448242, + "633": 0.7747372388839722, + "634": 0.7028419971466064, + "635": 0.7543326616287231, + "636": 0.8162450194358826, + "637": 0.8256059288978577, + "638": 0.7799479365348816, + "639": 0.7498819231987, + "640": 0.7176874279975891, + "641": 0.6859791874885559, + "642": 0.7241498231887817, + "643": 0.9374217391014099, + "644": 0.866894543170929, + "645": 0.7311823964118958, + "646": 0.6802024245262146, + "647": 0.7919039130210876 + }, + "loss": { + "594": 2.1565752029418945, + "595": 2.145916700363159, + "596": 2.1633694171905518, + "597": 2.1479344367980957, + "598": 2.16104793548584, + "599": 2.125016212463379, + "600": 2.199812889099121, + "601": 2.2146387100219727, + "602": 2.1745922565460205, + "603": 2.1782429218292236, + "604": 2.15200138092041, + "605": 2.1815381050109863, + "606": 2.196560859680176, + "607": 2.2157418727874756, + "608": 2.1975088119506836, + "609": 2.156369686126709, + "610": 2.1521029472351074, + "611": 2.161513566970825, + "612": 2.141385078430176, + "613": 2.1641273498535156, + "614": 2.153470516204834, + "615": 2.1648004055023193, + "616": 2.195648431777954, + "617": 2.145448684692383, + "618": 2.1772561073303223, + "619": 2.175151824951172, + "620": 2.1576876640319824, + "621": 2.142158031463623, + "622": 2.154916763305664, + "623": 2.163973331451416, + "624": 2.264434337615967, + "625": 2.461543083190918, + "626": 2.3529388904571533, + "627": 2.3282318115234375, + "628": 2.4070634841918945, + "629": 2.4230785369873047, + "630": 2.4986953735351562, + "631": 2.4733142852783203, + "632": 2.3817098140716553, + "633": 2.305549144744873, + "634": 2.2497317790985107, + "635": 2.208911180496216, + "636": 2.233792304992676, + "637": 2.246324062347412, + "638": 2.230048179626465, + "639": 2.192605495452881, + "640": 2.1972944736480713, + "641": 2.171077013015747, + "642": 2.1710762977600098, + "643": 2.2218165397644043, + "644": 2.2336792945861816, + "645": 2.215351104736328, + "646": 2.16933012008667, + "647": 2.202516555786133 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 4.66588, + 5.09384, + 6.03313, + 5.68099, + 4.74172, + 3.25806, + 2.59986, + 2.66968, + 3.1793, + 3.77654, + 4.6687, + 3.84509, + 2.90877, + 2.79896, + 3.01851, + 3.54775, + 4.39071, + 4.60365, + 4.47944, + 4.2594, + 3.98444, + 3.97407, + 4.39913, + 4.54663, + 3.54341, + 3.47742, + 4.11015, + 4.31889, + 3.49094, + 2.53311, + 0.467267, + 0.85606, + 1.38555, + 1.14907, + 0.850165, + 0.875841, + 0.918722, + 0.823433, + 1.92514, + 3.84119, + 4.55423, + 3.88197, + 3.35275, + 3.29554, + 3.66592, + 3.89919, + 4.26597, + 4.61374, + 4.14017, + 2.52836, + 2.97227, + 4.14373, + 4.68867, + 3.51216 + ], + "train_epoch_time": 5.060882806777954, + "train_loss": 2.188546709761216, + "train_score": 0.3502678890737947, + "val_loss": 2.2784656854228778, + "val_score": 0.32748816019495103 + }, + { + "epoch": 12, + "grad_norm": 0.5168001651763916, + "learning_rate": 0.464, + "model_norm": 87.79352569580078, + "step_logs": { + "grad_norm": { + "648": 0.8322305679321289, + "649": 0.86310875415802, + "650": 0.8546245694160461, + "651": 0.8540962338447571, + "652": 0.7400807738304138, + "653": 0.7047733664512634, + "654": 0.713548481464386, + "655": 0.7104220986366272, + "656": 0.6467399597167969, + "657": 0.5776260495185852, + "658": 0.59811931848526, + "659": 0.6109806895256042, + "660": 0.6301087737083435, + "661": 0.6579259634017944, + "662": 0.7061654925346375, + "663": 0.6898855566978455, + "664": 0.6281511187553406, + "665": 0.6035448908805847, + "666": 0.6117790937423706, + "667": 0.621382474899292, + "668": 0.6453878879547119, + "669": 0.7339179515838623, + "670": 0.7700932025909424, + "671": 0.7408218383789062, + "672": 0.6703903675079346, + "673": 0.6067618727684021, + "674": 0.5832111239433289, + "675": 0.5420410633087158, + "676": 0.5233203172683716, + "677": 0.49592217803001404, + "678": 0.5068687200546265, + "679": 0.5144093036651611, + "680": 0.5519264340400696, + "681": 0.514634370803833, + "682": 0.505577564239502, + "683": 0.4756849706172943, + "684": 0.47411251068115234, + "685": 0.5083258152008057, + "686": 0.5305260419845581, + "687": 0.5018282532691956, + "688": 0.503979504108429, + "689": 0.5388111472129822, + "690": 0.5257241725921631, + "691": 0.4740140438079834, + "692": 0.4806220233440399, + "693": 0.47497254610061646, + "694": 0.48448771238327026, + "695": 0.4545193314552307, + "696": 0.4641974866390228, + "697": 0.4876572787761688, + "698": 0.4619799256324768, + "699": 0.4600135087966919, + "700": 0.4902450740337372, + "701": 0.5168001651763916 + }, + "loss": { + "648": 2.1716971397399902, + "649": 2.1991939544677734, + "650": 2.199986696243286, + "651": 2.1747162342071533, + "652": 2.200076103210449, + "653": 2.1516759395599365, + "654": 2.1451101303100586, + "655": 2.1412830352783203, + "656": 2.153409481048584, + "657": 2.1272125244140625, + "658": 2.1281309127807617, + "659": 2.1119394302368164, + "660": 2.110682725906372, + "661": 2.142961025238037, + "662": 2.160482406616211, + "663": 2.146573305130005, + "664": 2.1289889812469482, + "665": 2.1267123222351074, + "666": 2.1252241134643555, + "667": 2.0985217094421387, + "668": 2.1150336265563965, + "669": 2.1408772468566895, + "670": 2.1396474838256836, + "671": 2.1032190322875977, + "672": 2.1189215183258057, + "673": 2.1333508491516113, + "674": 2.106031894683838, + "675": 2.0892810821533203, + "676": 2.1191277503967285, + "677": 2.0875582695007324, + "678": 2.0943353176116943, + "679": 2.116013526916504, + "680": 2.0936737060546875, + "681": 2.073613405227661, + "682": 2.091026782989502, + "683": 2.0991411209106445, + "684": 2.08347749710083, + "685": 2.076913833618164, + "686": 2.072753429412842, + "687": 2.0714285373687744, + "688": 2.0934219360351562, + "689": 2.067140579223633, + "690": 2.0618412494659424, + "691": 2.0936648845672607, + "692": 2.051088333129883, + "693": 2.042275905609131, + "694": 2.0578064918518066, + "695": 2.054750442504883, + "696": 2.0769400596618652, + "697": 2.0494439601898193, + "698": 2.0533528327941895, + "699": 2.0495636463165283, + "700": 2.073385238647461, + "701": 2.06715726852417 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 3.13554, + 2.95211, + 3.0121, + 2.98119, + 4.01679, + 4.33189, + 4.21311, + 4.24269, + 5.14834, + 6.37555, + 5.94871, + 5.65753, + 5.31608, + 4.95063, + 4.33249, + 4.51016, + 5.39567, + 5.83834, + 5.67826, + 5.43495, + 5.0778, + 3.97463, + 3.60791, + 3.83228, + 4.71476, + 5.79463, + 6.19175, + 7.11103, + 7.73788, + 8.48812, + 8.15183, + 7.99652, + 6.873, + 7.82943, + 8.18058, + 9.2769, + 9.26885, + 8.03774, + 7.36435, + 8.22545, + 8.24197, + 7.12028, + 7.46001, + 9.31804, + 8.87926, + 9.05268, + 8.76676, + 9.94614, + 9.6387, + 8.618, + 9.62094, + 9.68546, + 8.62687, + 7.73977 + ], + "train_epoch_time": 5.0557169914245605, + "train_loss": 2.0586307798601804, + "train_score": 0.3924015871250475, + "val_loss": 2.167074854420466, + "val_score": 0.3644383256799455 + }, + { + "epoch": 13, + "grad_norm": 0.34596049785614014, + "learning_rate": 0.3093333333333334, + "model_norm": 87.8283462524414, + "step_logs": { + "grad_norm": { + "702": 0.494188517332077, + "703": 0.429004967212677, + "704": 0.41396981477737427, + "705": 0.4276958107948303, + "706": 0.4263225197792053, + "707": 0.47544771432876587, + "708": 0.4772648811340332, + "709": 0.5201854705810547, + "710": 0.5692038536071777, + "711": 0.5221180319786072, + "712": 0.4851730763912201, + "713": 0.4506009519100189, + "714": 0.40763771533966064, + "715": 0.39443984627723694, + "716": 0.41046595573425293, + "717": 0.43167757987976074, + "718": 0.4171099066734314, + "719": 0.4390905797481537, + "720": 0.42252224683761597, + "721": 0.41707083582878113, + "722": 0.44261491298675537, + "723": 0.4352918267250061, + "724": 0.3870866596698761, + "725": 0.4072112739086151, + "726": 0.4034639000892639, + "727": 0.40515875816345215, + "728": 0.411587655544281, + "729": 0.4055381417274475, + "730": 0.3796173334121704, + "731": 0.3822411894798279, + "732": 0.387616902589798, + "733": 0.3640688359737396, + "734": 0.36904484033584595, + "735": 0.4091506004333496, + "736": 0.40216633677482605, + "737": 0.4079515337944031, + "738": 0.3719809949398041, + "739": 0.35472241044044495, + "740": 0.34964507818222046, + "741": 0.34832754731178284, + "742": 0.3783370852470398, + "743": 0.39985957741737366, + "744": 0.39254435896873474, + "745": 0.34199583530426025, + "746": 0.35096681118011475, + "747": 0.3403915464878082, + "748": 0.3526018559932709, + "749": 0.36242637038230896, + "750": 0.3520948886871338, + "751": 0.34769758582115173, + "752": 0.3255786895751953, + "753": 0.3424532115459442, + "754": 0.3264871835708618, + "755": 0.34596049785614014 + }, + "loss": { + "702": 2.055676221847534, + "703": 2.057755947113037, + "704": 2.055201530456543, + "705": 2.047668933868408, + "706": 2.040125846862793, + "707": 2.0449862480163574, + "708": 2.03873348236084, + "709": 2.0728325843811035, + "710": 2.0671229362487793, + "711": 2.0537514686584473, + "712": 2.031139850616455, + "713": 2.0507335662841797, + "714": 2.0310213565826416, + "715": 2.0710744857788086, + "716": 2.0247068405151367, + "717": 2.010361433029175, + "718": 2.0242011547088623, + "719": 2.0620484352111816, + "720": 2.0039732456207275, + "721": 2.0215840339660645, + "722": 2.049868106842041, + "723": 2.0240185260772705, + "724": 2.042682647705078, + "725": 2.0658316612243652, + "726": 2.031619071960449, + "727": 2.030827760696411, + "728": 2.0327835083007812, + "729": 2.035027265548706, + "730": 2.0169854164123535, + "731": 2.0529890060424805, + "732": 2.016752243041992, + "733": 2.0340709686279297, + "734": 2.0238027572631836, + "735": 2.042764663696289, + "736": 2.017361640930176, + "737": 2.009218215942383, + "738": 2.0296149253845215, + "739": 2.0462749004364014, + "740": 2.014038562774658, + "741": 2.0566771030426025, + "742": 2.0085175037384033, + "743": 2.0115199089050293, + "744": 2.02402925491333, + "745": 2.0086283683776855, + "746": 2.045577049255371, + "747": 2.0326685905456543, + "748": 2.0015974044799805, + "749": 1.9962797164916992, + "750": 2.036181926727295, + "751": 2.010166645050049, + "752": 2.039108991622925, + "753": 2.0363669395446777, + "754": 2.0303382873535156, + "755": 2.0272130966186523 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 8.41723, + 11.1807, + 11.9927, + 11.1941, + 11.2248, + 9.04659, + 8.95038, + 7.66034, + 6.38015, + 7.53374, + 8.62872, + 10.1001, + 12.2227, + 13.3117, + 12.0173, + 10.7884, + 11.6346, + 10.6952, + 11.2252, + 11.6218, + 10.4634, + 10.682, + 13.6328, + 12.4582, + 12.4805, + 12.3715, + 11.9996, + 12.3739, + 13.9962, + 14.0511, + 13.4229, + 15.3461, + 14.8597, + 12.2026, + 12.473, + 12.0729, + 14.668, + 16.2625, + 16.4745, + 16.9508, + 14.0319, + 12.5808, + 13.1353, + 17.1735, + 16.6067, + 17.5432, + 16.0993, + 15.1978, + 16.4247, + 16.6276, + 19.2366, + 17.3642, + 19.0474, + 16.9374 + ], + "train_epoch_time": 5.055843114852905, + "train_loss": 2.0121118987478863, + "train_score": 0.4051818058514698, + "val_loss": 2.13039591446536, + "val_score": 0.3762198620220276 + }, + { + "epoch": 14, + "grad_norm": 0.3083382546901703, + "learning_rate": 0.1546666666666667, + "model_norm": 87.84034729003906, + "step_logs": { + "grad_norm": { + "756": 0.31816309690475464, + "757": 0.34076759219169617, + "758": 0.3757580518722534, + "759": 0.3717805743217468, + "760": 0.3389896750450134, + "761": 0.33788925409317017, + "762": 0.3298880457878113, + "763": 0.3302595317363739, + "764": 0.3691927492618561, + "765": 0.334699809551239, + "766": 0.3390510380268097, + "767": 0.351433664560318, + "768": 0.34450021386146545, + "769": 0.33897507190704346, + "770": 0.3508661091327667, + "771": 0.3051116168498993, + "772": 0.3122202455997467, + "773": 0.3256988227367401, + "774": 0.32951807975769043, + "775": 0.3455788493156433, + "776": 0.3282933533191681, + "777": 0.29794615507125854, + "778": 0.31040993332862854, + "779": 0.3576716184616089, + "780": 0.33595651388168335, + "781": 0.3316764831542969, + "782": 0.3166547417640686, + "783": 0.3212023079395294, + "784": 0.3121882975101471, + "785": 0.32798659801483154, + "786": 0.32423341274261475, + "787": 0.3123420774936676, + "788": 0.32254958152770996, + "789": 0.3154264986515045, + "790": 0.32257264852523804, + "791": 0.32353290915489197, + "792": 0.3024417459964752, + "793": 0.31519627571105957, + "794": 0.31639960408210754, + "795": 0.301001638174057, + "796": 0.3022189736366272, + "797": 0.3100537359714508, + "798": 0.31416457891464233, + "799": 0.3117527961730957, + "800": 0.32066595554351807, + "801": 0.3516268730163574, + "802": 0.2929776608943939, + "803": 0.324480265378952, + "804": 0.3103832006454468, + "805": 0.3073827922344208, + "806": 0.2938455641269684, + "807": 0.32488566637039185, + "808": 0.29607847332954407, + "809": 0.3083382546901703 + }, + "loss": { + "756": 2.0284624099731445, + "757": 2.030078411102295, + "758": 2.0251684188842773, + "759": 2.037804126739502, + "760": 2.0077157020568848, + "761": 2.0122575759887695, + "762": 2.019298553466797, + "763": 2.032672882080078, + "764": 1.9646873474121094, + "765": 1.9999198913574219, + "766": 2.015259027481079, + "767": 2.016988515853882, + "768": 2.0376081466674805, + "769": 1.979867696762085, + "770": 2.0372753143310547, + "771": 1.9841111898422241, + "772": 2.01918625831604, + "773": 1.9982551336288452, + "774": 2.0085694789886475, + "775": 1.9884827136993408, + "776": 2.016232967376709, + "777": 1.9949437379837036, + "778": 2.009265184402466, + "779": 2.021888256072998, + "780": 2.008274555206299, + "781": 1.966428279876709, + "782": 2.0240259170532227, + "783": 1.971564769744873, + "784": 2.019937515258789, + "785": 2.011720895767212, + "786": 2.0009024143218994, + "787": 2.016679286956787, + "788": 2.009650230407715, + "789": 1.95314359664917, + "790": 1.9766675233840942, + "791": 2.023458957672119, + "792": 2.0272693634033203, + "793": 1.9830944538116455, + "794": 2.0164055824279785, + "795": 1.9970431327819824, + "796": 2.0086779594421387, + "797": 1.9559695720672607, + "798": 1.9930493831634521, + "799": 2.00905442237854, + "800": 1.9889888763427734, + "801": 1.9652135372161865, + "802": 2.0151801109313965, + "803": 1.999463438987732, + "804": 2.0275464057922363, + "805": 2.0180020332336426, + "806": 2.0078988075256348, + "807": 2.0101065635681152, + "808": 1.993089199066162, + "809": 1.9798625707626343 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 20.0386, + 17.4822, + 14.3432, + 14.7431, + 17.4715, + 17.6252, + 18.5553, + 18.6362, + 14.4141, + 17.8526, + 17.5308, + 16.3311, + 17.1689, + 17.2306, + 16.5488, + 21.3132, + 20.7135, + 18.8373, + 18.4982, + 16.6505, + 18.7075, + 22.4727, + 20.8529, + 15.8048, + 17.7933, + 17.8751, + 20.1857, + 19.1097, + 20.7255, + 18.7006, + 19.0331, + 20.6717, + 19.3165, + 19.6308, + 18.9967, + 19.3311, + 22.163, + 19.961, + 20.1422, + 22.0419, + 21.9921, + 20.3464, + 20.1931, + 20.6715, + 19.3431, + 15.8945, + 23.4771, + 18.9905, + 21.0462, + 21.3581, + 23.2543, + 19.044, + 22.7359, + 20.8248 + ], + "train_epoch_time": 5.054449558258057, + "train_loss": 1.998216214994105, + "train_score": 0.4091250448777652, + "val_loss": 2.121286135727174, + "val_score": 0.3789555823556591 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:32:26.513204", + "final_model_norm": 87.84034729003906, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:30:41.548803", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 5.765828609466553, + "learning_rate": 1e-10, + "model_norm": 87.36087036132812, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.300286769866943, + "3": 7.301339626312256, + "4": 14.016050338745117, + "5": 5.431191444396973, + "6": 8.543880462646484, + "7": 4.7641143798828125, + "8": 7.423945903778076, + "9": 2.887894868850708, + "10": 5.924127101898193, + "11": 13.67439079284668, + "12": 6.22044563293457, + "13": 11.159448623657227, + "14": 65.04527282714844, + "15": 4.448390007019043, + "16": 16.901765823364258, + "17": 15.437251091003418, + "18": 6.504454135894775, + "19": 7.35742712020874, + "20": 4.80839729309082, + "21": 12.031474113464355, + "22": 10.740063667297363, + "23": 5.095099925994873, + "24": 15.341471672058105, + "25": 8.56396484375, + "26": 3.7578632831573486, + "27": 12.759321212768555, + "28": 14.070226669311523, + "29": 8.846729278564453, + "30": 3.9992878437042236, + "31": 17.102956771850586, + "32": 13.401613235473633, + "33": 3.3323185443878174, + "34": 15.259978294372559, + "35": 4.542700290679932, + "36": 7.9205002784729, + "37": 5.112588405609131, + "38": 11.688754081726074, + "39": 5.768594264984131, + "40": 7.878995418548584, + "41": 6.576426982879639, + "42": 3.915175437927246, + "43": 5.074714660644531, + "44": 9.314199447631836, + "45": 4.686611175537109, + "46": 8.48810863494873, + "47": 13.346796035766602, + "48": 4.095573902130127, + "49": 9.482538223266602, + "50": 4.848084449768066, + "51": 2.400895118713379, + "52": 18.901315689086914, + "53": 5.765828609466553 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.8053386211395264, + "3": 4.116413593292236, + "4": 4.468705654144287, + "5": 4.331547260284424, + "6": 4.026200771331787, + "7": 4.461686134338379, + "8": 4.410714149475098, + "9": 4.235510349273682, + "10": 4.083352088928223, + "11": 5.169145584106445, + "12": 4.551384925842285, + "13": 4.856448173522949, + "14": 5.497134685516357, + "15": 4.817488193511963, + "16": 7.578564643859863, + "17": 5.290328502655029, + "18": 6.949967384338379, + "19": 6.369999885559082, + "20": 4.6053876876831055, + "21": 4.436546325683594, + "22": 6.863824844360352, + "23": 4.218764305114746, + "24": 7.216196060180664, + "25": 6.5491485595703125, + "26": 4.235875129699707, + "27": 6.50718355178833, + "28": 4.777423858642578, + "29": 3.743546485900879, + "30": 4.372169494628906, + "31": 9.838640213012695, + "32": 4.680777549743652, + "33": 3.572072744369507, + "34": 5.9389214515686035, + "35": 4.366957664489746, + "36": 4.583085060119629, + "37": 4.933143615722656, + "38": 5.612424850463867, + "39": 4.482638359069824, + "40": 4.526904106140137, + "41": 4.726700782775879, + "42": 3.943324327468872, + "43": 4.045255661010742, + "44": 4.196793556213379, + "45": 4.060457229614258, + "46": 4.319828033447266, + "47": 4.913107395172119, + "48": 3.64884090423584, + "49": 4.979071140289307, + "50": 3.9165995121002197, + "51": 3.559251070022583, + "52": 9.414752006530762, + "53": 4.585429668426514 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 0.00839976, + 0.00857642, + 0.0958678, + 0.0772172, + 0.0227473, + 0.146843, + 0.055155, + 0.196578, + 0.0800275, + 0.507859, + 0.11635, + 0.0276441, + 0.117625, + 0.0389972, + 0.00129929, + 0.243453, + 0.0265291, + 0.0221995, + 0.164271, + 0.117676, + 0.199189, + 0.0306484, + 0.0595049, + 0.16251, + 0.0306602, + 0.0892966, + 0.299959, + 0.0399703, + 0.0241319, + 0.0478319, + 0.273358, + 0.0336351, + 0.0260618, + 0.321682, + 0.0255035, + 0.211617, + 0.0730555, + 0.188731, + 0.0410785, + 0.134708, + 0.0729222, + 0.109289, + 0.257253, + 0.157081, + 0.0483756, + 0.184866, + 0.0599576, + 0.0275805, + 0.217533, + 0.0553731, + 0.166636, + 0.617465, + 0.0263527, + 0.137929 + ], + "train_epoch_time": 5.057088375091553, + "train_loss": 4.52632402559606, + "train_score": 0.10379192075736211, + "val_loss": 4.493531253937328, + "val_score": 0.10520863247314632 + }, + { + "epoch": 1, + "grad_norm": 1.433429479598999, + "learning_rate": 1.0, + "model_norm": 87.36151123046875, + "step_logs": { + "grad_norm": { + "54": 4.487692832946777, + "55": 12.362515449523926, + "56": 9.290770530700684, + "57": 3.39971661567688, + "58": 3.6275835037231445, + "59": 3.4811999797821045, + "60": 6.620113849639893, + "61": 13.550787925720215, + "62": 8.233078956604004, + "63": 1.8066459894180298, + "64": 19.582988739013672, + "65": 7.002932548522949, + "66": 7.648651123046875, + "67": 3.182955265045166, + "68": 12.178287506103516, + "69": 4.017116069793701, + "70": 6.878478050231934, + "71": 4.456635475158691, + "72": 2.8061683177948, + "73": 8.096805572509766, + "74": 3.9354281425476074, + "75": 6.565037250518799, + "76": 3.2019972801208496, + "77": 5.413841247558594, + "78": 3.711090564727783, + "79": 4.413052558898926, + "80": 3.263498306274414, + "81": 2.8167335987091064, + "82": 12.143589973449707, + "83": 4.7537102699279785, + "84": 4.162781238555908, + "85": 3.7110908031463623, + "86": 3.107283115386963, + "87": 3.189760208129883, + "88": 4.122842788696289, + "89": 6.179720878601074, + "90": 2.3473868370056152, + "91": 9.49435806274414, + "92": 2.004326820373535, + "93": 3.83107328414917, + "94": 4.368336200714111, + "95": 2.172074794769287, + "96": 12.80830192565918, + "97": 7.9598517417907715, + "98": 4.475029945373535, + "99": 5.5807342529296875, + "100": 4.715944290161133, + "101": 1.104247808456421, + "102": 6.83452033996582, + "103": 3.088413715362549, + "104": 3.1336474418640137, + "105": 1.9625157117843628, + "106": 2.579343318939209, + "107": 1.433429479598999 + }, + "loss": { + "54": 4.529766082763672, + "55": 5.664586067199707, + "56": 5.814496040344238, + "57": 3.8436694145202637, + "58": 3.773864269256592, + "59": 3.608013153076172, + "60": 4.405400276184082, + "61": 6.577364921569824, + "62": 4.018462181091309, + "63": 3.2224414348602295, + "64": 5.774176120758057, + "65": 4.372259140014648, + "66": 4.477726936340332, + "67": 3.5280508995056152, + "68": 5.907240390777588, + "69": 3.836770534515381, + "70": 4.493083953857422, + "71": 4.355321884155273, + "72": 3.2985966205596924, + "73": 4.426855564117432, + "74": 3.898265838623047, + "75": 4.32016134262085, + "76": 3.7217884063720703, + "77": 4.30015754699707, + "78": 3.8478171825408936, + "79": 3.876591682434082, + "80": 3.4705288410186768, + "81": 3.785193681716919, + "82": 6.087458610534668, + "83": 4.790887832641602, + "84": 3.5112109184265137, + "85": 3.6132373809814453, + "86": 3.556417465209961, + "87": 3.5093908309936523, + "88": 3.624424457550049, + "89": 4.082795143127441, + "90": 3.3027796745300293, + "91": 5.520716667175293, + "92": 3.3483028411865234, + "93": 3.8985466957092285, + "94": 4.639792442321777, + "95": 3.4257729053497314, + "96": 5.431247711181641, + "97": 4.581187725067139, + "98": 4.119652271270752, + "99": 4.083475112915039, + "100": 3.6307711601257324, + "101": 3.0953259468078613, + "102": 4.768978595733643, + "103": 3.212509870529175, + "104": 3.23966121673584, + "105": 3.5053300857543945, + "106": 3.384728193283081, + "107": 3.250051259994507 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.224921, + 0.0370642, + 0.067361, + 0.332553, + 0.286782, + 0.297721, + 0.10052, + 0.0358198, + 0.0592837, + 0.987277, + 0.0150568, + 0.0891551, + 0.0765399, + 0.348236, + 0.0398302, + 0.237759, + 0.0949642, + 0.219284, + 0.418892, + 0.0675255, + 0.251702, + 0.100236, + 0.363003, + 0.146715, + 0.27939, + 0.199054, + 0.325858, + 0.477086, + 0.0412802, + 0.212007, + 0.202623, + 0.262357, + 0.368342, + 0.344918, + 0.213229, + 0.10691, + 0.599391, + 0.0612441, + 0.833466, + 0.265621, + 0.243146, + 0.726121, + 0.0331067, + 0.072305, + 0.205716, + 0.131113, + 0.163253, + 2.53848, + 0.102096, + 0.336801, + 0.329913, + 0.910128, + 0.508751, + 1.58175 + ], + "train_epoch_time": 5.06270956993103, + "train_loss": 3.823686411596268, + "train_score": 0.12427479381455239, + "val_loss": 3.796891169761544, + "val_score": 0.12547538736589192 + }, + { + "epoch": 2, + "grad_norm": 5.456674575805664, + "learning_rate": 1.0, + "model_norm": 87.40853881835938, + "step_logs": { + "grad_norm": { + "108": 5.709460258483887, + "109": 2.780217409133911, + "110": 1.8220134973526, + "111": 8.052960395812988, + "112": 1.5795549154281616, + "113": 9.591748237609863, + "114": 2.4433400630950928, + "115": 1.5015907287597656, + "116": 5.175107002258301, + "117": 1.8479372262954712, + "118": 9.004857063293457, + "119": 5.777551174163818, + "120": 2.4527392387390137, + "121": 4.525572776794434, + "122": 1.8827574253082275, + "123": 9.842826843261719, + "124": 5.56711483001709, + "125": 2.6885156631469727, + "126": 2.365950107574463, + "127": 15.701992988586426, + "128": 3.8434715270996094, + "129": 9.37134075164795, + "130": 1.5705770254135132, + "131": 7.028792858123779, + "132": 2.9641029834747314, + "133": 6.267132759094238, + "134": 1.6342846155166626, + "135": 3.1726131439208984, + "136": 1.0219895839691162, + "137": 11.144745826721191, + "138": 1.959206223487854, + "139": 1.9480067491531372, + "140": 16.675106048583984, + "141": 6.225305080413818, + "142": 2.398484945297241, + "143": 3.144951820373535, + "144": 2.075913429260254, + "145": 12.180719375610352, + "146": 5.990593433380127, + "147": 3.4510912895202637, + "148": 2.7853033542633057, + "149": 3.629406690597534, + "150": 1.5068943500518799, + "151": 7.937716484069824, + "152": 3.4166417121887207, + "153": 6.068278789520264, + "154": 2.0362634658813477, + "155": 3.8298466205596924, + "156": 3.7409844398498535, + "157": 1.6658438444137573, + "158": 4.7560577392578125, + "159": 1.2474490404129028, + "160": 1.7983179092407227, + "161": 5.456674575805664 + }, + "loss": { + "108": 3.8252687454223633, + "109": 3.269702911376953, + "110": 3.153294563293457, + "111": 4.942331314086914, + "112": 3.3671398162841797, + "113": 5.608525276184082, + "114": 3.4998526573181152, + "115": 3.3788342475891113, + "116": 4.009459495544434, + "117": 3.241333484649658, + "118": 5.1927056312561035, + "119": 4.227705001831055, + "120": 3.502326726913452, + "121": 4.063412666320801, + "122": 3.116813898086548, + "123": 5.995430946350098, + "124": 3.944603204727173, + "125": 3.3716001510620117, + "126": 3.5189130306243896, + "127": 6.205752372741699, + "128": 3.3606443405151367, + "129": 4.448931694030762, + "130": 2.9477643966674805, + "131": 4.288956165313721, + "132": 3.2721753120422363, + "133": 4.976713180541992, + "134": 2.9573683738708496, + "135": 3.64422345161438, + "136": 3.0619139671325684, + "137": 5.0049004554748535, + "138": 3.1333329677581787, + "139": 3.0090208053588867, + "140": 9.09743881225586, + "141": 5.671896934509277, + "142": 3.4275870323181152, + "143": 3.2397115230560303, + "144": 3.2290961742401123, + "145": 5.568178176879883, + "146": 3.4855480194091797, + "147": 3.347588539123535, + "148": 3.7354249954223633, + "149": 3.313076972961426, + "150": 2.966855764389038, + "151": 4.197963237762451, + "152": 3.1319057941436768, + "153": 4.345335960388184, + "154": 3.05686092376709, + "155": 3.5193305015563965, + "156": 3.561901569366455, + "157": 2.991948366165161, + "158": 4.493997573852539, + "159": 3.201479434967041, + "160": 3.1088132858276367, + "161": 4.162567138671875 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 0.117347, + 0.42301, + 0.949864, + 0.0762115, + 1.34956, + 0.0609611, + 0.586249, + 1.49852, + 0.149709, + 0.949182, + 0.0640383, + 0.126653, + 0.582175, + 0.198401, + 0.87927, + 0.0618843, + 0.127275, + 0.466456, + 0.628633, + 0.0251701, + 0.227497, + 0.0506585, + 1.19502, + 0.0868141, + 0.372435, + 0.126708, + 1.10726, + 0.362052, + 2.93157, + 0.0402954, + 0.816293, + 0.792947, + 0.0327176, + 0.146355, + 0.595819, + 0.327551, + 0.749312, + 0.037529, + 0.0971251, + 0.281073, + 0.481499, + 0.251513, + 1.30656, + 0.0666266, + 0.268293, + 0.118003, + 0.737238, + 0.239937, + 0.254513, + 1.07817, + 0.198673, + 2.05734, + 0.961306, + 0.139799 + ], + "train_epoch_time": 5.056260108947754, + "train_loss": 3.2343624127304538, + "train_score": 0.1522114866860799, + "val_loss": 3.25644829845319, + "val_score": 0.15120909858311085 + }, + { + "epoch": 3, + "grad_norm": 15.653616905212402, + "learning_rate": 1.0, + "model_norm": 87.44014739990234, + "step_logs": { + "grad_norm": { + "162": 1.7654263973236084, + "163": 3.2836763858795166, + "164": 4.768516540527344, + "165": 1.2640773057937622, + "166": 3.1585166454315186, + "167": 4.194464206695557, + "168": 1.0103245973587036, + "169": 3.5152182579040527, + "170": 1.4377050399780273, + "171": 10.770119667053223, + "172": 2.7500264644622803, + "173": 3.5346522331237793, + "174": 2.2370402812957764, + "175": 10.087536811828613, + "176": 2.886577606201172, + "177": 2.4982433319091797, + "178": 1.0791091918945312, + "179": 2.3742637634277344, + "180": 4.836894989013672, + "181": 1.3748149871826172, + "182": 1.0512306690216064, + "183": 1.7874820232391357, + "184": 12.56811237335205, + "185": 2.213726282119751, + "186": 6.64543342590332, + "187": 2.425590991973877, + "188": 2.9407246112823486, + "189": 12.566993713378906, + "190": 2.8077874183654785, + "191": 1.7313799858093262, + "192": 1.6948881149291992, + "193": 8.366637229919434, + "194": 3.905769109725952, + "195": 2.00282621383667, + "196": 1.3125073909759521, + "197": 11.505144119262695, + "198": 1.131795048713684, + "199": 1.2343827486038208, + "200": 1.2906420230865479, + "201": 1.2155762910842896, + "202": 0.8945650458335876, + "203": 0.6896288990974426, + "204": 2.5183982849121094, + "205": 5.40286922454834, + "206": 1.7006113529205322, + "207": 3.429755926132202, + "208": 16.784442901611328, + "209": 3.5395991802215576, + "210": 3.0890109539031982, + "211": 1.4761098623275757, + "212": 10.205368041992188, + "213": 0.6079724431037903, + "214": 2.004495620727539, + "215": 15.653616905212402 + }, + "loss": { + "162": 3.2351136207580566, + "163": 3.216046094894409, + "164": 4.396238327026367, + "165": 2.87646746635437, + "166": 3.5071041584014893, + "167": 3.5331854820251465, + "168": 2.9195051193237305, + "169": 3.314512252807617, + "170": 2.895765781402588, + "171": 5.235464096069336, + "172": 2.8600525856018066, + "173": 3.5523056983947754, + "174": 3.1644835472106934, + "175": 5.578177452087402, + "176": 2.9697630405426025, + "177": 3.391751766204834, + "178": 2.770040988922119, + "179": 3.3368654251098633, + "180": 3.568667411804199, + "181": 3.020956039428711, + "182": 2.926006317138672, + "183": 2.8224878311157227, + "184": 6.070935249328613, + "185": 2.8124871253967285, + "186": 4.371795654296875, + "187": 3.254304885864258, + "188": 3.483398199081421, + "189": 7.132011413574219, + "190": 3.204470634460449, + "191": 3.081040143966675, + "192": 3.048574686050415, + "193": 5.798521041870117, + "194": 4.38168478012085, + "195": 3.0108323097229004, + "196": 2.982576370239258, + "197": 5.273983955383301, + "198": 2.873634099960327, + "199": 3.1454124450683594, + "200": 2.952078342437744, + "201": 2.826287269592285, + "202": 2.982473373413086, + "203": 2.719486713409424, + "204": 3.003004789352417, + "205": 3.5425233840942383, + "206": 2.876796245574951, + "207": 3.1663661003112793, + "208": 8.677878379821777, + "209": 3.081639289855957, + "210": 3.453032970428467, + "211": 2.9425806999206543, + "212": 5.308302879333496, + "213": 2.687962532043457, + "214": 2.797548294067383, + "215": 9.18140983581543 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 1.03798, + 0.298265, + 0.193337, + 1.80016, + 0.351546, + 0.200823, + 2.86014, + 0.268235, + 1.40095, + 0.0451351, + 0.378181, + 0.284326, + 0.632347, + 0.0548179, + 0.356414, + 0.543444, + 2.37879, + 0.591944, + 0.152536, + 1.59829, + 2.64776, + 0.883382, + 0.038434, + 0.573909, + 0.098995, + 0.553125, + 0.402805, + 0.0451595, + 0.406469, + 1.02781, + 1.06124, + 0.0828353, + 0.287229, + 0.750585, + 1.73136, + 0.0398432, + 2.24334, + 2.06432, + 1.77221, + 1.91272, + 3.72694, + 5.71816, + 0.473486, + 0.121357, + 0.994716, + 0.269175, + 0.0308035, + 0.245965, + 0.361878, + 1.35049, + 0.0509681, + 7.27203, + 0.696253, + 0.0374697 + ], + "train_epoch_time": 5.055885076522827, + "train_loss": 4.309854684498594, + "train_score": 0.19230182922553468, + "val_loss": 4.315080170790172, + "val_score": 0.18784084389185113 + }, + { + "epoch": 4, + "grad_norm": 0.5307763814926147, + "learning_rate": 1.0, + "model_norm": 87.4842529296875, + "step_logs": { + "grad_norm": { + "216": 7.264382839202881, + "217": 3.016519546508789, + "218": 2.009855031967163, + "219": 1.2515026330947876, + "220": 5.147071361541748, + "221": 1.1269272565841675, + "222": 2.9056191444396973, + "223": 1.3414016962051392, + "224": 6.588144302368164, + "225": 1.778350591659546, + "226": 4.2911272048950195, + "227": 7.035225868225098, + "228": 0.7696641087532043, + "229": 1.9932821989059448, + "230": 4.497763633728027, + "231": 1.2307993173599243, + "232": 1.7367925643920898, + "233": 1.1126596927642822, + "234": 1.053977370262146, + "235": 1.603644847869873, + "236": 1.641119122505188, + "237": 14.096592903137207, + "238": 1.7305207252502441, + "239": 8.768121719360352, + "240": 2.761319160461426, + "241": 2.863421678543091, + "242": 1.2003724575042725, + "243": 1.185773491859436, + "244": 2.565387725830078, + "245": 0.809199869632721, + "246": 0.7568778991699219, + "247": 1.031437635421753, + "248": 0.7673467397689819, + "249": 0.718783438205719, + "250": 0.8749423027038574, + "251": 0.8532248139381409, + "252": 0.6159232258796692, + "253": 0.6433970928192139, + "254": 0.7505868077278137, + "255": 0.8013109564781189, + "256": 0.6608864068984985, + "257": 0.6655183434486389, + "258": 0.7530298829078674, + "259": 0.8087952733039856, + "260": 0.7129753828048706, + "261": 0.5630432367324829, + "262": 0.3851890563964844, + "263": 0.43649014830589294, + "264": 0.5901908874511719, + "265": 0.6514227986335754, + "266": 0.8378380537033081, + "267": 0.8493033051490784, + "268": 0.6025286316871643, + "269": 0.5307763814926147 + }, + "loss": { + "216": 4.313517093658447, + "217": 2.972172498703003, + "218": 3.3032941818237305, + "219": 2.791508197784424, + "220": 4.311266899108887, + "221": 2.7992138862609863, + "222": 3.243692398071289, + "223": 2.982128620147705, + "224": 3.80674409866333, + "225": 2.820188283920288, + "226": 3.479891777038574, + "227": 3.8139331340789795, + "228": 2.721371650695801, + "229": 2.8763344287872314, + "230": 3.9594669342041016, + "231": 3.052639961242676, + "232": 3.0569255352020264, + "233": 3.190089225769043, + "234": 2.9966259002685547, + "235": 2.92966890335083, + "236": 3.07914400100708, + "237": 6.789307594299316, + "238": 2.9938905239105225, + "239": 5.213659286499023, + "240": 3.7490806579589844, + "241": 3.0940699577331543, + "242": 3.018930673599243, + "243": 2.842966318130493, + "244": 3.148573160171509, + "245": 2.912014961242676, + "246": 2.6648151874542236, + "247": 2.790278911590576, + "248": 2.7896454334259033, + "249": 2.690268039703369, + "250": 2.6885101795196533, + "251": 2.815004587173462, + "252": 2.627066135406494, + "253": 2.6681008338928223, + "254": 2.6348934173583984, + "255": 2.7513134479522705, + "256": 2.6638846397399902, + "257": 2.670868396759033, + "258": 2.6168901920318604, + "259": 2.721342086791992, + "260": 2.657644271850586, + "261": 2.6717302799224854, + "262": 2.572261333465576, + "263": 2.5654335021972656, + "264": 2.5559775829315186, + "265": 2.64631724357605, + "266": 2.626314640045166, + "267": 2.7487263679504395, + "268": 2.627315044403076, + "269": 2.599977970123291 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 0.0817399, + 0.326634, + 0.817745, + 1.78228, + 0.162736, + 2.20417, + 0.384204, + 1.65733, + 0.0877056, + 0.89175, + 0.188983, + 0.0770579, + 4.59394, + 0.723939, + 0.195724, + 2.01512, + 1.01342, + 2.57678, + 2.69755, + 1.13921, + 1.14327, + 0.0341662, + 0.999729, + 0.0678156, + 0.49169, + 0.377363, + 2.09518, + 2.02194, + 0.478418, + 4.44715, + 4.65174, + 2.62278, + 4.73768, + 5.20714, + 3.51199, + 3.8668, + 6.92497, + 6.44531, + 4.67693, + 4.28487, + 6.09904, + 6.03021, + 4.61489, + 4.16012, + 5.22815, + 8.4277, + 17.3367, + 13.4652, + 7.3379, + 6.23614, + 3.74133, + 3.81071, + 7.23697, + 9.22883 + ], + "train_epoch_time": 5.055017471313477, + "train_loss": 2.57431765382567, + "train_score": 0.2514156653130243, + "val_loss": 2.6006610806856036, + "val_score": 0.2447393443328505 + }, + { + "epoch": 5, + "grad_norm": 0.6712852716445923, + "learning_rate": 1.0, + "model_norm": 87.5792007446289, + "step_logs": { + "grad_norm": { + "270": 0.46761178970336914, + "271": 0.5040448307991028, + "272": 0.5838162899017334, + "273": 0.6276520490646362, + "274": 0.7859411835670471, + "275": 0.7103044986724854, + "276": 0.45779117941856384, + "277": 0.4440959692001343, + "278": 0.5940555334091187, + "279": 0.6490227580070496, + "280": 0.7053412199020386, + "281": 0.7040328979492188, + "282": 0.5998298525810242, + "283": 0.594127893447876, + "284": 0.7145086526870728, + "285": 0.6666063070297241, + "286": 0.5562670230865479, + "287": 0.554636538028717, + "288": 0.6019771099090576, + "289": 0.6950008869171143, + "290": 0.8517137169837952, + "291": 0.6693003177642822, + "292": 0.46996134519577026, + "293": 0.49752071499824524, + "294": 0.5653920769691467, + "295": 0.5723724365234375, + "296": 0.6215378046035767, + "297": 0.603495180606842, + "298": 0.5566955804824829, + "299": 0.5698720812797546, + "300": 0.6220775246620178, + "301": 0.6228402256965637, + "302": 0.608216404914856, + "303": 0.5949286222457886, + "304": 0.5897703170776367, + "305": 0.6326521635055542, + "306": 0.6502781510353088, + "307": 0.7034972310066223, + "308": 0.6854209899902344, + "309": 0.659753143787384, + "310": 0.5547028183937073, + "311": 0.5182896256446838, + "312": 0.5836197733879089, + "313": 0.8791462182998657, + "314": 0.6663444638252258, + "315": 0.6161668300628662, + "316": 0.5707175731658936, + "317": 0.6068826913833618, + "318": 0.6142266392707825, + "319": 0.6523970365524292, + "320": 0.6213333010673523, + "321": 0.6482954025268555, + "322": 0.6766254901885986, + "323": 0.6712852716445923 + }, + "loss": { + "270": 2.5571160316467285, + "271": 2.5527584552764893, + "272": 2.607752799987793, + "273": 2.605752944946289, + "274": 2.6232571601867676, + "275": 2.7005960941314697, + "276": 2.5935139656066895, + "277": 2.5643858909606934, + "278": 2.5594587326049805, + "279": 2.659778118133545, + "280": 2.5996789932250977, + "281": 2.6754727363586426, + "282": 2.5792224407196045, + "283": 2.593381404876709, + "284": 2.594367504119873, + "285": 2.665796995162964, + "286": 2.581049919128418, + "287": 2.571040153503418, + "288": 2.5634853839874268, + "289": 2.611403226852417, + "290": 2.6381478309631348, + "291": 2.7140612602233887, + "292": 2.5541610717773438, + "293": 2.572448492050171, + "294": 2.5328023433685303, + "295": 2.6092112064361572, + "296": 2.552314519882202, + "297": 2.6138644218444824, + "298": 2.54337739944458, + "299": 2.566132068634033, + "300": 2.5600030422210693, + "301": 2.6317367553710938, + "302": 2.566258192062378, + "303": 2.591238260269165, + "304": 2.559795379638672, + "305": 2.5886669158935547, + "306": 2.5549769401550293, + "307": 2.6214754581451416, + "308": 2.5994701385498047, + "309": 2.586261749267578, + "310": 2.5598111152648926, + "311": 2.5569286346435547, + "312": 2.5062479972839355, + "313": 2.570241928100586, + "314": 2.596400022506714, + "315": 2.593870162963867, + "316": 2.5686187744140625, + "317": 2.5699620246887207, + "318": 2.5624499320983887, + "319": 2.6041147708892822, + "320": 2.5424764156341553, + "321": 2.5638246536254883, + "322": 2.563735008239746, + "323": 2.561342239379883 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 11.6944, + 10.0478, + 7.65093, + 6.61447, + 4.24679, + 5.35267, + 12.3752, + 13.0026, + 7.25261, + 6.3143, + 5.22542, + 5.39777, + 7.16857, + 7.34694, + 5.08179, + 5.99913, + 8.34122, + 8.3578, + 7.07409, + 5.40634, + 3.63674, + 6.05867, + 11.5644, + 10.3926, + 7.92322, + 7.96438, + 6.60692, + 7.17688, + 8.20683, + 7.90177, + 6.61533, + 6.78405, + 6.9372, + 7.32112, + 7.35935, + 6.46765, + 6.04211, + 5.29689, + 5.53312, + 5.94168, + 8.31931, + 9.51861, + 7.35807, + 3.32546, + 5.84755, + 6.83206, + 7.88601, + 6.97778, + 6.79201, + 6.11838, + 6.58579, + 6.10018, + 5.59985, + 5.68399 + ], + "train_epoch_time": 5.055832147598267, + "train_loss": 2.584099040585577, + "train_score": 0.2575681492536619, + "val_loss": 2.619497704861769, + "val_score": 0.24536721476799586 + }, + { + "epoch": 6, + "grad_norm": 0.7342073917388916, + "learning_rate": 1.0, + "model_norm": 87.63153076171875, + "step_logs": { + "grad_norm": { + "324": 1.411607027053833, + "325": 5.936882019042969, + "326": 1.0874830484390259, + "327": 1.1495829820632935, + "328": 2.0652434825897217, + "329": 1.788154125213623, + "330": 2.5353336334228516, + "331": 9.002483367919922, + "332": 1.5860260725021362, + "333": 2.067275047302246, + "334": 2.326392412185669, + "335": 1.2262860536575317, + "336": 1.1714569330215454, + "337": 1.292888879776001, + "338": 1.0985143184661865, + "339": 2.9780819416046143, + "340": 1.0537818670272827, + "341": 0.7638192772865295, + "342": 1.1163246631622314, + "343": 1.7499282360076904, + "344": 0.8857640027999878, + "345": 0.9231863617897034, + "346": 0.7891698479652405, + "347": 0.8837443590164185, + "348": 0.7318992018699646, + "349": 0.5817071795463562, + "350": 0.6186779737472534, + "351": 0.5380474328994751, + "352": 0.4428447186946869, + "353": 0.47682973742485046, + "354": 0.5803439617156982, + "355": 0.5815485715866089, + "356": 0.5424709916114807, + "357": 0.5546571612358093, + "358": 0.5741904973983765, + "359": 0.5957809686660767, + "360": 0.5432384610176086, + "361": 0.5247166156768799, + "362": 0.5479821562767029, + "363": 0.5592751502990723, + "364": 0.624038577079773, + "365": 0.6352159976959229, + "366": 0.5675089955329895, + "367": 0.5796844959259033, + "368": 0.590277910232544, + "369": 0.5761415362358093, + "370": 0.5166272521018982, + "371": 0.5033401250839233, + "372": 0.5725637674331665, + "373": 0.6078308820724487, + "374": 0.4935343265533447, + "375": 0.48055365681648254, + "376": 0.5760100483894348, + "377": 0.7342073917388916 + }, + "loss": { + "324": 2.5620944499969482, + "325": 3.287554979324341, + "326": 2.700340509414673, + "327": 2.6189565658569336, + "328": 2.732553005218506, + "329": 2.6074466705322266, + "330": 3.005770444869995, + "331": 4.07434606552124, + "332": 2.9722516536712646, + "333": 2.9607715606689453, + "334": 3.0647597312927246, + "335": 2.727865219116211, + "336": 2.898146629333496, + "337": 2.8877696990966797, + "338": 2.7290725708007812, + "339": 2.994896650314331, + "340": 2.703648567199707, + "341": 2.6480765342712402, + "342": 2.7425317764282227, + "343": 2.867856502532959, + "344": 2.7015531063079834, + "345": 2.731201171875, + "346": 2.6815595626831055, + "347": 2.6596436500549316, + "348": 2.635072708129883, + "349": 2.5978190898895264, + "350": 2.6004061698913574, + "351": 2.606260299682617, + "352": 2.5429019927978516, + "353": 2.546093463897705, + "354": 2.5586318969726562, + "355": 2.5845251083374023, + "356": 2.5404248237609863, + "357": 2.563230276107788, + "358": 2.566281318664551, + "359": 2.5606184005737305, + "360": 2.521711826324463, + "361": 2.543833017349243, + "362": 2.5183985233306885, + "363": 2.567601442337036, + "364": 2.508497953414917, + "365": 2.5992743968963623, + "366": 2.5552685260772705, + "367": 2.550887107849121, + "368": 2.5539798736572266, + "369": 2.544661283493042, + "370": 2.515361785888672, + "371": 2.52848744392395, + "372": 2.5067756175994873, + "373": 2.5384414196014404, + "374": 2.516066551208496, + "375": 2.4882755279541016, + "376": 2.4791855812072754, + "377": 2.522308349609375 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 1.28578, + 0.0932731, + 2.28336, + 1.98174, + 0.640658, + 0.815465, + 0.467612, + 0.0502728, + 1.18158, + 0.692801, + 0.566279, + 1.81401, + 2.11187, + 1.72759, + 2.26154, + 0.337683, + 2.43472, + 4.53889, + 2.20075, + 0.93652, + 3.44332, + 3.20461, + 4.30573, + 3.40542, + 4.91915, + 7.67715, + 6.79379, + 9.00278, + 12.9666, + 11.1982, + 7.59691, + 7.64203, + 8.63283, + 8.33179, + 7.78381, + 7.21392, + 8.54505, + 9.2393, + 8.38671, + 8.20874, + 6.44156, + 6.44183, + 7.93398, + 7.59116, + 7.33001, + 7.66605, + 9.42423, + 9.98016, + 7.64659, + 6.87071, + 10.3297, + 10.7749, + 7.47221, + 4.67908 + ], + "train_epoch_time": 5.055371284484863, + "train_loss": 2.526646657106353, + "train_score": 0.2635155127927597, + "val_loss": 2.5588604218650493, + "val_score": 0.25524271691023415 + }, + { + "epoch": 7, + "grad_norm": 0.5210717916488647, + "learning_rate": 1.0, + "model_norm": 87.73303985595703, + "step_logs": { + "grad_norm": { + "378": 0.602575421333313, + "379": 0.5277130603790283, + "380": 0.5227085947990417, + "381": 0.71000075340271, + "382": 0.6231046915054321, + "383": 0.6421539783477783, + "384": 0.7804208397865295, + "385": 0.7286295294761658, + "386": 0.6196457147598267, + "387": 0.6605471968650818, + "388": 0.7016088366508484, + "389": 0.6097334623336792, + "390": 0.5113112926483154, + "391": 0.6405589580535889, + "392": 0.6165372729301453, + "393": 0.5911381840705872, + "394": 0.7189100384712219, + "395": 0.7505009770393372, + "396": 0.8339499831199646, + "397": 0.666320264339447, + "398": 0.7846480011940002, + "399": 0.6380552053451538, + "400": 0.5929601192474365, + "401": 0.6609094738960266, + "402": 0.9639849662780762, + "403": 0.6394780278205872, + "404": 0.605449914932251, + "405": 0.5324991345405579, + "406": 0.4752742648124695, + "407": 0.5447858572006226, + "408": 0.8335340619087219, + "409": 0.5592840909957886, + "410": 0.6268500089645386, + "411": 1.0854082107543945, + "412": 0.6443994641304016, + "413": 0.5713462829589844, + "414": 0.5971332788467407, + "415": 0.6650311946868896, + "416": 0.7565819025039673, + "417": 1.3656957149505615, + "418": 0.7001889944076538, + "419": 0.47380998730659485, + "420": 0.38597381114959717, + "421": 0.42630496621131897, + "422": 0.5677585005760193, + "423": 0.5772583484649658, + "424": 0.5121479630470276, + "425": 0.5330544710159302, + "426": 0.5912601351737976, + "427": 0.6170485615730286, + "428": 0.6126735806465149, + "429": 0.7320794463157654, + "430": 0.5360994935035706, + "431": 0.5210717916488647 + }, + "loss": { + "378": 2.5299153327941895, + "379": 2.493353843688965, + "380": 2.4669981002807617, + "381": 2.4794561862945557, + "382": 2.5080678462982178, + "383": 2.521314859390259, + "384": 2.4862966537475586, + "385": 2.526296377182007, + "386": 2.498164176940918, + "387": 2.475986957550049, + "388": 2.5053858757019043, + "389": 2.512781858444214, + "390": 2.433912992477417, + "391": 2.4429068565368652, + "392": 2.4752416610717773, + "393": 2.4649362564086914, + "394": 2.4418139457702637, + "395": 2.5195460319519043, + "396": 2.4962844848632812, + "397": 2.5044150352478027, + "398": 2.4830188751220703, + "399": 2.500295877456665, + "400": 2.445892095565796, + "401": 2.462839126586914, + "402": 2.490461826324463, + "403": 2.5121071338653564, + "404": 2.436614513397217, + "405": 2.4701952934265137, + "406": 2.406588315963745, + "407": 2.4216835498809814, + "408": 2.426663637161255, + "409": 2.5226311683654785, + "410": 2.4596550464630127, + "411": 2.5497500896453857, + "412": 2.552874803543091, + "413": 2.4740447998046875, + "414": 2.4519267082214355, + "415": 2.4554038047790527, + "416": 2.4858202934265137, + "417": 2.555680274963379, + "418": 2.535151958465576, + "419": 2.515368938446045, + "420": 2.437239408493042, + "421": 2.4427995681762695, + "422": 2.4465582370758057, + "423": 2.50795316696167, + "424": 2.450932502746582, + "425": 2.4523844718933105, + "426": 2.4315786361694336, + "427": 2.4662482738494873, + "428": 2.4301352500915527, + "429": 2.450061082839966, + "430": 2.446341037750244, + "431": 2.4056081771850586 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 6.9676, + 8.95341, + 9.02921, + 4.91857, + 6.45977, + 6.11433, + 4.08221, + 4.75851, + 6.5063, + 5.67467, + 5.08961, + 6.75888, + 9.30967, + 5.95372, + 6.51177, + 7.05387, + 4.72458, + 4.47322, + 3.58934, + 5.64079, + 4.03302, + 6.14151, + 6.95643, + 5.63835, + 2.68003, + 6.14309, + 6.64707, + 8.71151, + 10.654, + 8.15954, + 3.49271, + 8.06471, + 6.2596, + 2.16427, + 6.14779, + 7.57894, + 6.87646, + 5.55186, + 4.34268, + 1.37025, + 5.17099, + 11.2045, + 16.36, + 13.4415, + 7.58976, + 7.52626, + 9.34416, + 8.63069, + 6.95554, + 6.47736, + 6.47399, + 4.57152, + 8.51189, + 8.85992 + ], + "train_epoch_time": 5.055718898773193, + "train_loss": 2.400428195663299, + "train_score": 0.30167682931960227, + "val_loss": 2.4404160708154796, + "val_score": 0.2931660803464743 + }, + { + "epoch": 8, + "grad_norm": 0.7102885246276855, + "learning_rate": 1.0, + "model_norm": 87.84078979492188, + "step_logs": { + "grad_norm": { + "432": 0.5291587114334106, + "433": 0.5080680847167969, + "434": 0.5914666056632996, + "435": 0.6211421489715576, + "436": 0.6727566719055176, + "437": 0.5964329838752747, + "438": 0.5353344678878784, + "439": 0.5753464698791504, + "440": 0.6464072465896606, + "441": 0.6724745035171509, + "442": 0.6912034749984741, + "443": 0.647833526134491, + "444": 0.584959864616394, + "445": 0.5919318199157715, + "446": 0.6382439732551575, + "447": 0.6012136340141296, + "448": 0.5454508066177368, + "449": 0.6023106575012207, + "450": 0.6230897903442383, + "451": 0.5920654535293579, + "452": 0.7590808868408203, + "453": 0.6764283180236816, + "454": 0.5318419337272644, + "455": 0.5110647678375244, + "456": 0.6268112063407898, + "457": 0.6288836598396301, + "458": 0.6159000396728516, + "459": 0.6438373923301697, + "460": 0.6103591322898865, + "461": 0.5652045607566833, + "462": 0.5193692445755005, + "463": 0.547308087348938, + "464": 0.5841281414031982, + "465": 0.5704706907272339, + "466": 0.4866780936717987, + "467": 0.5057941675186157, + "468": 0.5549024939537048, + "469": 0.5555694699287415, + "470": 0.5647054314613342, + "471": 0.5819074511528015, + "472": 0.6872416734695435, + "473": 0.5997867584228516, + "474": 0.6576035022735596, + "475": 0.5740867853164673, + "476": 0.5741524696350098, + "477": 0.57319176197052, + "478": 0.6367378234863281, + "479": 0.5653982758522034, + "480": 0.4593934118747711, + "481": 0.48867759108543396, + "482": 0.6057224273681641, + "483": 0.7308072447776794, + "484": 0.7546696066856384, + "485": 0.7102885246276855 + }, + "loss": { + "432": 2.4098615646362305, + "433": 2.399606704711914, + "434": 2.382483959197998, + "435": 2.4312515258789062, + "436": 2.409421920776367, + "437": 2.437469005584717, + "438": 2.389492988586426, + "439": 2.3906571865081787, + "440": 2.413485050201416, + "441": 2.4353957176208496, + "442": 2.39896821975708, + "443": 2.414506673812866, + "444": 2.385709762573242, + "445": 2.4022057056427, + "446": 2.370846748352051, + "447": 2.3770575523376465, + "448": 2.3413352966308594, + "449": 2.3526244163513184, + "450": 2.388529062271118, + "451": 2.365694046020508, + "452": 2.359882354736328, + "453": 2.4510598182678223, + "454": 2.3693623542785645, + "455": 2.33272123336792, + "456": 2.341805934906006, + "457": 2.4156875610351562, + "458": 2.353476047515869, + "459": 2.398212432861328, + "460": 2.3470890522003174, + "461": 2.366288185119629, + "462": 2.3093433380126953, + "463": 2.330211639404297, + "464": 2.325465679168701, + "465": 2.370218276977539, + "466": 2.323535680770874, + "467": 2.317265272140503, + "468": 2.3108184337615967, + "469": 2.328054904937744, + "470": 2.3119654655456543, + "471": 2.324441909790039, + "472": 2.332365036010742, + "473": 2.38411021232605, + "474": 2.2995452880859375, + "475": 2.3521382808685303, + "476": 2.3074820041656494, + "477": 2.3330323696136475, + "478": 2.3407046794891357, + "479": 2.3514766693115234, + "480": 2.3115811347961426, + "481": 2.2945120334625244, + "482": 2.3046698570251465, + "483": 2.334651470184326, + "484": 2.3936595916748047, + "485": 2.395461082458496 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 8.60637, + 9.296, + 6.81035, + 6.30156, + 5.32349, + 6.85198, + 8.33787, + 7.22201, + 5.77607, + 5.3854, + 5.02126, + 5.7531, + 6.97213, + 6.85594, + 5.82009, + 6.57631, + 7.8696, + 6.48502, + 6.15218, + 6.74868, + 4.09557, + 5.35686, + 8.37657, + 8.93122, + 5.96043, + 6.10802, + 6.20425, + 5.78543, + 6.30026, + 7.40724, + 8.56123, + 7.77914, + 6.81544, + 7.28319, + 9.80993, + 9.05791, + 7.50468, + 7.54252, + 7.24999, + 6.86453, + 4.9383, + 6.62724, + 5.31758, + 7.13687, + 6.99977, + 7.10102, + 5.77332, + 7.35583, + 10.9532, + 9.60828, + 6.28147, + 4.37136, + 4.2029, + 4.7481 + ], + "train_epoch_time": 5.055600881576538, + "train_loss": 2.364268625278555, + "train_score": 0.2852156564443662, + "val_loss": 2.4210136120135965, + "val_score": 0.2710022609488425 + }, + { + "epoch": 9, + "grad_norm": 0.6309716701507568, + "learning_rate": 1.0, + "model_norm": 87.9674072265625, + "step_logs": { + "grad_norm": { + "486": 0.6555119156837463, + "487": 0.6065738201141357, + "488": 0.6766089797019958, + "489": 0.6968892216682434, + "490": 0.8175331354141235, + "491": 0.6847348809242249, + "492": 0.6094216108322144, + "493": 0.570482611656189, + "494": 0.6077772378921509, + "495": 0.6636857986450195, + "496": 0.6628976464271545, + "497": 0.6685439348220825, + "498": 0.6621301770210266, + "499": 0.6826894879341125, + "500": 0.7284846305847168, + "501": 0.6596152186393738, + "502": 0.555325984954834, + "503": 0.49520543217658997, + "504": 0.5145333409309387, + "505": 0.6164749264717102, + "506": 0.6721234917640686, + "507": 0.7313554883003235, + "508": 0.7766849398612976, + "509": 0.6086581349372864, + "510": 0.5633295774459839, + "511": 0.6057594418525696, + "512": 0.6070159077644348, + "513": 0.5995933413505554, + "514": 0.5777207612991333, + "515": 0.5417271256446838, + "516": 0.6076132655143738, + "517": 0.6441335678100586, + "518": 0.6251640319824219, + "519": 0.6875194311141968, + "520": 0.6550372838973999, + "521": 0.623849093914032, + "522": 0.5926386713981628, + "523": 0.5614331364631653, + "524": 0.5475859642028809, + "525": 0.5731501579284668, + "526": 0.5876085162162781, + "527": 0.6310720443725586, + "528": 0.5888630151748657, + "529": 0.5407215356826782, + "530": 0.5888542532920837, + "531": 0.6244917511940002, + "532": 0.6489545106887817, + "533": 0.5564259886741638, + "534": 0.48338624835014343, + "535": 0.5624719858169556, + "536": 0.6948518753051758, + "537": 0.8001134991645813, + "538": 0.6677476167678833, + "539": 0.6309716701507568 + }, + "loss": { + "486": 2.3659470081329346, + "487": 2.3216490745544434, + "488": 2.331204891204834, + "489": 2.3603601455688477, + "490": 2.3416693210601807, + "491": 2.3846657276153564, + "492": 2.3112521171569824, + "493": 2.2980527877807617, + "494": 2.3205103874206543, + "495": 2.332919120788574, + "496": 2.3330044746398926, + "497": 2.319004535675049, + "498": 2.35805344581604, + "499": 2.2802436351776123, + "500": 2.3656234741210938, + "501": 2.336944103240967, + "502": 2.3160767555236816, + "503": 2.265007972717285, + "504": 2.291518449783325, + "505": 2.2510130405426025, + "506": 2.3210577964782715, + "507": 2.3007216453552246, + "508": 2.342376232147217, + "509": 2.3094828128814697, + "510": 2.2949156761169434, + "511": 2.2486014366149902, + "512": 2.295055866241455, + "513": 2.3104748725891113, + "514": 2.2957658767700195, + "515": 2.2595434188842773, + "516": 2.256690740585327, + "517": 2.2956085205078125, + "518": 2.2952845096588135, + "519": 2.2891552448272705, + "520": 2.328775405883789, + "521": 2.2614190578460693, + "522": 2.2824063301086426, + "523": 2.276197671890259, + "524": 2.25631046295166, + "525": 2.263183355331421, + "526": 2.276244640350342, + "527": 2.2238001823425293, + "528": 2.2641139030456543, + "529": 2.2153940200805664, + "530": 2.27239727973938, + "531": 2.252530097961426, + "532": 2.274031639099121, + "533": 2.2768046855926514, + "534": 2.203139305114746, + "535": 2.199826955795288, + "536": 2.263258934020996, + "537": 2.2897324562072754, + "538": 2.2780838012695312, + "539": 2.2499399185180664 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 5.5061, + 6.31, + 5.09219, + 4.86016, + 3.5036, + 5.08607, + 6.22317, + 7.06115, + 6.28195, + 5.29633, + 5.30912, + 5.1885, + 5.37857, + 4.89254, + 4.45764, + 5.37115, + 7.51029, + 9.23632, + 8.65558, + 5.92307, + 5.13793, + 4.30137, + 3.883, + 6.23402, + 7.23172, + 6.12791, + 6.22864, + 6.42669, + 6.87847, + 7.69945, + 6.11248, + 5.53281, + 5.87285, + 4.8429, + 5.42745, + 5.81061, + 6.4985, + 7.22128, + 7.52479, + 6.88942, + 6.59239, + 5.5839, + 6.52935, + 7.57711, + 6.55343, + 5.77587, + 5.39968, + 7.35379, + 9.42873, + 6.95323, + 4.68759, + 3.57669, + 5.10911, + 5.65134 + ], + "train_epoch_time": 5.059556007385254, + "train_loss": 2.248896524040737, + "train_score": 0.3355664903583964, + "val_loss": 2.320340652279684, + "val_score": 0.31700721140159666 + }, + { + "epoch": 10, + "grad_norm": 0.6014808416366577, + "learning_rate": 1.0, + "model_norm": 88.1033706665039, + "step_logs": { + "grad_norm": { + "540": 0.556962788105011, + "541": 0.4922509789466858, + "542": 0.46269673109054565, + "543": 0.4437045753002167, + "544": 0.46495282649993896, + "545": 0.5269874930381775, + "546": 0.6012157797813416, + "547": 0.6110914945602417, + "548": 0.5938258767127991, + "549": 0.5913670659065247, + "550": 0.5411767363548279, + "551": 0.48985549807548523, + "552": 0.49795278906822205, + "553": 0.5598718523979187, + "554": 0.7001277804374695, + "555": 0.6461341381072998, + "556": 0.5456030368804932, + "557": 0.5796665549278259, + "558": 0.6592769622802734, + "559": 0.70893394947052, + "560": 0.631971538066864, + "561": 0.5096566677093506, + "562": 0.5030943155288696, + "563": 0.5146854519844055, + "564": 0.5239552855491638, + "565": 0.5368452072143555, + "566": 0.5880934000015259, + "567": 0.6512694954872131, + "568": 0.6467437148094177, + "569": 0.601622998714447, + "570": 0.6149700880050659, + "571": 0.594676673412323, + "572": 0.5724245309829712, + "573": 0.5899494290351868, + "574": 0.599740207195282, + "575": 0.604749858379364, + "576": 0.6063494086265564, + "577": 0.6455590128898621, + "578": 0.7162782549858093, + "579": 0.7747810482978821, + "580": 0.6035782098770142, + "581": 0.6915206909179688, + "582": 0.6963188648223877, + "583": 0.7954487800598145, + "584": 0.6717667579650879, + "585": 0.5934470295906067, + "586": 0.5472848415374756, + "587": 0.6223129034042358, + "588": 0.6549533009529114, + "589": 0.7186363935470581, + "590": 0.715755045413971, + "591": 0.6313241720199585, + "592": 0.5664232969284058, + "593": 0.6014808416366577 + }, + "loss": { + "540": 2.2582836151123047, + "541": 2.225924253463745, + "542": 2.2293286323547363, + "543": 2.2244949340820312, + "544": 2.1676442623138428, + "545": 2.203770160675049, + "546": 2.2265186309814453, + "547": 2.2523059844970703, + "548": 2.2320499420166016, + "549": 2.230039119720459, + "550": 2.237853527069092, + "551": 2.2208521366119385, + "552": 2.1823201179504395, + "553": 2.215790271759033, + "554": 2.227344512939453, + "555": 2.301483631134033, + "556": 2.2014963626861572, + "557": 2.2087697982788086, + "558": 2.1855506896972656, + "559": 2.275979995727539, + "560": 2.204580545425415, + "561": 2.2193448543548584, + "562": 2.1965672969818115, + "563": 2.1859724521636963, + "564": 2.2182860374450684, + "565": 2.1638054847717285, + "566": 2.1911654472351074, + "567": 2.2026207447052, + "568": 2.2449052333831787, + "569": 2.236891269683838, + "570": 2.218660593032837, + "571": 2.238295555114746, + "572": 2.221579074859619, + "573": 2.230358600616455, + "574": 2.1804862022399902, + "575": 2.1922364234924316, + "576": 2.183056354522705, + "577": 2.214650869369507, + "578": 2.2242043018341064, + "579": 2.274226665496826, + "580": 2.2473220825195312, + "581": 2.1946420669555664, + "582": 2.2512881755828857, + "583": 2.2347519397735596, + "584": 2.2603650093078613, + "585": 2.196855068206787, + "586": 2.164666175842285, + "587": 2.149979829788208, + "588": 2.2092437744140625, + "589": 2.21726655960083, + "590": 2.24027681350708, + "591": 2.221879720687866, + "592": 2.1838583946228027, + "593": 2.1815268993377686 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 7.27991, + 9.18623, + 10.4131, + 11.2991, + 10.027, + 7.93534, + 6.15979, + 6.03136, + 6.32974, + 6.37673, + 7.64106, + 9.25515, + 8.8012, + 7.06889, + 4.54394, + 5.51268, + 7.39545, + 6.57346, + 5.02835, + 4.52853, + 5.5199, + 8.54416, + 8.67852, + 8.25203, + 8.08033, + 7.50793, + 6.33552, + 5.193, + 5.36702, + 6.18011, + 5.86655, + 6.3293, + 6.77993, + 6.40833, + 6.06215, + 5.99426, + 5.93771, + 5.31414, + 4.33522, + 3.78857, + 6.16876, + 4.58937, + 4.64317, + 3.53187, + 5.00889, + 6.23789, + 7.2271, + 5.55158, + 5.15019, + 4.29338, + 4.37293, + 5.57463, + 6.80679, + 6.02999 + ], + "train_epoch_time": 5.066505193710327, + "train_loss": 2.1937105183621903, + "train_score": 0.3679463324451036, + "val_loss": 2.2935922800057518, + "val_score": 0.339991748606707 + }, + { + "epoch": 11, + "grad_norm": 0.4858298897743225, + "learning_rate": 1.0, + "model_norm": 88.24640655517578, + "step_logs": { + "grad_norm": { + "594": 0.5973585844039917, + "595": 0.5813920497894287, + "596": 0.5318682789802551, + "597": 0.5529276728630066, + "598": 0.6715949773788452, + "599": 0.7248722910881042, + "600": 0.7247588038444519, + "601": 0.7221391201019287, + "602": 0.6781218647956848, + "603": 0.7130929231643677, + "604": 0.6518683433532715, + "605": 0.6411283612251282, + "606": 0.6091272234916687, + "607": 0.563484251499176, + "608": 0.5655494928359985, + "609": 0.5641517639160156, + "610": 0.5741521716117859, + "611": 0.5244921445846558, + "612": 0.48222866654396057, + "613": 0.551892876625061, + "614": 0.6759764552116394, + "615": 0.7844778299331665, + "616": 0.79837566614151, + "617": 0.7085427045822144, + "618": 0.6418455839157104, + "619": 0.6136366128921509, + "620": 0.5651835203170776, + "621": 0.5280206799507141, + "622": 0.5549091696739197, + "623": 0.5597411394119263, + "624": 0.5573664307594299, + "625": 0.5344337224960327, + "626": 0.5359423160552979, + "627": 0.5972744822502136, + "628": 0.620598554611206, + "629": 0.6067497730255127, + "630": 0.5799751281738281, + "631": 0.575186014175415, + "632": 0.6336397528648376, + "633": 0.6197266578674316, + "634": 0.6179752945899963, + "635": 0.5942938327789307, + "636": 0.5791890025138855, + "637": 0.5822694301605225, + "638": 0.5426585674285889, + "639": 0.5100070238113403, + "640": 0.5412781834602356, + "641": 0.6127952337265015, + "642": 0.6500569581985474, + "643": 0.6201475858688354, + "644": 0.6241342425346375, + "645": 0.6081094741821289, + "646": 0.5275270342826843, + "647": 0.4858298897743225 + }, + "loss": { + "594": 2.1896016597747803, + "595": 2.2009925842285156, + "596": 2.1809935569763184, + "597": 2.1452231407165527, + "598": 2.1617276668548584, + "599": 2.1946449279785156, + "600": 2.18570613861084, + "601": 2.242854118347168, + "602": 2.205009937286377, + "603": 2.2188169956207275, + "604": 2.1857519149780273, + "605": 2.2027411460876465, + "606": 2.1736578941345215, + "607": 2.1626667976379395, + "608": 2.135918140411377, + "609": 2.185819625854492, + "610": 2.157137870788574, + "611": 2.133901596069336, + "612": 2.141611099243164, + "613": 2.1238460540771484, + "614": 2.1684212684631348, + "615": 2.210869312286377, + "616": 2.243278980255127, + "617": 2.2225825786590576, + "618": 2.2220232486724854, + "619": 2.195187568664551, + "620": 2.154479503631592, + "621": 2.15898060798645, + "622": 2.128307819366455, + "623": 2.1356043815612793, + "624": 2.141587972640991, + "625": 2.1088027954101562, + "626": 2.1308999061584473, + "627": 2.163480520248413, + "628": 2.1570582389831543, + "629": 2.133984088897705, + "630": 2.1156363487243652, + "631": 2.1106784343719482, + "632": 2.157172441482544, + "633": 2.1684060096740723, + "634": 2.120621919631958, + "635": 2.1279208660125732, + "636": 2.1433229446411133, + "637": 2.151803493499756, + "638": 2.140707492828369, + "639": 2.0865628719329834, + "640": 2.107567071914673, + "641": 2.122465133666992, + "642": 2.1497349739074707, + "643": 2.1593289375305176, + "644": 2.1215906143188477, + "645": 2.1376967430114746, + "646": 2.1326732635498047, + "647": 2.087991237640381 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 6.13614, + 6.51149, + 7.70986, + 7.01675, + 4.79276, + 4.17678, + 4.16107, + 4.3009, + 4.79507, + 4.36344, + 5.14376, + 5.35887, + 5.85835, + 6.81124, + 6.67795, + 6.86788, + 6.54371, + 7.75705, + 9.20947, + 6.9729, + 4.74548, + 3.59254, + 3.5194, + 4.42717, + 5.39371, + 5.82974, + 6.74471, + 7.74367, + 6.91179, + 6.81626, + 6.89373, + 7.38326, + 7.41869, + 6.06464, + 5.60067, + 5.79658, + 6.28959, + 6.37978, + 5.37279, + 5.64599, + 5.55292, + 6.02494, + 6.38921, + 6.3468, + 7.26949, + 8.02194, + 7.1935, + 5.6521, + 5.08724, + 5.61473, + 5.44635, + 5.78073, + 7.66364, + 8.84627 + ], + "train_epoch_time": 5.060525178909302, + "train_loss": 2.10171169666854, + "train_score": 0.3767216642925695, + "val_loss": 2.2307200163568615, + "val_score": 0.3445931402399674 + }, + { + "epoch": 12, + "grad_norm": 0.5706313848495483, + "learning_rate": 1.0, + "model_norm": 88.36199951171875, + "step_logs": { + "grad_norm": { + "648": 0.5740098357200623, + "649": 0.6466655731201172, + "650": 0.6953812837600708, + "651": 0.6530663371086121, + "652": 0.6339909434318542, + "653": 0.6604085564613342, + "654": 0.6708871126174927, + "655": 0.6575053930282593, + "656": 0.5776991844177246, + "657": 0.5261021256446838, + "658": 0.5378570556640625, + "659": 0.4962868094444275, + "660": 0.498409241437912, + "661": 0.5092989206314087, + "662": 0.5121369361877441, + "663": 0.5202082395553589, + "664": 0.5542728900909424, + "665": 0.506466269493103, + "666": 0.4806251525878906, + "667": 0.4512490928173065, + "668": 0.47063738107681274, + "669": 0.4600653052330017, + "670": 0.4330039322376251, + "671": 0.4742850363254547, + "672": 0.5568130612373352, + "673": 0.6384620666503906, + "674": 0.6680463552474976, + "675": 0.6430903673171997, + "676": 0.6576550006866455, + "677": 0.6591098308563232, + "678": 0.5544489622116089, + "679": 0.4980860948562622, + "680": 0.4885252118110657, + "681": 0.5175431370735168, + "682": 0.579421877861023, + "683": 0.5397631525993347, + "684": 0.4855535924434662, + "685": 0.4905700385570526, + "686": 0.45717519521713257, + "687": 0.4416581392288208, + "688": 0.45960694551467896, + "689": 0.5193267464637756, + "690": 0.5908728241920471, + "691": 0.5973421931266785, + "692": 0.5113508105278015, + "693": 0.4968108534812927, + "694": 0.481393039226532, + "695": 0.448138028383255, + "696": 0.42587414383888245, + "697": 0.4288692772388458, + "698": 0.460195928812027, + "699": 0.5171209573745728, + "700": 0.5499134659767151, + "701": 0.5706313848495483 + }, + "loss": { + "648": 2.0968985557556152, + "649": 2.1383843421936035, + "650": 2.1135270595550537, + "651": 2.143108606338501, + "652": 2.138307571411133, + "653": 2.1188862323760986, + "654": 2.140519618988037, + "655": 2.1357192993164062, + "656": 2.1380577087402344, + "657": 2.0793752670288086, + "658": 2.0889854431152344, + "659": 2.083698034286499, + "660": 2.0854196548461914, + "661": 2.058002471923828, + "662": 2.1096043586730957, + "663": 2.110630750656128, + "664": 2.0826213359832764, + "665": 2.102409839630127, + "666": 2.0383758544921875, + "667": 2.021230697631836, + "668": 2.061807155609131, + "669": 2.068986415863037, + "670": 2.0378987789154053, + "671": 2.0598032474517822, + "672": 2.074517250061035, + "673": 2.0568490028381348, + "674": 2.091447353363037, + "675": 2.0944764614105225, + "676": 2.0467898845672607, + "677": 2.083927631378174, + "678": 2.04795241355896, + "679": 2.0639595985412598, + "680": 2.0506954193115234, + "681": 2.056553602218628, + "682": 2.0537021160125732, + "683": 2.0639634132385254, + "684": 2.0526318550109863, + "685": 2.036045789718628, + "686": 2.060379981994629, + "687": 2.0722928047180176, + "688": 2.0301663875579834, + "689": 2.0304300785064697, + "690": 2.042562961578369, + "691": 2.057929277420044, + "692": 2.016390323638916, + "693": 1.987950086593628, + "694": 2.023526191711426, + "695": 2.035890579223633, + "696": 1.9924757480621338, + "697": 2.03464937210083, + "698": 2.0130343437194824, + "699": 2.040757179260254, + "700": 2.0372366905212402, + "701": 2.0226988792419434 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 6.36413, + 5.11359, + 4.37081, + 5.02492, + 5.31991, + 4.85828, + 4.75576, + 4.94021, + 6.40643, + 7.51264, + 7.22107, + 8.45998, + 8.39501, + 7.93415, + 8.0432, + 7.79934, + 6.77896, + 8.19627, + 8.82412, + 9.9262, + 9.3084, + 9.77504, + 10.8692, + 9.15687, + 6.69111, + 5.04583, + 4.68634, + 5.06444, + 4.73235, + 4.79697, + 6.66188, + 8.31941, + 8.59265, + 7.67798, + 6.11713, + 7.08428, + 8.70636, + 8.46029, + 9.85785, + 10.6238, + 9.61078, + 7.52847, + 5.85043, + 5.76745, + 7.71146, + 8.05422, + 8.73191, + 10.1375, + 10.9858, + 11.0622, + 9.5053, + 7.63145, + 6.73679, + 6.21184 + ], + "train_epoch_time": 5.060104608535767, + "train_loss": 2.0253352661214907, + "train_score": 0.3991896073223016, + "val_loss": 2.165483419164314, + "val_score": 0.36478365415409975 + }, + { + "epoch": 13, + "grad_norm": 0.317649245262146, + "learning_rate": 0.6666666666666667, + "model_norm": 88.42914581298828, + "step_logs": { + "grad_norm": { + "702": 0.5659703612327576, + "703": 0.5189346671104431, + "704": 0.47177356481552124, + "705": 0.417879581451416, + "706": 0.40076515078544617, + "707": 0.4317120611667633, + "708": 0.4694533050060272, + "709": 0.4872726798057556, + "710": 0.48583105206489563, + "711": 0.45588475465774536, + "712": 0.4326063096523285, + "713": 0.4227367639541626, + "714": 0.3997606933116913, + "715": 0.39034223556518555, + "716": 0.3973705470561981, + "717": 0.38979142904281616, + "718": 0.3650878071784973, + "719": 0.3651696443557739, + "720": 0.3692726492881775, + "721": 0.3553318977355957, + "722": 0.40644368529319763, + "723": 0.45729878544807434, + "724": 0.4858042895793915, + "725": 0.4603227376937866, + "726": 0.40636059641838074, + "727": 0.35133248567581177, + "728": 0.3609521985054016, + "729": 0.3591592311859131, + "730": 0.34919023513793945, + "731": 0.3697125017642975, + "732": 0.3958294093608856, + "733": 0.4055362045764923, + "734": 0.39539051055908203, + "735": 0.388135701417923, + "736": 0.3824511468410492, + "737": 0.4091372489929199, + "738": 0.43867823481559753, + "739": 0.4322429895401001, + "740": 0.4089679718017578, + "741": 0.39429357647895813, + "742": 0.384443461894989, + "743": 0.3956994116306305, + "744": 0.38095951080322266, + "745": 0.31946438550949097, + "746": 0.34223631024360657, + "747": 0.3230850100517273, + "748": 0.31484317779541016, + "749": 0.30294936895370483, + "750": 0.3151356279850006, + "751": 0.3215409815311432, + "752": 0.32952404022216797, + "753": 0.29319998621940613, + "754": 0.31509700417518616, + "755": 0.317649245262146 + }, + "loss": { + "702": 2.017223358154297, + "703": 2.0105414390563965, + "704": 1.9751551151275635, + "705": 1.9659652709960938, + "706": 1.9936549663543701, + "707": 1.993722677230835, + "708": 2.018618583679199, + "709": 1.9937139749526978, + "710": 2.004532814025879, + "711": 2.0115342140197754, + "712": 2.001229763031006, + "713": 1.9961116313934326, + "714": 1.973388910293579, + "715": 1.9947682619094849, + "716": 1.9720706939697266, + "717": 1.9973225593566895, + "718": 1.9759191274642944, + "719": 1.9842751026153564, + "720": 1.9865155220031738, + "721": 1.975325584411621, + "722": 1.9642441272735596, + "723": 1.950222373008728, + "724": 1.9741594791412354, + "725": 1.9610042572021484, + "726": 1.9816988706588745, + "727": 1.9468576908111572, + "728": 1.9696624279022217, + "729": 1.975189447402954, + "730": 1.9782061576843262, + "731": 1.9392387866973877, + "732": 1.9939148426055908, + "733": 1.957559585571289, + "734": 1.9628493785858154, + "735": 1.952294111251831, + "736": 1.9606735706329346, + "737": 1.9367647171020508, + "738": 1.9504387378692627, + "739": 1.9885499477386475, + "740": 1.9830780029296875, + "741": 1.9423038959503174, + "742": 1.9734399318695068, + "743": 1.9925496578216553, + "744": 1.9633898735046387, + "745": 1.9748756885528564, + "746": 1.9599859714508057, + "747": 1.9530096054077148, + "748": 1.9434850215911865, + "749": 1.9006482362747192, + "750": 1.944066047668457, + "751": 1.9230057001113892, + "752": 1.912054419517517, + "753": 1.9313993453979492, + "754": 1.915595293045044, + "755": 1.9286563396453857 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 6.29748, + 7.46599, + 8.8743, + 11.2583, + 12.4128, + 10.6974, + 9.15945, + 8.39689, + 8.49264, + 9.67871, + 10.6933, + 11.1698, + 12.3485, + 13.0919, + 12.4891, + 13.1457, + 14.8243, + 14.8803, + 14.5679, + 15.6448, + 11.8904, + 9.32576, + 8.36488, + 9.25452, + 12.0009, + 15.7724, + 15.1179, + 15.3121, + 16.2236, + 14.1874, + 12.726, + 11.903, + 12.5555, + 12.9592, + 13.4046, + 11.5701, + 10.1354, + 10.6434, + 11.8566, + 12.4933, + 13.3524, + 12.7256, + 13.5285, + 19.3506, + 16.734, + 18.7099, + 19.6062, + 20.7091, + 19.5756, + 18.5998, + 17.6086, + 22.467, + 19.2937, + 19.1143 + ], + "train_epoch_time": 5.057008504867554, + "train_loss": 1.932453947806119, + "train_score": 0.4267115764330586, + "val_loss": 2.091862884646305, + "val_score": 0.3893692590954931 + }, + { + "epoch": 14, + "grad_norm": 0.2712746560573578, + "learning_rate": 0.33333333333333337, + "model_norm": 88.4502944946289, + "step_logs": { + "grad_norm": { + "756": 0.3041907250881195, + "757": 0.28176945447921753, + "758": 0.2795979380607605, + "759": 0.30784180760383606, + "760": 0.31386154890060425, + "761": 0.30351075530052185, + "762": 0.29732781648635864, + "763": 0.3302968442440033, + "764": 0.3865208923816681, + "765": 0.3399239480495453, + "766": 0.3118269145488739, + "767": 0.2835691273212433, + "768": 0.29692161083221436, + "769": 0.2873125970363617, + "770": 0.3080272078514099, + "771": 0.304837167263031, + "772": 0.2905215620994568, + "773": 0.2977312207221985, + "774": 0.28075090050697327, + "775": 0.28373321890830994, + "776": 0.29116812348365784, + "777": 0.28700709342956543, + "778": 0.27636873722076416, + "779": 0.294653058052063, + "780": 0.282988578081131, + "781": 0.28469496965408325, + "782": 0.2952168583869934, + "783": 0.3174051344394684, + "784": 0.2970033586025238, + "785": 0.28664711117744446, + "786": 0.2819790542125702, + "787": 0.2900996208190918, + "788": 0.2898542582988739, + "789": 0.28095611929893494, + "790": 0.2739872634410858, + "791": 0.295239120721817, + "792": 0.2741515338420868, + "793": 0.26222220063209534, + "794": 0.28728288412094116, + "795": 0.2734343707561493, + "796": 0.27362772822380066, + "797": 0.2574091851711273, + "798": 0.2741304636001587, + "799": 0.2779829204082489, + "800": 0.26481491327285767, + "801": 0.2693365812301636, + "802": 0.28728002309799194, + "803": 0.2559102475643158, + "804": 0.26661020517349243, + "805": 0.28670406341552734, + "806": 0.2667236030101776, + "807": 0.27433377504348755, + "808": 0.2881845235824585, + "809": 0.2712746560573578 + }, + "loss": { + "756": 1.9308396577835083, + "757": 1.9228020906448364, + "758": 1.9220657348632812, + "759": 1.915724515914917, + "760": 1.9114307165145874, + "761": 1.9362471103668213, + "762": 1.9335390329360962, + "763": 1.9228084087371826, + "764": 1.9295916557312012, + "765": 1.9304677248001099, + "766": 1.9512678384780884, + "767": 1.9297728538513184, + "768": 1.9571216106414795, + "769": 1.8947694301605225, + "770": 1.9244377613067627, + "771": 1.8902053833007812, + "772": 1.9376835823059082, + "773": 1.9160189628601074, + "774": 1.947719931602478, + "775": 1.9249310493469238, + "776": 1.922447681427002, + "777": 1.9170377254486084, + "778": 1.9406498670578003, + "779": 1.930999994277954, + "780": 1.9417288303375244, + "781": 1.908583402633667, + "782": 1.8922275304794312, + "783": 1.9377039670944214, + "784": 1.924414873123169, + "785": 1.9473352432250977, + "786": 1.886911153793335, + "787": 1.934464454650879, + "788": 1.9261902570724487, + "789": 1.9330294132232666, + "790": 1.911017894744873, + "791": 1.9252042770385742, + "792": 1.925364375114441, + "793": 1.9318610429763794, + "794": 1.9344477653503418, + "795": 1.921375036239624, + "796": 1.9071916341781616, + "797": 1.9250377416610718, + "798": 1.924267053604126, + "799": 1.9045524597167969, + "800": 1.9065269231796265, + "801": 1.9118849039077759, + "802": 1.9054911136627197, + "803": 1.8885326385498047, + "804": 1.8974063396453857, + "805": 1.9161100387573242, + "806": 1.9150605201721191, + "807": 1.8886849880218506, + "808": 1.9351091384887695, + "809": 1.9303133487701416 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 20.8667, + 24.2185, + 24.5867, + 20.2152, + 19.4036, + 21.019, + 21.8717, + 17.6249, + 12.9157, + 16.707, + 20.0673, + 23.9987, + 22.199, + 22.9534, + 20.2827, + 20.341, + 22.9576, + 21.6148, + 24.7107, + 23.9108, + 22.676, + 23.2726, + 25.4079, + 22.2413, + 24.2466, + 23.5479, + 21.7116, + 19.2336, + 21.816, + 23.6998, + 23.7311, + 22.9862, + 22.9266, + 24.4885, + 25.4568, + 22.0866, + 25.6172, + 28.0955, + 23.4389, + 25.6984, + 25.4726, + 29.053, + 25.6065, + 24.6466, + 27.1868, + 26.3555, + 23.0885, + 28.8369, + 26.6936, + 23.3106, + 26.919, + 25.0958, + 23.3004, + 26.2307 + ], + "train_epoch_time": 5.059141159057617, + "train_loss": 1.9118006241372874, + "train_score": 0.4325513360832138, + "val_loss": 2.0735781036753607, + "val_score": 0.39506494040877893 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:34:11.713210", + "final_model_norm": 88.4502944946289, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:32:26.669694", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 9.14046573638916, + "learning_rate": 1e-10, + "model_norm": 87.33110809326172, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.540498733520508, + "3": 7.735904216766357, + "4": 10.549470901489258, + "5": 5.151391506195068, + "6": 3.74910306930542, + "7": 5.637049674987793, + "8": 14.351365089416504, + "9": 5.641451835632324, + "10": 14.016672134399414, + "11": 6.264270782470703, + "12": 11.634200096130371, + "13": 8.58130931854248, + "14": 13.615200996398926, + "15": 5.19003438949585, + "16": 12.74057388305664, + "17": 14.252779960632324, + "18": 4.026570796966553, + "19": 21.443078994750977, + "20": 19.168149948120117, + "21": 6.144132614135742, + "22": 18.513757705688477, + "23": 14.245088577270508, + "24": 4.473949909210205, + "25": 8.687564849853516, + "26": 6.300623893737793, + "27": 12.811702728271484, + "28": 10.243854522705078, + "29": 5.129880428314209, + "30": 5.583020210266113, + "31": 7.059929370880127, + "32": 6.936434268951416, + "33": 5.920306205749512, + "34": 6.535542011260986, + "35": 3.8587324619293213, + "36": 17.113021850585938, + "37": 12.387015342712402, + "38": 22.945186614990234, + "39": 3.712006092071533, + "40": 15.06403636932373, + "41": 9.611918449401855, + "42": 3.478496789932251, + "43": 14.003998756408691, + "44": 5.800577163696289, + "45": 11.2997465133667, + "46": 14.906519889831543, + "47": 4.250407695770264, + "48": 16.47998046875, + "49": 8.455068588256836, + "50": 6.812468528747559, + "51": 3.6709096431732178, + "52": 15.428959846496582, + "53": 9.14046573638916 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.8002490997314453, + "3": 4.169719219207764, + "4": 4.191576957702637, + "5": 4.636710166931152, + "6": 3.619516611099243, + "7": 4.080648899078369, + "8": 5.285762786865234, + "9": 4.630393981933594, + "10": 5.848069667816162, + "11": 5.102724552154541, + "12": 4.933624267578125, + "13": 6.367702960968018, + "14": 3.974186420440674, + "15": 5.206032752990723, + "16": 4.3080339431762695, + "17": 4.3890910148620605, + "18": 3.9542670249938965, + "19": 8.378786087036133, + "20": 6.230710029602051, + "21": 4.233414173126221, + "22": 7.307021617889404, + "23": 4.967606544494629, + "24": 3.9721381664276123, + "25": 4.917096138000488, + "26": 5.059874534606934, + "27": 7.570440292358398, + "28": 4.816390514373779, + "29": 4.824719429016113, + "30": 7.568645477294922, + "31": 4.005067348480225, + "32": 5.854215621948242, + "33": 4.211982250213623, + "34": 4.359143257141113, + "35": 5.44965934753418, + "36": 8.726181030273438, + "37": 5.648348808288574, + "38": 5.257631301879883, + "39": 3.895961284637451, + "40": 6.710821151733398, + "41": 5.615940570831299, + "42": 4.509154796600342, + "43": 6.425422191619873, + "44": 4.653565406799316, + "45": 4.685014724731445, + "46": 5.431200981140137, + "47": 4.134403228759766, + "48": 9.482399940490723, + "49": 4.99036979675293, + "50": 4.327738285064697, + "51": 4.721538066864014, + "52": 8.824649810791016, + "53": 5.54055118560791 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 0.00869338, + 0.00874607, + 0.0888363, + 0.0696762, + 0.0376631, + 0.174727, + 0.257511, + 0.128418, + 0.0256638, + 0.145491, + 0.0297662, + 0.130035, + 0.0364496, + 0.086472, + 0.0214388, + 0.193271, + 0.02654, + 0.0216061, + 0.243891, + 0.0182225, + 0.0169581, + 0.112142, + 0.0213182, + 0.0244803, + 0.198446, + 0.0651497, + 0.12746, + 0.0461219, + 0.0458981, + 0.18334, + 0.242817, + 0.0803543, + 0.121674, + 0.120171, + 0.102056, + 0.365999, + 0.0297969, + 0.0368119, + 0.00998635, + 0.282747, + 0.0295728, + 0.0607858, + 0.372659, + 0.032764, + 0.138307, + 0.0366922, + 0.0244424, + 0.22885, + 0.0349144, + 0.0698069, + 0.0932507, + 0.350377, + 0.0370701, + 0.0663157 + ], + "train_epoch_time": 5.061728000640869, + "train_loss": 4.287693012188291, + "train_score": 0.15268673783053285, + "val_loss": 4.306716903068989, + "val_score": 0.15118667517545986 + }, + { + "epoch": 1, + "grad_norm": 7.175811767578125, + "learning_rate": 1.0, + "model_norm": 87.40782165527344, + "step_logs": { + "grad_norm": { + "54": 3.4528262615203857, + "55": 6.201093673706055, + "56": 7.180618762969971, + "57": 1.6493779420852661, + "58": 10.742693901062012, + "59": 8.11362075805664, + "60": 9.391319274902344, + "61": 6.1381988525390625, + "62": 7.680230617523193, + "63": 2.422365188598633, + "64": 9.0484619140625, + "65": 5.7457170486450195, + "66": 11.505653381347656, + "67": 2.1069905757904053, + "68": 20.572898864746094, + "69": 25.69393539428711, + "70": 4.730662822723389, + "71": 8.376721382141113, + "72": 8.69029426574707, + "73": 3.0753259658813477, + "74": 11.962082862854004, + "75": 1.7809722423553467, + "76": 8.898144721984863, + "77": 2.424302816390991, + "78": 6.994433879852295, + "79": 1.733713150024414, + "80": 6.690541744232178, + "81": 7.088050842285156, + "82": 11.619805335998535, + "83": 2.3821053504943848, + "84": 17.625072479248047, + "85": 16.93046760559082, + "86": 4.84429407119751, + "87": 2.1030845642089844, + "88": 8.235093116760254, + "89": 4.266262054443359, + "90": 2.3467159271240234, + "91": 8.875101089477539, + "92": 4.1571855545043945, + "93": 1.907282829284668, + "94": 2.7073915004730225, + "95": 12.969512939453125, + "96": 2.8833096027374268, + "97": 1.6625070571899414, + "98": 10.158586502075195, + "99": 3.3437583446502686, + "100": 3.261718273162842, + "101": 9.503498077392578, + "102": 2.379850149154663, + "103": 6.041413307189941, + "104": 3.3136770725250244, + "105": 1.3911731243133545, + "106": 5.885049343109131, + "107": 7.175811767578125 + }, + "loss": { + "54": 4.309749603271484, + "55": 4.78863525390625, + "56": 4.782060623168945, + "57": 3.8678321838378906, + "58": 4.864871978759766, + "59": 4.850564002990723, + "60": 4.199524402618408, + "61": 4.783591270446777, + "62": 4.48616886138916, + "63": 3.7532734870910645, + "64": 4.647047996520996, + "65": 5.3974385261535645, + "66": 5.241326332092285, + "67": 3.7134509086608887, + "68": 11.796606063842773, + "69": 9.279533386230469, + "70": 4.44937801361084, + "71": 5.696907043457031, + "72": 4.520971298217773, + "73": 4.343443870544434, + "74": 7.670567512512207, + "75": 3.5260934829711914, + "76": 5.6596479415893555, + "77": 4.112434387207031, + "78": 4.969648361206055, + "79": 3.5763425827026367, + "80": 4.389827728271484, + "81": 6.5713348388671875, + "82": 5.505765914916992, + "83": 4.025215148925781, + "84": 11.941483497619629, + "85": 7.970841884613037, + "86": 4.357478141784668, + "87": 3.6590209007263184, + "88": 5.739189147949219, + "89": 4.797023773193359, + "90": 3.9198288917541504, + "91": 5.6885528564453125, + "92": 4.415287017822266, + "93": 3.770115375518799, + "94": 4.294294834136963, + "95": 4.677727699279785, + "96": 4.070676803588867, + "97": 3.5197064876556396, + "98": 6.075753688812256, + "99": 3.819824695587158, + "100": 4.312519073486328, + "101": 6.454775810241699, + "102": 3.901374340057373, + "103": 4.629861831665039, + "104": 4.020097732543945, + "105": 3.722615957260132, + "106": 3.950429916381836, + "107": 5.197689056396484 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.361495, + 0.12453, + 0.0927452, + 1.42176, + 0.0421546, + 0.0736822, + 0.0476153, + 0.126962, + 0.0760549, + 0.639633, + 0.0567581, + 0.163493, + 0.039593, + 0.836474, + 0.0278719, + 0.0140561, + 0.198818, + 0.0811878, + 0.0598636, + 0.459253, + 0.0536061, + 1.11168, + 0.071481, + 0.699722, + 0.101583, + 1.18983, + 0.0980675, + 0.130798, + 0.0407774, + 0.709361, + 0.0384412, + 0.0278078, + 0.185684, + 0.827278, + 0.0846279, + 0.263558, + 0.71178, + 0.0722196, + 0.255482, + 1.03639, + 0.585854, + 0.0278091, + 0.489648, + 1.27344, + 0.0588754, + 0.341644, + 0.405357, + 0.0714684, + 0.68884, + 0.12685, + 0.366114, + 1.92347, + 0.114063, + 0.100941 + ], + "train_epoch_time": 5.054845571517944, + "train_loss": 3.5755339094350806, + "train_score": 0.085475699445353, + "val_loss": 3.594644247322214, + "val_score": 0.08256493986987357 + }, + { + "epoch": 2, + "grad_norm": 7.0098371505737305, + "learning_rate": 1.0, + "model_norm": 87.48278045654297, + "step_logs": { + "grad_norm": { + "108": 1.2204278707504272, + "109": 5.494037628173828, + "110": 4.409607410430908, + "111": 0.7654702067375183, + "112": 1.916879653930664, + "113": 12.691763877868652, + "114": 2.0235939025878906, + "115": 11.483722686767578, + "116": 3.067296266555786, + "117": 3.7349209785461426, + "118": 17.036848068237305, + "119": 7.859272003173828, + "120": 1.3270570039749146, + "121": 6.711935997009277, + "122": 15.696311950683594, + "123": 1.587878942489624, + "124": 17.753625869750977, + "125": 10.150809288024902, + "126": 5.839665412902832, + "127": 8.229072570800781, + "128": 3.3307056427001953, + "129": 7.483287811279297, + "130": 3.436750888824463, + "131": 6.053197383880615, + "132": 2.3088631629943848, + "133": 6.7248616218566895, + "134": 6.891343116760254, + "135": 3.3974430561065674, + "136": 6.117650032043457, + "137": 2.8520541191101074, + "138": 3.0273396968841553, + "139": 11.214179992675781, + "140": 3.588993549346924, + "141": 15.84964370727539, + "142": 2.3070456981658936, + "143": 6.358824729919434, + "144": 2.4433462619781494, + "145": 3.083045244216919, + "146": 12.42273998260498, + "147": 3.2101223468780518, + "148": 1.143220067024231, + "149": 9.141404151916504, + "150": 0.843994140625, + "151": 7.48250150680542, + "152": 1.2340359687805176, + "153": 3.6514508724212646, + "154": 2.6564178466796875, + "155": 6.637094497680664, + "156": 0.8658096194267273, + "157": 2.854630470275879, + "158": 1.4180574417114258, + "159": 13.111741065979004, + "160": 4.767974853515625, + "161": 7.0098371505737305 + }, + "loss": { + "108": 3.5613646507263184, + "109": 5.051475524902344, + "110": 3.8872427940368652, + "111": 3.353987693786621, + "112": 3.6256444454193115, + "113": 5.47585916519165, + "114": 3.7858524322509766, + "115": 6.3127031326293945, + "116": 4.1142168045043945, + "117": 4.043586730957031, + "118": 8.346199035644531, + "119": 4.9846296310424805, + "120": 3.797414779663086, + "121": 5.008763790130615, + "122": 4.252381801605225, + "123": 3.622262716293335, + "124": 8.223060607910156, + "125": 5.423888206481934, + "126": 5.160830020904541, + "127": 5.00778341293335, + "128": 4.168197154998779, + "129": 4.644274711608887, + "130": 4.079484939575195, + "131": 4.279280662536621, + "132": 3.867823600769043, + "133": 4.053799629211426, + "134": 4.461296558380127, + "135": 4.58814811706543, + "136": 4.663360595703125, + "137": 4.162822723388672, + "138": 3.6984639167785645, + "139": 6.408550262451172, + "140": 4.045624732971191, + "141": 9.524209976196289, + "142": 3.49643874168396, + "143": 3.620353937149048, + "144": 3.507394790649414, + "145": 3.743192672729492, + "146": 8.14825439453125, + "147": 3.5900979042053223, + "148": 3.5978665351867676, + "149": 6.260908603668213, + "150": 3.3536481857299805, + "151": 4.870438575744629, + "152": 3.466395139694214, + "153": 3.8796536922454834, + "154": 3.573780059814453, + "155": 4.511680603027344, + "156": 3.4413130283355713, + "157": 3.4824066162109375, + "158": 3.356820583343506, + "159": 8.899965286254883, + "160": 3.874288558959961, + "161": 4.553544998168945 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 2.39107, + 0.167354, + 0.199913, + 5.72407, + 0.986724, + 0.0339945, + 0.924521, + 0.0478685, + 0.437296, + 0.28987, + 0.0287548, + 0.080699, + 2.1563, + 0.111182, + 0.0172598, + 1.43663, + 0.0260891, + 0.0526392, + 0.151336, + 0.0739509, + 0.37573, + 0.0829341, + 0.34539, + 0.116789, + 0.725555, + 0.0896387, + 0.0939406, + 0.397496, + 0.124603, + 0.511767, + 0.403552, + 0.0509594, + 0.31408, + 0.0379132, + 0.656922, + 0.0895359, + 0.587509, + 0.393806, + 0.0527995, + 0.348388, + 2.75287, + 0.0749224, + 4.70802, + 0.086991, + 2.27626, + 0.290979, + 0.506448, + 0.102419, + 4.5907, + 0.427346, + 1.66932, + 0.0517687, + 0.170421, + 0.0926688 + ], + "train_epoch_time": 5.053528785705566, + "train_loss": 3.23747347109287, + "train_score": 0.17411226684941108, + "val_loss": 3.259279743264655, + "val_score": 0.16584744532811793 + }, + { + "epoch": 3, + "grad_norm": 7.853468418121338, + "learning_rate": 1.0, + "model_norm": 87.54737091064453, + "step_logs": { + "grad_norm": { + "162": 0.6172052621841431, + "163": 1.9305968284606934, + "164": 4.235742568969727, + "165": 2.452990770339966, + "166": 5.52947998046875, + "167": 1.1597410440444946, + "168": 9.82834243774414, + "169": 0.4273718297481537, + "170": 1.0338839292526245, + "171": 2.1340601444244385, + "172": 4.069834232330322, + "173": 11.8688383102417, + "174": 3.339928388595581, + "175": 1.7839720249176025, + "176": 3.062002658843994, + "177": 3.803767204284668, + "178": 16.14332389831543, + "179": 1.9862242937088013, + "180": 14.544238090515137, + "181": 1.266656756401062, + "182": 2.9746577739715576, + "183": 6.9587273597717285, + "184": 1.6131359338760376, + "185": 1.3693383932113647, + "186": 4.1614179611206055, + "187": 3.629279851913452, + "188": 9.446269989013672, + "189": 2.237483024597168, + "190": 2.3593883514404297, + "191": 23.537370681762695, + "192": 12.847769737243652, + "193": 4.78884744644165, + "194": 0.6743913888931274, + "195": 3.7378122806549072, + "196": 1.1711188554763794, + "197": 17.905345916748047, + "198": 2.683520555496216, + "199": 6.063100337982178, + "200": 0.49248263239860535, + "201": 5.227361679077148, + "202": 1.9489352703094482, + "203": 7.9462151527404785, + "204": 1.9341949224472046, + "205": 14.63154411315918, + "206": 0.8991637825965881, + "207": 5.2558159828186035, + "208": 8.559276580810547, + "209": 0.727784276008606, + "210": 2.4869818687438965, + "211": 17.560007095336914, + "212": 12.949395179748535, + "213": 4.641385078430176, + "214": 1.5345288515090942, + "215": 7.853468418121338 + }, + "loss": { + "162": 3.2352874279022217, + "163": 3.313533067703247, + "164": 3.998837947845459, + "165": 3.569653034210205, + "166": 3.628875255584717, + "167": 3.510059118270874, + "168": 5.282551288604736, + "169": 3.336914300918579, + "170": 3.3781416416168213, + "171": 4.039737224578857, + "172": 4.64213752746582, + "173": 6.292857646942139, + "174": 3.917433977127075, + "175": 3.502185821533203, + "176": 3.984590768814087, + "177": 4.618399620056152, + "178": 7.850765705108643, + "179": 3.507638692855835, + "180": 6.019065856933594, + "181": 3.395009756088257, + "182": 4.153111457824707, + "183": 4.425516128540039, + "184": 3.4384937286376953, + "185": 3.6666247844696045, + "186": 3.678715705871582, + "187": 4.557547569274902, + "188": 4.31423282623291, + "189": 3.6561923027038574, + "190": 3.309612274169922, + "191": 12.58102035522461, + "192": 8.059301376342773, + "193": 5.145270347595215, + "194": 3.1643738746643066, + "195": 3.66489577293396, + "196": 3.1843864917755127, + "197": 9.492039680480957, + "198": 3.327655792236328, + "199": 4.9989519119262695, + "200": 3.0658178329467773, + "201": 3.711319923400879, + "202": 3.4502482414245605, + "203": 4.297966957092285, + "204": 3.5015501976013184, + "205": 8.624146461486816, + "206": 3.18898868560791, + "207": 3.6747875213623047, + "208": 5.067270278930664, + "209": 3.1893138885498047, + "210": 3.2587647438049316, + "211": 8.330520629882812, + "212": 5.538828372955322, + "213": 3.901963233947754, + "214": 3.286574125289917, + "215": 3.848623275756836 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 8.49285, + 0.889013, + 0.222882, + 0.593245, + 0.118687, + 2.60971, + 0.0546869, + 18.2698, + 3.16034, + 0.887033, + 0.280262, + 0.0446716, + 0.351178, + 1.10043, + 0.424984, + 0.319201, + 0.0301249, + 0.889116, + 0.0284542, + 2.11604, + 0.469353, + 0.0913912, + 1.32138, + 1.95544, + 0.212429, + 0.346011, + 0.0483485, + 0.730314, + 0.594536, + 0.0227091, + 0.048825, + 0.22436, + 6.95767, + 0.262317, + 2.32179, + 0.029607, + 0.462092, + 0.135985, + 12.6405, + 0.13582, + 0.908355, + 0.0680679, + 0.935966, + 0.0402843, + 3.94435, + 0.133031, + 0.0691672, + 6.02132, + 0.526875, + 0.0270161, + 0.0330308, + 0.181129, + 1.3957, + 0.0623997 + ], + "train_epoch_time": 5.054205894470215, + "train_loss": 3.422062549221635, + "train_score": 0.16294050397968704, + "val_loss": 3.4332452540282676, + "val_score": 0.161725925540404 + }, + { + "epoch": 4, + "grad_norm": 3.5653417110443115, + "learning_rate": 1.0, + "model_norm": 87.57437896728516, + "step_logs": { + "grad_norm": { + "216": 2.9874424934387207, + "217": 12.020308494567871, + "218": 3.519164800643921, + "219": 1.7579094171524048, + "220": 4.699622631072998, + "221": 3.8211545944213867, + "222": 7.332284927368164, + "223": 1.3821289539337158, + "224": 13.866406440734863, + "225": 6.391863822937012, + "226": 0.764019250869751, + "227": 1.0673123598098755, + "228": 3.289612293243408, + "229": 2.5527477264404297, + "230": 15.946304321289062, + "231": 1.965239405632019, + "232": 14.026849746704102, + "233": 4.282284736633301, + "234": 13.360882759094238, + "235": 2.3937058448791504, + "236": 12.24718952178955, + "237": 5.173635005950928, + "238": 3.0230350494384766, + "239": 2.0358736515045166, + "240": 3.061434745788574, + "241": 1.8501865863800049, + "242": 7.4878644943237305, + "243": 7.070363998413086, + "244": 4.0442962646484375, + "245": 8.527976989746094, + "246": 1.5660851001739502, + "247": 4.070375442504883, + "248": 2.791182518005371, + "249": 2.0854952335357666, + "250": 5.372145652770996, + "251": 5.553926467895508, + "252": 2.3045144081115723, + "253": 6.567760944366455, + "254": 1.954593539237976, + "255": 19.65473747253418, + "256": 3.2429943084716797, + "257": 12.28372859954834, + "258": 4.283907413482666, + "259": 9.966032981872559, + "260": 0.7446930408477783, + "261": 1.3271123170852661, + "262": 7.779701232910156, + "263": 3.4554829597473145, + "264": 5.531050205230713, + "265": 0.6556902527809143, + "266": 0.7802390456199646, + "267": 1.6575236320495605, + "268": 2.9647600650787354, + "269": 3.5653417110443115 + }, + "loss": { + "216": 3.4151387214660645, + "217": 5.445035457611084, + "218": 4.039470672607422, + "219": 3.2380259037017822, + "220": 4.49238395690918, + "221": 4.033121109008789, + "222": 4.075538635253906, + "223": 3.3841769695281982, + "224": 7.204192161560059, + "225": 3.931335926055908, + "226": 3.2176685333251953, + "227": 3.145339012145996, + "228": 3.6017677783966064, + "229": 3.7597885131835938, + "230": 5.91054630279541, + "231": 3.1509499549865723, + "232": 10.538995742797852, + "233": 3.3791840076446533, + "234": 6.149799346923828, + "235": 3.2908077239990234, + "236": 5.412066459655762, + "237": 4.008817195892334, + "238": 3.629178524017334, + "239": 3.342593193054199, + "240": 4.230134963989258, + "241": 3.3233747482299805, + "242": 4.715473175048828, + "243": 3.988399028778076, + "244": 3.8643691539764404, + "245": 5.3102498054504395, + "246": 3.37919545173645, + "247": 4.587761878967285, + "248": 3.408756732940674, + "249": 3.3481478691101074, + "250": 3.5039312839508057, + "251": 4.441200256347656, + "252": 3.299842357635498, + "253": 4.784389495849609, + "254": 3.215451717376709, + "255": 9.556995391845703, + "256": 3.1535205841064453, + "257": 7.965487003326416, + "258": 4.3026018142700195, + "259": 4.868965148925781, + "260": 3.0050177574157715, + "261": 3.155625820159912, + "262": 3.924818277359009, + "263": 3.555025577545166, + "264": 4.410586357116699, + "265": 2.9707541465759277, + "266": 3.061960220336914, + "267": 3.229783296585083, + "268": 3.5198092460632324, + "269": 4.313325881958008 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 0.382657, + 0.0376851, + 0.326171, + 1.04782, + 0.2034, + 0.276218, + 0.0758065, + 1.77156, + 0.0374677, + 0.0962244, + 5.5123, + 2.76111, + 0.332833, + 0.576963, + 0.0232438, + 0.81585, + 0.0535647, + 0.184273, + 0.0344501, + 0.574329, + 0.036082, + 0.14977, + 0.39712, + 0.806458, + 0.45134, + 0.970841, + 0.0841026, + 0.0797839, + 0.236261, + 0.0730168, + 1.37779, + 0.276906, + 0.437542, + 0.769815, + 0.121412, + 0.143979, + 0.621347, + 0.110915, + 0.841645, + 0.0247393, + 0.299849, + 0.05279, + 0.23445, + 0.0490221, + 5.41867, + 1.79172, + 0.0648476, + 0.297732, + 0.144172, + 6.90986, + 5.02972, + 1.17558, + 0.400442, + 0.33932 + ], + "train_epoch_time": 5.053279638290405, + "train_loss": 4.454620310292182, + "train_score": 0.15660980094539556, + "val_loss": 4.4871344747006825, + "val_score": 0.15454129624927906 + }, + { + "epoch": 5, + "grad_norm": 1.597386360168457, + "learning_rate": 1.0, + "model_norm": 87.60677337646484, + "step_logs": { + "grad_norm": { + "270": 8.417795181274414, + "271": 1.4617302417755127, + "272": 1.4719135761260986, + "273": 15.30471420288086, + "274": 4.0817766189575195, + "275": 3.172008752822876, + "276": 3.6194591522216797, + "277": 8.161170959472656, + "278": 0.9640328884124756, + "279": 1.392067313194275, + "280": 8.410759925842285, + "281": 2.810370683670044, + "282": 12.157135009765625, + "283": 0.6151496171951294, + "284": 7.482866287231445, + "285": 2.229586601257324, + "286": 12.283149719238281, + "287": 9.091983795166016, + "288": 2.0408763885498047, + "289": 20.326557159423828, + "290": 6.192249298095703, + "291": 6.54166841506958, + "292": 2.60191011428833, + "293": 6.936756610870361, + "294": 2.6483194828033447, + "295": 1.08181631565094, + "296": 4.590422630310059, + "297": 1.1043760776519775, + "298": 4.337271690368652, + "299": 2.220872640609741, + "300": 3.5218417644500732, + "301": 5.153324604034424, + "302": 1.065319538116455, + "303": 2.6600210666656494, + "304": 8.69677734375, + "305": 3.3303956985473633, + "306": 9.6586332321167, + "307": 1.1139414310455322, + "308": 7.699582099914551, + "309": 0.9463465809822083, + "310": 2.01292085647583, + "311": 7.939146995544434, + "312": 2.6454808712005615, + "313": 14.22165298461914, + "314": 2.000232458114624, + "315": 7.018811225891113, + "316": 2.581660509109497, + "317": 9.128364562988281, + "318": 2.0029964447021484, + "319": 20.82325553894043, + "320": 17.0695743560791, + "321": 6.073815822601318, + "322": 5.015772342681885, + "323": 1.597386360168457 + }, + "loss": { + "270": 4.441112995147705, + "271": 3.257725238800049, + "272": 3.1244821548461914, + "273": 9.042449951171875, + "274": 4.215341091156006, + "275": 3.6792783737182617, + "276": 4.04018497467041, + "277": 4.592798709869385, + "278": 3.0867416858673096, + "279": 3.073683261871338, + "280": 4.694775581359863, + "281": 3.5920286178588867, + "282": 5.0841474533081055, + "283": 2.957658290863037, + "284": 3.9417872428894043, + "285": 3.2137694358825684, + "286": 5.38819694519043, + "287": 3.749476909637451, + "288": 3.2391600608825684, + "289": 11.951048851013184, + "290": 3.421565055847168, + "291": 4.073790550231934, + "292": 3.080479383468628, + "293": 4.6739654541015625, + "294": 3.5670626163482666, + "295": 2.984158515930176, + "296": 4.135107517242432, + "297": 3.0487303733825684, + "298": 3.6981968879699707, + "299": 3.355250835418701, + "300": 3.2816882133483887, + "301": 4.190012454986572, + "302": 3.058805465698242, + "303": 3.6626811027526855, + "304": 3.765085220336914, + "305": 3.299260377883911, + "306": 4.9024786949157715, + "307": 3.06119704246521, + "308": 4.217517852783203, + "309": 2.9054903984069824, + "310": 3.2250277996063232, + "311": 4.552387237548828, + "312": 3.392378330230713, + "313": 6.536539554595947, + "314": 3.0595862865448, + "315": 5.461810111999512, + "316": 3.5451297760009766, + "317": 4.10023307800293, + "318": 3.104283332824707, + "319": 11.160408020019531, + "320": 6.498387336730957, + "321": 3.823298692703247, + "322": 3.855466842651367, + "323": 3.2338428497314453 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 0.0626751, + 1.52468, + 1.44216, + 0.0386043, + 0.253008, + 0.365674, + 0.3084, + 0.0689561, + 3.32136, + 1.58613, + 0.0663658, + 0.454792, + 0.0343998, + 7.81603, + 0.0703975, + 0.646496, + 0.0357128, + 0.0453579, + 0.777677, + 0.0289253, + 0.0892335, + 0.0951966, + 0.455023, + 0.0971343, + 0.508593, + 2.54985, + 0.196237, + 2.49968, + 0.196588, + 0.680264, + 0.26458, + 0.157776, + 2.69521, + 0.517641, + 0.0497804, + 0.297457, + 0.0525514, + 2.46699, + 0.0711414, + 3.24429, + 0.79594, + 0.0722257, + 0.484725, + 0.0323182, + 0.764719, + 0.110869, + 0.531905, + 0.0492065, + 0.773751, + 0.0257385, + 0.0223028, + 0.103637, + 0.15325, + 1.26736 + ], + "train_epoch_time": 5.054124355316162, + "train_loss": 3.313465996965274, + "train_score": 0.13426963772886624, + "val_loss": 3.340253053384862, + "val_score": 0.13163748585397697 + }, + { + "epoch": 6, + "grad_norm": 1.318442702293396, + "learning_rate": 1.0, + "model_norm": 87.65409088134766, + "step_logs": { + "grad_norm": { + "324": 3.0638318061828613, + "325": 12.547501564025879, + "326": 1.7084776163101196, + "327": 1.8790518045425415, + "328": 19.083650588989258, + "329": 5.657773494720459, + "330": 5.736194610595703, + "331": 2.032508611679077, + "332": 9.56373119354248, + "333": 3.7583446502685547, + "334": 11.385799407958984, + "335": 1.3768935203552246, + "336": 1.9528535604476929, + "337": 6.754176616668701, + "338": 1.1516849994659424, + "339": 2.5240859985351562, + "340": 2.83101224899292, + "341": 4.5815815925598145, + "342": 2.5633962154388428, + "343": 2.7312276363372803, + "344": 9.310015678405762, + "345": 1.2251770496368408, + "346": 14.864018440246582, + "347": 5.785479545593262, + "348": 1.253085970878601, + "349": 0.7698249220848083, + "350": 1.2421190738677979, + "351": 7.74329948425293, + "352": 1.8452045917510986, + "353": 7.990381240844727, + "354": 1.5456805229187012, + "355": 12.205272674560547, + "356": 5.901059627532959, + "357": 5.625377178192139, + "358": 2.415379285812378, + "359": 3.196598768234253, + "360": 3.5479352474212646, + "361": 1.6618934869766235, + "362": 4.429972171783447, + "363": 1.192750096321106, + "364": 1.450696349143982, + "365": 3.5292797088623047, + "366": 5.045935153961182, + "367": 1.5372174978256226, + "368": 3.057121992111206, + "369": 4.3452229499816895, + "370": 1.2983942031860352, + "371": 2.991034507751465, + "372": 2.1848366260528564, + "373": 7.562220096588135, + "374": 1.6354453563690186, + "375": 1.0294127464294434, + "376": 0.9437535405158997, + "377": 1.318442702293396 + }, + "loss": { + "324": 3.3156137466430664, + "325": 6.474240303039551, + "326": 3.3303921222686768, + "327": 3.134063243865967, + "328": 11.810436248779297, + "329": 3.640613317489624, + "330": 4.041092872619629, + "331": 3.205493688583374, + "332": 4.940890789031982, + "333": 3.2587618827819824, + "334": 5.539989471435547, + "335": 3.1151323318481445, + "336": 3.157496452331543, + "337": 4.483071327209473, + "338": 3.2040624618530273, + "339": 3.286912202835083, + "340": 3.587984800338745, + "341": 3.3555707931518555, + "342": 3.631004810333252, + "343": 3.1962809562683105, + "344": 5.258813858032227, + "345": 2.946056604385376, + "346": 7.268603324890137, + "347": 3.9211862087249756, + "348": 2.9738659858703613, + "349": 2.8974318504333496, + "350": 2.9549481868743896, + "351": 4.75447940826416, + "352": 2.9924001693725586, + "353": 4.7252912521362305, + "354": 2.9017269611358643, + "355": 6.49427604675293, + "356": 4.867671012878418, + "357": 4.351002216339111, + "358": 3.3011906147003174, + "359": 3.2043204307556152, + "360": 4.115439414978027, + "361": 2.9570465087890625, + "362": 3.3966400623321533, + "363": 3.3509864807128906, + "364": 3.225102424621582, + "365": 3.329432487487793, + "366": 4.254251480102539, + "367": 3.0636508464813232, + "368": 3.234981060028076, + "369": 3.8633975982666016, + "370": 2.89851713180542, + "371": 3.677140712738037, + "372": 3.105072021484375, + "373": 4.470493316650391, + "374": 3.2047791481018066, + "375": 3.0857763290405273, + "376": 2.8954758644104004, + "377": 2.9634528160095215 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 0.353211, + 0.041122, + 1.14098, + 0.887626, + 0.0324297, + 0.113732, + 0.122815, + 0.775944, + 0.0540195, + 0.230706, + 0.0427348, + 1.64314, + 0.827949, + 0.0982722, + 2.41565, + 0.515917, + 0.447679, + 0.159858, + 0.55258, + 0.428479, + 0.0606718, + 1.96265, + 0.0328987, + 0.117149, + 1.89391, + 4.8891, + 1.91524, + 0.0792959, + 0.878882, + 0.0740105, + 1.21455, + 0.0435949, + 0.139785, + 0.137495, + 0.565848, + 0.313588, + 0.326938, + 1.07066, + 0.17308, + 2.35545, + 1.53246, + 0.267299, + 0.167086, + 1.29649, + 0.346136, + 0.204619, + 1.71934, + 0.411024, + 0.65048, + 0.078173, + 1.19819, + 2.91196, + 3.25089, + 1.70481 + ], + "train_epoch_time": 5.054388761520386, + "train_loss": 7.305970393910128, + "train_score": 0.09277483861107737, + "val_loss": 7.319182059247513, + "val_score": 0.08958811701267924 + }, + { + "epoch": 7, + "grad_norm": 10.765946388244629, + "learning_rate": 1.0, + "model_norm": 87.68486785888672, + "step_logs": { + "grad_norm": { + "378": 16.040483474731445, + "379": 6.54575252532959, + "380": 3.660334825515747, + "381": 1.4622583389282227, + "382": 3.6394519805908203, + "383": 2.0608949661254883, + "384": 2.3969147205352783, + "385": 17.816192626953125, + "386": 4.788029193878174, + "387": 0.9748720526695251, + "388": 1.0093756914138794, + "389": 1.5624254941940308, + "390": 4.910091400146484, + "391": 2.506113290786743, + "392": 3.3157408237457275, + "393": 6.0191802978515625, + "394": 1.1205449104309082, + "395": 1.7486339807510376, + "396": 3.960408926010132, + "397": 7.913658618927002, + "398": 1.0244340896606445, + "399": 14.30500602722168, + "400": 1.8058902025222778, + "401": 2.6652021408081055, + "402": 17.545543670654297, + "403": 4.092648029327393, + "404": 0.9539400339126587, + "405": 0.704377293586731, + "406": 1.2798709869384766, + "407": 1.3182331323623657, + "408": 11.263158798217773, + "409": 1.4286426305770874, + "410": 4.004367351531982, + "411": 1.3061292171478271, + "412": 2.056056261062622, + "413": 0.5090538263320923, + "414": 0.37913328409194946, + "415": 3.504302740097046, + "416": 7.081801891326904, + "417": 0.7794700860977173, + "418": 1.9352850914001465, + "419": 1.1940546035766602, + "420": 3.6309051513671875, + "421": 0.9127716422080994, + "422": 1.2052929401397705, + "423": 1.1224899291992188, + "424": 0.6754429340362549, + "425": 0.7979111671447754, + "426": 2.139202356338501, + "427": 1.3098818063735962, + "428": 13.790217399597168, + "429": 2.17396879196167, + "430": 1.4117320775985718, + "431": 10.765946388244629 + }, + "loss": { + "378": 7.28767728805542, + "379": 4.843731880187988, + "380": 3.365016222000122, + "381": 2.9327170848846436, + "382": 3.264479160308838, + "383": 3.3833961486816406, + "384": 3.165174961090088, + "385": 10.999656677246094, + "386": 3.9248909950256348, + "387": 2.911363124847412, + "388": 2.8497610092163086, + "389": 3.074125289916992, + "390": 3.433293342590332, + "391": 3.32497501373291, + "392": 3.445816993713379, + "393": 4.257643699645996, + "394": 2.925107002258301, + "395": 2.9123294353485107, + "396": 4.722896099090576, + "397": 3.664672374725342, + "398": 2.894896984100342, + "399": 6.906097412109375, + "400": 2.846384286880493, + "401": 3.2363619804382324, + "402": 10.042871475219727, + "403": 3.3986458778381348, + "404": 3.043595314025879, + "405": 2.8186025619506836, + "406": 2.8277218341827393, + "407": 2.88370943069458, + "408": 5.247016429901123, + "409": 2.9663054943084717, + "410": 4.31719446182251, + "411": 2.9837493896484375, + "412": 3.439638137817383, + "413": 2.7930190563201904, + "414": 2.6830191612243652, + "415": 3.115664482116699, + "416": 4.403307914733887, + "417": 2.753737688064575, + "418": 3.036257266998291, + "419": 2.8469786643981934, + "420": 3.9103312492370605, + "421": 2.8575730323791504, + "422": 2.954301118850708, + "423": 2.9263386726379395, + "424": 2.8519067764282227, + "425": 2.7864508628845215, + "426": 2.9868521690368652, + "427": 2.929961919784546, + "428": 7.111339569091797, + "429": 3.026057243347168, + "430": 3.060291290283203, + "431": 5.268321990966797 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 0.028324, + 0.113047, + 0.251157, + 1.37158, + 0.246457, + 0.796601, + 0.550925, + 0.0346537, + 0.171204, + 3.06338, + 2.79707, + 1.25928, + 0.142407, + 0.529404, + 0.313423, + 0.117515, + 2.32961, + 0.952451, + 0.301112, + 0.0585168, + 2.75845, + 0.0337487, + 0.872792, + 0.455614, + 0.032623, + 0.202907, + 3.3446, + 5.68098, + 1.72625, + 1.65946, + 0.0413611, + 1.45334, + 0.269236, + 1.749, + 0.81366, + 10.7782, + 18.6655, + 0.253716, + 0.0877994, + 4.53235, + 0.810678, + 1.99681, + 0.296609, + 3.42983, + 2.03362, + 2.32252, + 6.25112, + 4.37665, + 0.652695, + 1.70765, + 0.0373946, + 0.640281, + 1.53553, + 0.0454536 + ], + "train_epoch_time": 5.054681062698364, + "train_loss": 2.857751656741631, + "train_score": 0.2232077205754079, + "val_loss": 2.8952823470845974, + "val_score": 0.2193509616753264 + }, + { + "epoch": 8, + "grad_norm": 0.39214810729026794, + "learning_rate": 1.0, + "model_norm": 87.74516296386719, + "step_logs": { + "grad_norm": { + "432": 2.168149471282959, + "433": 1.7605563402175903, + "434": 1.1056766510009766, + "435": 5.546317100524902, + "436": 0.8200904726982117, + "437": 2.1366324424743652, + "438": 1.5437718629837036, + "439": 1.4036411046981812, + "440": 2.254242181777954, + "441": 1.900120735168457, + "442": 4.486093997955322, + "443": 1.224945068359375, + "444": 1.8282771110534668, + "445": 1.2649118900299072, + "446": 1.4886144399642944, + "447": 1.285316824913025, + "448": 0.9994505643844604, + "449": 1.1726855039596558, + "450": 1.128950834274292, + "451": 0.6170597672462463, + "452": 0.6761702299118042, + "453": 0.8361582159996033, + "454": 0.567862868309021, + "455": 0.30281925201416016, + "456": 0.29304444789886475, + "457": 0.38171258568763733, + "458": 0.4615105986595154, + "459": 0.6041643619537354, + "460": 0.5766599178314209, + "461": 0.46891769766807556, + "462": 0.5205958485603333, + "463": 0.5562890768051147, + "464": 0.4942656457424164, + "465": 0.5113962292671204, + "466": 0.48930346965789795, + "467": 0.45395800471305847, + "468": 0.5059782862663269, + "469": 0.6073412299156189, + "470": 0.5460482239723206, + "471": 0.4364083707332611, + "472": 0.43995529413223267, + "473": 0.48216062784194946, + "474": 0.4718942642211914, + "475": 0.46095019578933716, + "476": 0.4751294255256653, + "477": 0.5134651064872742, + "478": 0.5055081844329834, + "479": 0.4690570831298828, + "480": 0.4867781698703766, + "481": 0.48497480154037476, + "482": 0.4860299527645111, + "483": 0.5182611346244812, + "484": 0.471593976020813, + "485": 0.39214810729026794 + }, + "loss": { + "432": 2.8666603565216064, + "433": 3.3808634281158447, + "434": 2.907029867172241, + "435": 4.311890602111816, + "436": 2.824399948120117, + "437": 3.127277374267578, + "438": 3.1276135444641113, + "439": 3.1792473793029785, + "440": 3.2935683727264404, + "441": 3.2622523307800293, + "442": 3.510226011276245, + "443": 3.0254292488098145, + "444": 3.121299982070923, + "445": 3.1505966186523438, + "446": 3.000731945037842, + "447": 3.341097593307495, + "448": 2.9106626510620117, + "449": 2.8891658782958984, + "450": 3.0368666648864746, + "451": 2.789989471435547, + "452": 2.7675275802612305, + "453": 2.7667741775512695, + "454": 2.7946126461029053, + "455": 2.6461939811706543, + "456": 2.6305489540100098, + "457": 2.6349916458129883, + "458": 2.671107769012451, + "459": 2.67372989654541, + "460": 2.7174861431121826, + "461": 2.6675281524658203, + "462": 2.689272403717041, + "463": 2.6624982357025146, + "464": 2.672647476196289, + "465": 2.670224189758301, + "466": 2.680692672729492, + "467": 2.642289638519287, + "468": 2.6854379177093506, + "469": 2.67608642578125, + "470": 2.7051339149475098, + "471": 2.675262928009033, + "472": 2.6756863594055176, + "473": 2.6470866203308105, + "474": 2.6868438720703125, + "475": 2.6295394897460938, + "476": 2.645143508911133, + "477": 2.650545120239258, + "478": 2.67291259765625, + "479": 2.6213717460632324, + "480": 2.6770386695861816, + "481": 2.634006977081299, + "482": 2.666285991668701, + "483": 2.658201217651367, + "484": 2.672632932662964, + "485": 2.6321921348571777 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 0.609815, + 1.09076, + 2.3779, + 0.140171, + 4.19955, + 0.685026, + 1.31234, + 1.61366, + 0.648135, + 0.903556, + 0.174421, + 2.01629, + 0.933795, + 1.96912, + 1.35414, + 2.02241, + 2.91386, + 2.10092, + 2.38273, + 7.32737, + 6.05313, + 3.95728, + 8.66632, + 28.8572, + 30.6323, + 18.0845, + 12.5409, + 7.32499, + 8.17199, + 12.1315, + 9.92278, + 8.60376, + 10.9401, + 10.2102, + 11.1967, + 12.8218, + 10.4894, + 7.25495, + 9.0725, + 14.0469, + 13.8235, + 11.3864, + 12.0657, + 12.3758, + 11.7172, + 10.0534, + 10.4599, + 11.9145, + 11.2978, + 11.199, + 11.2871, + 9.8967, + 12.0172, + 17.1166 + ], + "train_epoch_time": 5.055560350418091, + "train_loss": 2.6317582880236325, + "train_score": 0.2569628766568183, + "val_loss": 2.6588520338559944, + "val_score": 0.2486590485245708 + }, + { + "epoch": 9, + "grad_norm": 0.44535982608795166, + "learning_rate": 1.0, + "model_norm": 87.81460571289062, + "step_logs": { + "grad_norm": { + "486": 0.3982353210449219, + "487": 0.43557921051979065, + "488": 0.4927448034286499, + "489": 0.5576047301292419, + "490": 0.5290592908859253, + "491": 0.5076530575752258, + "492": 0.5319492220878601, + "493": 0.5007034540176392, + "494": 0.5210025310516357, + "495": 0.5394535660743713, + "496": 0.5030139088630676, + "497": 0.4605294466018677, + "498": 0.4453807473182678, + "499": 0.44898661971092224, + "500": 0.46897217631340027, + "501": 0.48422494530677795, + "502": 0.4780116081237793, + "503": 0.4895212948322296, + "504": 0.535971999168396, + "505": 0.5532742142677307, + "506": 0.49585941433906555, + "507": 0.4018837511539459, + "508": 0.402686208486557, + "509": 0.487864226102829, + "510": 0.54140305519104, + "511": 0.5641275644302368, + "512": 0.5425588488578796, + "513": 0.48240816593170166, + "514": 0.4451873004436493, + "515": 0.43826135993003845, + "516": 0.49162229895591736, + "517": 0.5225799679756165, + "518": 0.4846140742301941, + "519": 0.4425986409187317, + "520": 0.44406652450561523, + "521": 0.4758024215698242, + "522": 0.49207353591918945, + "523": 0.48803049325942993, + "524": 0.4920251965522766, + "525": 0.4774007797241211, + "526": 0.48840782046318054, + "527": 0.46008068323135376, + "528": 0.4562215209007263, + "529": 0.46872478723526, + "530": 0.4545108675956726, + "531": 0.453834593296051, + "532": 0.44453826546669006, + "533": 0.46896350383758545, + "534": 0.5051932334899902, + "535": 0.5283986330032349, + "536": 0.5464285016059875, + "537": 0.5510775446891785, + "538": 0.5074120163917542, + "539": 0.44535982608795166 + }, + "loss": { + "486": 2.6298608779907227, + "487": 2.61812686920166, + "488": 2.661372184753418, + "489": 2.642263650894165, + "490": 2.6732637882232666, + "491": 2.6382131576538086, + "492": 2.6739726066589355, + "493": 2.6452417373657227, + "494": 2.6538732051849365, + "495": 2.63507080078125, + "496": 2.659754514694214, + "497": 2.621670722961426, + "498": 2.630413770675659, + "499": 2.6134023666381836, + "500": 2.64987850189209, + "501": 2.6161062717437744, + "502": 2.6288294792175293, + "503": 2.6221654415130615, + "504": 2.6408352851867676, + "505": 2.6397414207458496, + "506": 2.653784990310669, + "507": 2.5975451469421387, + "508": 2.609415292739868, + "509": 2.598310947418213, + "510": 2.637651205062866, + "511": 2.64570951461792, + "512": 2.6593527793884277, + "513": 2.6202707290649414, + "514": 2.6349387168884277, + "515": 2.609262466430664, + "516": 2.617173671722412, + "517": 2.6070337295532227, + "518": 2.639909029006958, + "519": 2.5958285331726074, + "520": 2.6046042442321777, + "521": 2.5836737155914307, + "522": 2.6052651405334473, + "523": 2.5893874168395996, + "524": 2.6298346519470215, + "525": 2.6113293170928955, + "526": 2.60847544670105, + "527": 2.5925660133361816, + "528": 2.609440803527832, + "529": 2.602233409881592, + "530": 2.622786283493042, + "531": 2.577503204345703, + "532": 2.5837182998657227, + "533": 2.5699374675750732, + "534": 2.61978816986084, + "535": 2.6043004989624023, + "536": 2.6448514461517334, + "537": 2.6181392669677734, + "538": 2.64224910736084, + "539": 2.574448585510254 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 16.5826, + 13.7993, + 10.9613, + 8.49813, + 9.55065, + 10.2371, + 9.44967, + 10.5513, + 9.77689, + 9.05491, + 10.5119, + 12.3613, + 13.2605, + 12.964, + 12.0485, + 11.1574, + 11.505, + 10.9425, + 9.193, + 8.62344, + 10.7932, + 16.0828, + 16.092, + 10.9167, + 8.99862, + 8.31357, + 9.03404, + 11.2594, + 13.2949, + 13.5847, + 10.8285, + 9.54643, + 11.2408, + 13.2512, + 13.2083, + 11.4126, + 10.7595, + 10.8718, + 10.8631, + 11.4576, + 10.9351, + 12.2479, + 12.5371, + 11.8443, + 12.6962, + 12.5142, + 13.0746, + 11.6854, + 10.2648, + 9.32755, + 8.85798, + 8.62119, + 10.2625, + 12.9796 + ], + "train_epoch_time": 5.059179782867432, + "train_loss": 2.5937873890957497, + "train_score": 0.2638618634027593, + "val_loss": 2.62206916283521, + "val_score": 0.25666439396368784 + }, + { + "epoch": 10, + "grad_norm": 0.5642505884170532, + "learning_rate": 1.0, + "model_norm": 87.89739227294922, + "step_logs": { + "grad_norm": { + "540": 0.43889129161834717, + "541": 0.4413491189479828, + "542": 0.44447293877601624, + "543": 0.4841398596763611, + "544": 0.5043590664863586, + "545": 0.5050851702690125, + "546": 0.5263308882713318, + "547": 0.5236912965774536, + "548": 0.5113937258720398, + "549": 0.4963991343975067, + "550": 0.49282658100128174, + "551": 0.4909944236278534, + "552": 0.49198561906814575, + "553": 0.4985528588294983, + "554": 0.5279410481452942, + "555": 0.5446391701698303, + "556": 0.5695044994354248, + "557": 0.6038550734519958, + "558": 0.6127456426620483, + "559": 0.5691845417022705, + "560": 0.5303208827972412, + "561": 0.4937804937362671, + "562": 0.4819372892379761, + "563": 0.5437955260276794, + "564": 0.5550265312194824, + "565": 0.5230482220649719, + "566": 0.5679828524589539, + "567": 0.653569221496582, + "568": 0.6410107016563416, + "569": 0.5594778656959534, + "570": 0.5172213912010193, + "571": 0.5734394192695618, + "572": 0.5833762884140015, + "573": 0.5947811007499695, + "574": 0.5898205637931824, + "575": 0.5590900182723999, + "576": 0.5380058288574219, + "577": 0.5170160531997681, + "578": 0.5525601506233215, + "579": 0.6289882659912109, + "580": 0.6177880167961121, + "581": 0.5601961016654968, + "582": 0.5203932523727417, + "583": 0.6104792952537537, + "584": 0.7420664429664612, + "585": 0.9171600937843323, + "586": 0.6264526844024658, + "587": 0.41215798258781433, + "588": 0.3642028272151947, + "589": 0.38623979687690735, + "590": 0.4369104504585266, + "591": 0.5044211149215698, + "592": 0.5352712869644165, + "593": 0.5642505884170532 + }, + "loss": { + "540": 2.5873799324035645, + "541": 2.561875820159912, + "542": 2.588291645050049, + "543": 2.591524600982666, + "544": 2.5904970169067383, + "545": 2.6010489463806152, + "546": 2.619211196899414, + "547": 2.5873899459838867, + "548": 2.5964908599853516, + "549": 2.580134868621826, + "550": 2.5930261611938477, + "551": 2.5900847911834717, + "552": 2.5755648612976074, + "553": 2.5766239166259766, + "554": 2.5866951942443848, + "555": 2.5667924880981445, + "556": 2.6079423427581787, + "557": 2.5970170497894287, + "558": 2.594780683517456, + "559": 2.583996057510376, + "560": 2.5805587768554688, + "561": 2.559217929840088, + "562": 2.5547826290130615, + "563": 2.560436248779297, + "564": 2.5935473442077637, + "565": 2.540757656097412, + "566": 2.5761923789978027, + "567": 2.565190315246582, + "568": 2.613154888153076, + "569": 2.5507757663726807, + "570": 2.5534820556640625, + "571": 2.543264150619507, + "572": 2.5765066146850586, + "573": 2.5365328788757324, + "574": 2.562178134918213, + "575": 2.533876419067383, + "576": 2.5681610107421875, + "577": 2.5384161472320557, + "578": 2.538158416748047, + "579": 2.5635013580322266, + "580": 2.580650568008423, + "581": 2.5635955333709717, + "582": 2.533137321472168, + "583": 2.518244981765747, + "584": 2.584805727005005, + "585": 2.62091326713562, + "586": 2.6482770442962646, + "587": 2.5014262199401855, + "588": 2.4759087562561035, + "589": 2.505929470062256, + "590": 2.501068353652954, + "591": 2.498375415802002, + "592": 2.5276522636413574, + "593": 2.505748748779297 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 13.4322, + 13.1521, + 13.1015, + 11.0564, + 10.1836, + 10.1958, + 9.45481, + 9.43433, + 9.92833, + 10.4708, + 10.6762, + 10.7439, + 10.6406, + 10.3664, + 9.28056, + 8.65313, + 8.04088, + 7.12212, + 6.91099, + 7.97601, + 9.17564, + 10.4964, + 10.9995, + 8.6585, + 8.41912, + 9.28709, + 7.98561, + 6.00532, + 6.35967, + 8.14904, + 9.54509, + 7.73422, + 7.57066, + 7.17012, + 7.36494, + 8.10629, + 8.87255, + 9.49631, + 8.31304, + 6.4796, + 6.76161, + 8.16901, + 9.35396, + 6.75703, + 4.69399, + 3.11575, + 6.74818, + 14.7252, + 18.6659, + 16.7979, + 13.1021, + 9.81909, + 8.82205, + 7.87034 + ], + "train_epoch_time": 5.054889917373657, + "train_loss": 2.532563376939793, + "train_score": 0.2531956151576774, + "val_loss": 2.5702949673382203, + "val_score": 0.244730374826082 + }, + { + "epoch": 11, + "grad_norm": 0.39967411756515503, + "learning_rate": 1.0, + "model_norm": 87.97118377685547, + "step_logs": { + "grad_norm": { + "594": 0.5848799347877502, + "595": 0.5964754223823547, + "596": 0.5421091914176941, + "597": 0.5192058086395264, + "598": 0.5717551708221436, + "599": 0.7164661288261414, + "600": 0.6851852536201477, + "601": 0.6123928427696228, + "602": 0.5692559480667114, + "603": 0.5325015783309937, + "604": 0.5284161567687988, + "605": 0.5314343571662903, + "606": 0.46375802159309387, + "607": 0.38732707500457764, + "608": 0.39238420128822327, + "609": 0.4661814272403717, + "610": 0.5565862059593201, + "611": 0.622113823890686, + "612": 0.662469208240509, + "613": 0.7418915629386902, + "614": 0.695763349533081, + "615": 0.4969223141670227, + "616": 0.3927744925022125, + "617": 0.38481831550598145, + "618": 0.40111711621284485, + "619": 0.44783347845077515, + "620": 0.47042691707611084, + "621": 0.4632425606250763, + "622": 0.4563848674297333, + "623": 0.4969564974308014, + "624": 0.5564669966697693, + "625": 0.553923487663269, + "626": 0.5589980483055115, + "627": 0.5743780136108398, + "628": 0.5629103183746338, + "629": 0.530710756778717, + "630": 0.5712909698486328, + "631": 0.601861298084259, + "632": 0.5945495963096619, + "633": 0.5405967831611633, + "634": 0.49745386838912964, + "635": 0.4777105152606964, + "636": 0.4817655384540558, + "637": 0.5321722030639648, + "638": 0.5701903700828552, + "639": 0.638569176197052, + "640": 0.6125555634498596, + "641": 0.5752057433128357, + "642": 0.6520606279373169, + "643": 0.7927195429801941, + "644": 0.7485238313674927, + "645": 0.6046181321144104, + "646": 0.4587094187736511, + "647": 0.39967411756515503 + }, + "loss": { + "594": 2.5547661781311035, + "595": 2.516893148422241, + "596": 2.5100326538085938, + "597": 2.508277177810669, + "598": 2.5054330825805664, + "599": 2.520761489868164, + "600": 2.587801694869995, + "601": 2.535466194152832, + "602": 2.5401253700256348, + "603": 2.508542537689209, + "604": 2.4997620582580566, + "605": 2.498105764389038, + "606": 2.512206554412842, + "607": 2.4507334232330322, + "608": 2.461714267730713, + "609": 2.4511613845825195, + "610": 2.4950764179229736, + "611": 2.518521547317505, + "612": 2.532031536102295, + "613": 2.5306496620178223, + "614": 2.5719969272613525, + "615": 2.5061893463134766, + "616": 2.478519916534424, + "617": 2.4409000873565674, + "618": 2.4431161880493164, + "619": 2.4791605472564697, + "620": 2.461848020553589, + "621": 2.4595818519592285, + "622": 2.4788684844970703, + "623": 2.4791648387908936, + "624": 2.492696762084961, + "625": 2.4767537117004395, + "626": 2.481961727142334, + "627": 2.492222785949707, + "628": 2.4911341667175293, + "629": 2.4656450748443604, + "630": 2.474794387817383, + "631": 2.498361110687256, + "632": 2.497685432434082, + "633": 2.489813804626465, + "634": 2.4618020057678223, + "635": 2.4729981422424316, + "636": 2.436277389526367, + "637": 2.4613890647888184, + "638": 2.476503372192383, + "639": 2.478748083114624, + "640": 2.519442319869995, + "641": 2.481924057006836, + "642": 2.4866180419921875, + "643": 2.5106842517852783, + "644": 2.5524468421936035, + "645": 2.4838905334472656, + "646": 2.4614758491516113, + "647": 2.432997465133667 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 7.46823, + 7.07424, + 8.54094, + 9.30457, + 7.66413, + 4.91067, + 5.51207, + 6.76079, + 7.83863, + 8.84667, + 8.95255, + 8.84528, + 11.6808, + 16.3358, + 15.9888, + 11.2788, + 8.05414, + 6.50738, + 5.76949, + 4.59781, + 5.31309, + 10.1493, + 16.0659, + 16.4831, + 15.1845, + 12.3615, + 11.1244, + 11.4616, + 11.9012, + 10.0385, + 8.0499, + 8.07204, + 7.94282, + 7.55425, + 7.86174, + 8.75417, + 7.58271, + 6.89703, + 7.0658, + 8.51962, + 9.94827, + 10.8366, + 10.4968, + 8.69112, + 7.61727, + 6.07878, + 6.7145, + 7.5014, + 5.84835, + 3.99533, + 4.5556, + 6.7947, + 11.6982, + 15.231 + ], + "train_epoch_time": 5.055654525756836, + "train_loss": 2.439288756495056, + "train_score": 0.2868398044424043, + "val_loss": 2.482028426861243, + "val_score": 0.27646024663741225 + }, + { + "epoch": 12, + "grad_norm": 0.29364854097366333, + "learning_rate": 1.0, + "model_norm": 88.0263442993164, + "step_logs": { + "grad_norm": { + "648": 0.42541980743408203, + "649": 0.4486834406852722, + "650": 0.45945242047309875, + "651": 0.49960291385650635, + "652": 0.5295918583869934, + "653": 0.48951247334480286, + "654": 0.4583229422569275, + "655": 0.4312009811401367, + "656": 0.44353899359703064, + "657": 0.4767831861972809, + "658": 0.5039674639701843, + "659": 0.4627559185028076, + "660": 0.40119317173957825, + "661": 0.4087030291557312, + "662": 0.4404308497905731, + "663": 0.44678232073783875, + "664": 0.4529418349266052, + "665": 0.46593615412712097, + "666": 0.44451114535331726, + "667": 0.38647735118865967, + "668": 0.40457549691200256, + "669": 0.46550610661506653, + "670": 0.4510818421840668, + "671": 0.38932713866233826, + "672": 0.38247546553611755, + "673": 0.3675960898399353, + "674": 0.3833872079849243, + "675": 0.4027199149131775, + "676": 0.3951297700405121, + "677": 0.38941389322280884, + "678": 0.3857887387275696, + "679": 0.39228710532188416, + "680": 0.3770066797733307, + "681": 0.3638162612915039, + "682": 0.2915111184120178, + "683": 0.22512826323509216, + "684": 0.23857611417770386, + "685": 0.24426469206809998, + "686": 0.2337995320558548, + "687": 0.2389509081840515, + "688": 0.27739131450653076, + "689": 0.37564846873283386, + "690": 0.33689069747924805, + "691": 0.31788650155067444, + "692": 0.27095070481300354, + "693": 0.2696477174758911, + "694": 0.2522718608379364, + "695": 0.22648663818836212, + "696": 0.1982397884130478, + "697": 0.1943386346101761, + "698": 0.18440209329128265, + "699": 0.17222952842712402, + "700": 0.2079312950372696, + "701": 0.29364854097366333 + }, + "loss": { + "648": 2.470357656478882, + "649": 2.4453601837158203, + "650": 2.449160099029541, + "651": 2.4454240798950195, + "652": 2.460860013961792, + "653": 2.4324779510498047, + "654": 2.436105251312256, + "655": 2.4385781288146973, + "656": 2.4207332134246826, + "657": 2.417686939239502, + "658": 2.447439670562744, + "659": 2.4163479804992676, + "660": 2.4013919830322266, + "661": 2.4144887924194336, + "662": 2.4173641204833984, + "663": 2.411128520965576, + "664": 2.4195351600646973, + "665": 2.4303739070892334, + "666": 2.418672800064087, + "667": 2.410890579223633, + "668": 2.38808012008667, + "669": 2.4081008434295654, + "670": 2.4407973289489746, + "671": 2.396388530731201, + "672": 2.377713918685913, + "673": 2.3852338790893555, + "674": 2.387624979019165, + "675": 2.386573314666748, + "676": 2.386322498321533, + "677": 2.39239764213562, + "678": 2.4039902687072754, + "679": 2.3619823455810547, + "680": 2.373314380645752, + "681": 2.3773021697998047, + "682": 2.4054088592529297, + "683": 2.3658714294433594, + "684": 2.3728911876678467, + "685": 2.3722269535064697, + "686": 2.36580228805542, + "687": 2.3664608001708984, + "688": 2.3748106956481934, + "689": 2.3627021312713623, + "690": 2.3584699630737305, + "691": 2.384561777114868, + "692": 2.3721556663513184, + "693": 2.3655025959014893, + "694": 2.3643574714660645, + "695": 2.3771610260009766, + "696": 2.3603529930114746, + "697": 2.344078540802002, + "698": 2.361901044845581, + "699": 2.3433241844177246, + "700": 2.369680643081665, + "701": 2.346346855163574 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 13.6497, + 12.1468, + 11.6021, + 9.79725, + 8.77413, + 10.1513, + 11.5972, + 13.1153, + 12.305, + 10.6355, + 9.63623, + 11.2838, + 14.9196, + 14.4547, + 12.462, + 12.0789, + 11.7936, + 11.1949, + 12.2409, + 16.141, + 14.5898, + 11.1128, + 11.9956, + 15.8099, + 16.2537, + 17.6518, + 16.2439, + 14.7153, + 15.2844, + 15.7765, + 16.1523, + 15.3486, + 16.6977, + 17.9606, + 28.306, + 46.68, + 41.6892, + 39.7589, + 43.2804, + 41.4459, + 30.8634, + 16.7435, + 20.7803, + 23.5974, + 32.3119, + 32.5334, + 37.1514, + 46.3419, + 60.0614, + 62.066, + 69.4592, + 78.9982, + 54.8088, + 27.2105 + ], + "train_epoch_time": 5.059138059616089, + "train_loss": 2.3581412921505986, + "train_score": 0.3145422346195155, + "val_loss": 2.3948335491557073, + "val_score": 0.30483549802503135 + }, + { + "epoch": 13, + "grad_norm": 0.25887343287467957, + "learning_rate": 0.6666666666666667, + "model_norm": 88.05181121826172, + "step_logs": { + "grad_norm": { + "702": 0.2517961263656616, + "703": 0.2711848020553589, + "704": 0.28986310958862305, + "705": 0.387401282787323, + "706": 0.31486833095550537, + "707": 0.2716244161128998, + "708": 0.2819242477416992, + "709": 0.29586946964263916, + "710": 0.2761354446411133, + "711": 0.2607029974460602, + "712": 0.24697215855121613, + "713": 0.24466226994991302, + "714": 0.24863287806510925, + "715": 0.4027062654495239, + "716": 0.5951622724533081, + "717": 0.5314468741416931, + "718": 0.3024754226207733, + "719": 0.1868147999048233, + "720": 0.16853132843971252, + "721": 0.15246227383613586, + "722": 0.14348968863487244, + "723": 0.1538223773241043, + "724": 0.16693183779716492, + "725": 0.1562102735042572, + "726": 0.15216726064682007, + "727": 0.14588244259357452, + "728": 0.1636030673980713, + "729": 0.17198902368545532, + "730": 0.18787144124507904, + "731": 0.1787651926279068, + "732": 0.21472664177417755, + "733": 0.1897151619195938, + "734": 0.20950596034526825, + "735": 0.21321755647659302, + "736": 0.18100839853286743, + "737": 0.1854843646287918, + "738": 0.20567253232002258, + "739": 0.21263569593429565, + "740": 0.19190005958080292, + "741": 0.19911877810955048, + "742": 0.16771019995212555, + "743": 0.16397693753242493, + "744": 0.17830683290958405, + "745": 0.14768682420253754, + "746": 0.15644720196723938, + "747": 0.18825316429138184, + "748": 0.1873304843902588, + "749": 0.1808556467294693, + "750": 0.17900241911411285, + "751": 0.17609266936779022, + "752": 0.20888206362724304, + "753": 0.19842274487018585, + "754": 0.20021870732307434, + "755": 0.25887343287467957 + }, + "loss": { + "702": 2.3515613079071045, + "703": 2.352108955383301, + "704": 2.3576269149780273, + "705": 2.353095054626465, + "706": 2.3777709007263184, + "707": 2.348574161529541, + "708": 2.3504819869995117, + "709": 2.333197593688965, + "710": 2.346602439880371, + "711": 2.35115122795105, + "712": 2.342665195465088, + "713": 2.346747875213623, + "714": 2.3737337589263916, + "715": 2.3537206649780273, + "716": 2.35170578956604, + "717": 2.3858892917633057, + "718": 2.368903160095215, + "719": 2.35884428024292, + "720": 2.3632116317749023, + "721": 2.3307394981384277, + "722": 2.347752094268799, + "723": 2.3527705669403076, + "724": 2.363231897354126, + "725": 2.349132537841797, + "726": 2.32879638671875, + "727": 2.3327298164367676, + "728": 2.3728489875793457, + "729": 2.3306446075439453, + "730": 2.3423075675964355, + "731": 2.349453926086426, + "732": 2.3523170948028564, + "733": 2.335592269897461, + "734": 2.348813056945801, + "735": 2.351442813873291, + "736": 2.317401885986328, + "737": 2.341111898422241, + "738": 2.326826572418213, + "739": 2.3484549522399902, + "740": 2.3195347785949707, + "741": 2.3290226459503174, + "742": 2.3366763591766357, + "743": 2.333981513977051, + "744": 2.343405246734619, + "745": 2.326702117919922, + "746": 2.3418684005737305, + "747": 2.32875394821167, + "748": 2.3379621505737305, + "749": 2.3344545364379883, + "750": 2.3377208709716797, + "751": 2.3253703117370605, + "752": 2.3394603729248047, + "753": 2.3347830772399902, + "754": 2.328230381011963, + "755": 2.326993227005005 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 37.0901, + 31.9836, + 28.0601, + 15.679, + 23.9835, + 31.8322, + 29.5728, + 26.6533, + 30.7748, + 34.593, + 38.4073, + 39.2042, + 38.3986, + 14.5137, + 6.63915, + 8.44755, + 25.8921, + 67.5891, + 83.2034, + 100.27, + 114.028, + 99.4353, + 84.8063, + 96.2694, + 100.575, + 109.612, + 88.6517, + 78.7906, + 66.3624, + 73.5192, + 51.0181, + 64.8923, + 53.5126, + 51.7236, + 70.73, + 68.0468, + 55.0062, + 51.941, + 62.987, + 58.7421, + 83.0768, + 86.8024, + 73.7075, + 106.674, + 95.6813, + 65.7111, + 66.6224, + 71.3709, + 72.9583, + 74.9911, + 53.6183, + 59.3012, + 58.0787, + 34.7232 + ], + "train_epoch_time": 5.059368371963501, + "train_loss": 2.331776088074256, + "train_score": 0.31920283353824697, + "val_loss": 2.369336447677437, + "val_score": 0.3103024545890182 + }, + { + "epoch": 14, + "grad_norm": 0.14123967289924622, + "learning_rate": 0.33333333333333337, + "model_norm": 88.06018829345703, + "step_logs": { + "grad_norm": { + "756": 0.16919466853141785, + "757": 0.1527901291847229, + "758": 0.16814734041690826, + "759": 0.14379505813121796, + "760": 0.15826955437660217, + "761": 0.16517005860805511, + "762": 0.17118597030639648, + "763": 0.17684140801429749, + "764": 0.2197951078414917, + "765": 0.1795675903558731, + "766": 0.15357689559459686, + "767": 0.15348678827285767, + "768": 0.1800214797258377, + "769": 0.1680680364370346, + "770": 0.15507693588733673, + "771": 0.16153189539909363, + "772": 0.13893342018127441, + "773": 0.15212024748325348, + "774": 0.16563336551189423, + "775": 0.21204416453838348, + "776": 0.2171325534582138, + "777": 0.17124004662036896, + "778": 0.15722417831420898, + "779": 0.1687515527009964, + "780": 0.20852503180503845, + "781": 0.1994674801826477, + "782": 0.1389341801404953, + "783": 0.18625572323799133, + "784": 0.16316533088684082, + "785": 0.15010342001914978, + "786": 0.17625698447227478, + "787": 0.14616595208644867, + "788": 0.15358756482601166, + "789": 0.1764170080423355, + "790": 0.16849461197853088, + "791": 0.1559830605983734, + "792": 0.1519165337085724, + "793": 0.1520063877105713, + "794": 0.16567540168762207, + "795": 0.1518193483352661, + "796": 0.13931231200695038, + "797": 0.15041916072368622, + "798": 0.13906803727149963, + "799": 0.13872286677360535, + "800": 0.15496313571929932, + "801": 0.14736458659172058, + "802": 0.15293791890144348, + "803": 0.1286572813987732, + "804": 0.15545165538787842, + "805": 0.14150285720825195, + "806": 0.12946298718452454, + "807": 0.14146363735198975, + "808": 0.14373642206192017, + "809": 0.14123967289924622 + }, + "loss": { + "756": 2.3208799362182617, + "757": 2.323700189590454, + "758": 2.3460261821746826, + "759": 2.3306610584259033, + "760": 2.2934744358062744, + "761": 2.3358302116394043, + "762": 2.3395979404449463, + "763": 2.322214126586914, + "764": 2.322810649871826, + "765": 2.320448875427246, + "766": 2.3309519290924072, + "767": 2.330899715423584, + "768": 2.3207345008850098, + "769": 2.3087668418884277, + "770": 2.3470306396484375, + "771": 2.3387796878814697, + "772": 2.3258748054504395, + "773": 2.3206069469451904, + "774": 2.3296265602111816, + "775": 2.325906753540039, + "776": 2.3431174755096436, + "777": 2.336763381958008, + "778": 2.303090810775757, + "779": 2.329092502593994, + "780": 2.312742233276367, + "781": 2.3172004222869873, + "782": 2.324556827545166, + "783": 2.32741117477417, + "784": 2.336142063140869, + "785": 2.312987804412842, + "786": 2.3157477378845215, + "787": 2.3127593994140625, + "788": 2.3509538173675537, + "789": 2.317056179046631, + "790": 2.328007936477661, + "791": 2.322556734085083, + "792": 2.3323583602905273, + "793": 2.321770668029785, + "794": 2.318619728088379, + "795": 2.3258066177368164, + "796": 2.3157896995544434, + "797": 2.3239922523498535, + "798": 2.326474905014038, + "799": 2.313377857208252, + "800": 2.346735715866089, + "801": 2.326061964035034, + "802": 2.3059816360473633, + "803": 2.311988353729248, + "804": 2.327362060546875, + "805": 2.318028211593628, + "806": 2.3194198608398438, + "807": 2.3261709213256836, + "808": 2.311486005783081, + "809": 2.333019256591797 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 81.0736, + 99.5381, + 82.9761, + 112.717, + 91.5586, + 85.6207, + 79.8371, + 74.2565, + 48.0815, + 71.9641, + 98.8283, + 98.9422, + 71.6105, + 81.7353, + 97.5943, + 89.634, + 120.496, + 100.283, + 84.9162, + 51.7297, + 49.6986, + 79.69, + 93.1692, + 81.7883, + 53.1877, + 58.2397, + 120.426, + 67.0894, + 87.7493, + 102.658, + 74.5416, + 108.253, + 99.6626, + 74.4484, + 81.9997, + 95.4578, + 101.061, + 100.484, + 84.4721, + 100.907, + 119.322, + 102.714, + 120.294, + 120.213, + 97.7254, + 107.111, + 98.5883, + 139.674, + 96.3104, + 115.768, + 138.385, + 116.239, + 111.881, + 116.951 + ], + "train_epoch_time": 5.06749415397644, + "train_loss": 2.3210341138169275, + "train_score": 0.3224399210048714, + "val_loss": 2.361155847731195, + "val_score": 0.3132175665293167 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:35:56.893715", + "final_model_norm": 88.06018829345703, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:34:11.873439", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "prox-sps", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 16.325838088989258, + "learning_rate": 1e-10, + "model_norm": 87.34168243408203, + "step_logs": { + "grad_norm": { + "0": 22.7664794921875, + "1": 23.4499454498291, + "2": 6.298552989959717, + "3": 7.566888809204102, + "4": 14.49585247039795, + "5": 5.620354175567627, + "6": 10.216382026672363, + "7": 4.534229755401611, + "8": 11.747849464416504, + "9": 5.449873924255371, + "10": 13.424872398376465, + "11": 6.095279216766357, + "12": 11.157068252563477, + "13": 5.757944107055664, + "14": 10.918036460876465, + "15": 4.400775909423828, + "16": 20.614965438842773, + "17": 5.2823591232299805, + "18": 20.4493465423584, + "19": 21.37148666381836, + "20": 24.3276424407959, + "21": 3.937091112136841, + "22": 11.294756889343262, + "23": 19.098108291625977, + "24": 15.733933448791504, + "25": 5.697427749633789, + "26": 16.87322425842285, + "27": 7.170352935791016, + "28": 10.589845657348633, + "29": 4.0917582511901855, + "30": 17.086423873901367, + "31": 6.23681640625, + "32": 23.630807876586914, + "33": 3.5906713008880615, + "34": 19.72943115234375, + "35": 10.054206848144531, + "36": 8.447721481323242, + "37": 5.513556957244873, + "38": 16.578889846801758, + "39": 4.4890360832214355, + "40": 16.984851837158203, + "41": 13.220318794250488, + "42": 4.785301208496094, + "43": 16.19448471069336, + "44": 12.653915405273438, + "45": 3.955620288848877, + "46": 16.038116455078125, + "47": 14.444923400878906, + "48": 7.361199855804443, + "49": 9.815719604492188, + "50": 17.361591339111328, + "51": 4.043751239776611, + "52": 18.901411056518555, + "53": 16.325838088989258 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.7870278358459473, + "3": 4.075554847717285, + "4": 4.519266128540039, + "5": 4.4081573486328125, + "6": 4.090362548828125, + "7": 4.264717102050781, + "8": 4.81776237487793, + "9": 4.620635509490967, + "10": 5.196425914764404, + "11": 4.694077491760254, + "12": 5.102016448974609, + "13": 5.162631034851074, + "14": 4.536558151245117, + "15": 4.507107734680176, + "16": 7.70632266998291, + "17": 7.171968460083008, + "18": 6.168941974639893, + "19": 5.053447723388672, + "20": 5.458719253540039, + "21": 3.8100318908691406, + "22": 4.792364597320557, + "23": 5.147730827331543, + "24": 5.587794303894043, + "25": 4.857039928436279, + "26": 7.223695278167725, + "27": 4.380226135253906, + "28": 4.780410289764404, + "29": 4.617242813110352, + "30": 8.140088081359863, + "31": 7.500243663787842, + "32": 5.915950775146484, + "33": 4.088742733001709, + "34": 5.866542816162109, + "35": 6.472012996673584, + "36": 4.977142810821533, + "37": 4.597317218780518, + "38": 6.1798930168151855, + "39": 3.996333122253418, + "40": 8.745774269104004, + "41": 5.304678916931152, + "42": 4.416441917419434, + "43": 7.424479007720947, + "44": 5.2670793533325195, + "45": 4.364896297454834, + "46": 8.071239471435547, + "47": 6.173081874847412, + "48": 4.716697692871094, + "49": 4.693948268890381, + "50": 5.485614776611328, + "51": 4.669734001159668, + "52": 8.000372886657715, + "53": 5.641817569732666 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 0.00874362, + 0.00824185, + 0.095459, + 0.071179, + 0.021507, + 0.13955, + 0.0391893, + 0.207436, + 0.0349083, + 0.155571, + 0.0288326, + 0.126346, + 0.0409866, + 0.155717, + 0.0380572, + 0.232723, + 0.0181335, + 0.257029, + 0.014752, + 0.0110642, + 0.00922339, + 0.245798, + 0.0375661, + 0.0141135, + 0.0225718, + 0.149628, + 0.0253725, + 0.0851953, + 0.0426271, + 0.27578, + 0.0278822, + 0.192819, + 0.0105942, + 0.317131, + 0.0150714, + 0.0640241, + 0.0697431, + 0.151231, + 0.0224838, + 0.198315, + 0.0303162, + 0.0303512, + 0.192865, + 0.0283095, + 0.0328943, + 0.278962, + 0.0313786, + 0.029585, + 0.0870444, + 0.0487185, + 0.0181989, + 0.285577, + 0.0223935, + 0.0211674 + ], + "train_epoch_time": 5.057394027709961, + "train_loss": 4.25890385191273, + "train_score": 0.13767485653225284, + "val_loss": 4.278916929400474, + "val_score": 0.13572312698161698 + }, + { + "epoch": 1, + "grad_norm": 3.087127685546875, + "learning_rate": 1.0, + "model_norm": 87.36463165283203, + "step_logs": { + "grad_norm": { + "54": 4.851062774658203, + "55": 4.642487049102783, + "56": 13.736299514770508, + "57": 7.214140892028809, + "58": 6.71858549118042, + "59": 3.9326348304748535, + "60": 5.912207126617432, + "61": 7.219697952270508, + "62": 5.136228084564209, + "63": 6.090472221374512, + "64": 5.010668754577637, + "65": 3.2521190643310547, + "66": 8.625386238098145, + "67": 3.8423526287078857, + "68": 8.762495040893555, + "69": 10.446293830871582, + "70": 4.44512414932251, + "71": 8.88054084777832, + "72": 5.5230021476745605, + "73": 2.6638336181640625, + "74": 4.368422985076904, + "75": 7.9679107666015625, + "76": 4.711985111236572, + "77": 1.975710153579712, + "78": 11.510388374328613, + "79": 4.894310474395752, + "80": 2.6999690532684326, + "81": 4.465037822723389, + "82": 3.6896812915802, + "83": 2.1784634590148926, + "84": 4.675506114959717, + "85": 3.7070164680480957, + "86": 3.475722551345825, + "87": 4.807565212249756, + "88": 1.534299612045288, + "89": 7.62632417678833, + "90": 2.80254864692688, + "91": 7.808013916015625, + "92": 1.3819202184677124, + "93": 5.648556232452393, + "94": 2.7121543884277344, + "95": 7.299947738647461, + "96": 5.433496475219727, + "97": 1.0458855628967285, + "98": 4.237065315246582, + "99": 1.5481511354446411, + "100": 3.3716747760772705, + "101": 2.228264570236206, + "102": 1.277934193611145, + "103": 2.9510915279388428, + "104": 3.282909393310547, + "105": 1.9327986240386963, + "106": 1.456048607826233, + "107": 3.087127685546875 + }, + "loss": { + "54": 4.254249572753906, + "55": 4.582394123077393, + "56": 4.7759904861450195, + "57": 4.368451118469238, + "58": 4.007016181945801, + "59": 4.331964015960693, + "60": 4.342117786407471, + "61": 4.68231201171875, + "62": 4.248996734619141, + "63": 4.268795490264893, + "64": 4.463672637939453, + "65": 4.019813537597656, + "66": 5.721035957336426, + "67": 5.101027965545654, + "68": 4.912322044372559, + "69": 4.708672523498535, + "70": 3.6658873558044434, + "71": 4.833868026733398, + "72": 4.2417497634887695, + "73": 3.8356680870056152, + "74": 3.903273582458496, + "75": 4.723843574523926, + "76": 4.217850685119629, + "77": 3.323133945465088, + "78": 6.23946475982666, + "79": 4.575232982635498, + "80": 3.8332982063293457, + "81": 3.989940643310547, + "82": 4.012438774108887, + "83": 3.6906023025512695, + "84": 4.02376651763916, + "85": 4.2337493896484375, + "86": 3.80649995803833, + "87": 4.055908203125, + "88": 3.8032517433166504, + "89": 4.17458438873291, + "90": 3.67374849319458, + "91": 4.98996639251709, + "92": 3.4046430587768555, + "93": 4.4559502601623535, + "94": 4.028538227081299, + "95": 4.399440288543701, + "96": 3.4790291786193848, + "97": 3.110877513885498, + "98": 3.7854416370391846, + "99": 3.4571709632873535, + "100": 3.4675159454345703, + "101": 3.7974157333374023, + "102": 3.13344144821167, + "103": 3.4828577041625977, + "104": 4.106999397277832, + "105": 3.4190545082092285, + "106": 3.7496399879455566, + "107": 3.676380157470703 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.18078, + 0.212614, + 0.0253119, + 0.0839379, + 0.0887698, + 0.280103, + 0.124223, + 0.0898302, + 0.161064, + 0.115081, + 0.177787, + 0.380078, + 0.0768984, + 0.345512, + 0.0639781, + 0.0431493, + 0.185529, + 0.0612937, + 0.139058, + 0.540539, + 0.204541, + 0.0744058, + 0.189969, + 0.851337, + 0.0470942, + 0.190999, + 0.525842, + 0.200132, + 0.294734, + 0.777672, + 0.184067, + 0.308089, + 0.315091, + 0.175484, + 1.6156, + 0.0717766, + 0.467738, + 0.0818496, + 1.78281, + 0.139658, + 0.54767, + 0.0825578, + 0.117842, + 2.8439, + 0.210856, + 1.44243, + 0.305019, + 0.764812, + 1.91869, + 0.399917, + 0.381072, + 0.915235, + 1.76863, + 0.385755 + ], + "train_epoch_time": 5.054786443710327, + "train_loss": 3.416832287308133, + "train_score": 0.16706756639856862, + "val_loss": 3.4741060314988705, + "val_score": 0.16689240121540327 + }, + { + "epoch": 2, + "grad_norm": 3.008190393447876, + "learning_rate": 1.0, + "model_norm": 87.41412353515625, + "step_logs": { + "grad_norm": { + "108": 1.5771931409835815, + "109": 1.3276400566101074, + "110": 1.6606084108352661, + "111": 3.2099571228027344, + "112": 1.7904987335205078, + "113": 5.71084451675415, + "114": 2.318511486053467, + "115": 2.369474411010742, + "116": 2.9814114570617676, + "117": 1.5239108800888062, + "118": 3.0710394382476807, + "119": 1.6041967868804932, + "120": 5.355349063873291, + "121": 2.286020040512085, + "122": 4.396191596984863, + "123": 2.8245620727539062, + "124": 2.5127451419830322, + "125": 1.7162646055221558, + "126": 6.4136810302734375, + "127": 1.796766757965088, + "128": 7.797423839569092, + "129": 1.0371285676956177, + "130": 4.269420146942139, + "131": 3.9408934116363525, + "132": 1.033205509185791, + "133": 1.4431287050247192, + "134": 2.451185703277588, + "135": 3.02068829536438, + "136": 0.9753559827804565, + "137": 3.2207963466644287, + "138": 2.112476110458374, + "139": 1.313253402709961, + "140": 5.322528839111328, + "141": 1.7499157190322876, + "142": 6.138791084289551, + "143": 3.267159938812256, + "144": 3.9819841384887695, + "145": 1.6480112075805664, + "146": 4.6178436279296875, + "147": 2.288015365600586, + "148": 3.5858347415924072, + "149": 1.1324995756149292, + "150": 1.6467430591583252, + "151": 2.653310775756836, + "152": 2.111180305480957, + "153": 3.752389669418335, + "154": 1.9792944192886353, + "155": 3.4413907527923584, + "156": 3.5163180828094482, + "157": 1.061113715171814, + "158": 3.7987544536590576, + "159": 1.3852602243423462, + "160": 1.8876944780349731, + "161": 3.008190393447876 + }, + "loss": { + "108": 3.391155242919922, + "109": 3.254826068878174, + "110": 3.4949028491973877, + "111": 4.522764682769775, + "112": 3.1810269355773926, + "113": 4.352084159851074, + "114": 3.6936278343200684, + "115": 3.3618383407592773, + "116": 4.287606239318848, + "117": 3.4038262367248535, + "118": 3.589904308319092, + "119": 3.4578728675842285, + "120": 3.757467031478882, + "121": 3.3929529190063477, + "122": 3.8742623329162598, + "123": 3.7166049480438232, + "124": 3.2067177295684814, + "125": 3.4688572883605957, + "126": 4.05441951751709, + "127": 3.249584674835205, + "128": 5.004021644592285, + "129": 2.9070515632629395, + "130": 3.653980255126953, + "131": 3.5371079444885254, + "132": 3.0327553749084473, + "133": 2.936931610107422, + "134": 3.98585844039917, + "135": 3.6089138984680176, + "136": 3.0453946590423584, + "137": 3.212122917175293, + "138": 3.414034605026245, + "139": 2.92160701751709, + "140": 4.298502445220947, + "141": 3.044149398803711, + "142": 4.040562629699707, + "143": 3.4205234050750732, + "144": 3.7751450538635254, + "145": 3.0437707901000977, + "146": 4.015471458435059, + "147": 3.212235450744629, + "148": 3.880786418914795, + "149": 2.9279117584228516, + "150": 2.974810838699341, + "151": 3.8245153427124023, + "152": 3.2268872261047363, + "153": 3.343679666519165, + "154": 3.5337448120117188, + "155": 3.7032947540283203, + "156": 3.5217983722686768, + "157": 2.9809324741363525, + "158": 3.4148528575897217, + "159": 2.962611198425293, + "160": 3.130906820297241, + "161": 4.3312482833862305 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 1.36326, + 1.84658, + 1.26736, + 0.43894, + 0.992246, + 0.133443, + 0.687123, + 0.598788, + 0.48236, + 1.46571, + 0.380638, + 1.34367, + 0.131015, + 0.649259, + 0.200464, + 0.465848, + 0.507883, + 1.17765, + 0.098563, + 1.00657, + 0.0823032, + 2.70264, + 0.20046, + 0.22775, + 2.84095, + 1.41021, + 0.663391, + 0.395517, + 3.20123, + 0.309646, + 0.76504, + 1.69404, + 0.151733, + 0.994104, + 0.10722, + 0.320444, + 0.238086, + 1.12071, + 0.188303, + 0.613606, + 0.301814, + 2.28287, + 1.097, + 0.543251, + 0.723991, + 0.23747, + 0.902016, + 0.312695, + 0.284832, + 2.64745, + 0.236641, + 1.54387, + 0.878631, + 0.478633 + ], + "train_epoch_time": 5.055427312850952, + "train_loss": 2.80425652438291, + "train_score": 0.22392844333710255, + "val_loss": 2.829315626251711, + "val_score": 0.2205663392519157 + }, + { + "epoch": 3, + "grad_norm": 7.985816478729248, + "learning_rate": 1.0, + "model_norm": 87.48208618164062, + "step_logs": { + "grad_norm": { + "162": 0.739536464214325, + "163": 9.031047821044922, + "164": 0.47472620010375977, + "165": 1.0238107442855835, + "166": 1.0248939990997314, + "167": 0.8922759294509888, + "168": 0.8334680795669556, + "169": 1.789474606513977, + "170": 1.6453161239624023, + "171": 5.625613212585449, + "172": 1.9424501657485962, + "173": 14.148873329162598, + "174": 2.742997646331787, + "175": 1.8475837707519531, + "176": 1.7735297679901123, + "177": 5.4583659172058105, + "178": 0.9533764123916626, + "179": 2.4805867671966553, + "180": 1.0320863723754883, + "181": 1.922633171081543, + "182": 2.158294439315796, + "183": 7.784940719604492, + "184": 1.870803713798523, + "185": 2.184821605682373, + "186": 10.198567390441895, + "187": 2.120152473449707, + "188": 2.170665740966797, + "189": 2.0228075981140137, + "190": 2.058455467224121, + "191": 3.568654775619507, + "192": 1.2148218154907227, + "193": 0.9665382504463196, + "194": 1.8063281774520874, + "195": 1.1803311109542847, + "196": 10.03723430633545, + "197": 1.332942247390747, + "198": 0.8169476985931396, + "199": 4.852204322814941, + "200": 1.0940513610839844, + "201": 2.2535884380340576, + "202": 1.2635124921798706, + "203": 7.535573482513428, + "204": 1.1998114585876465, + "205": 10.150912284851074, + "206": 1.3481035232543945, + "207": 4.655223369598389, + "208": 1.4760758876800537, + "209": 0.8217176198959351, + "210": 3.4677295684814453, + "211": 1.007032871246338, + "212": 1.2883204221725464, + "213": 1.8062108755111694, + "214": 1.568005919456482, + "215": 7.985816478729248 + }, + "loss": { + "162": 2.7989280223846436, + "163": 4.361610412597656, + "164": 2.6710832118988037, + "165": 2.7454373836517334, + "166": 3.0132532119750977, + "167": 2.839517116546631, + "168": 2.78610897064209, + "169": 2.9087047576904297, + "170": 3.0820183753967285, + "171": 3.928989887237549, + "172": 3.401419162750244, + "173": 7.087863922119141, + "174": 3.383911609649658, + "175": 3.271345615386963, + "176": 3.098524570465088, + "177": 3.9379611015319824, + "178": 2.953331232070923, + "179": 3.0407285690307617, + "180": 3.0467114448547363, + "181": 3.0696702003479004, + "182": 2.9657020568847656, + "183": 4.595019340515137, + "184": 3.3624134063720703, + "185": 2.9694716930389404, + "186": 6.6835503578186035, + "187": 3.152012586593628, + "188": 3.545067310333252, + "189": 3.051173210144043, + "190": 3.091698169708252, + "191": 3.435682773590088, + "192": 3.0081863403320312, + "193": 3.0356650352478027, + "194": 2.8297853469848633, + "195": 2.8922810554504395, + "196": 5.024540901184082, + "197": 2.9263577461242676, + "198": 2.9566190242767334, + "199": 3.381348133087158, + "200": 2.9656076431274414, + "201": 3.047962188720703, + "202": 3.042660713195801, + "203": 3.8212239742279053, + "204": 2.7600789070129395, + "205": 5.395393371582031, + "206": 2.7800631523132324, + "207": 4.111872673034668, + "208": 3.174259662628174, + "209": 2.729217052459717, + "210": 3.371816635131836, + "211": 2.7568399906158447, + "212": 2.8635785579681396, + "213": 3.318913459777832, + "214": 2.949039936065674, + "215": 5.546300411224365 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 5.11767, + 0.0534774, + 11.8523, + 2.61922, + 2.86865, + 3.56653, + 4.0107, + 0.90834, + 1.13851, + 0.124148, + 0.901489, + 0.0354056, + 0.449747, + 0.958337, + 0.985095, + 0.132174, + 3.24925, + 0.494161, + 2.86022, + 0.830422, + 0.636658, + 0.0758188, + 0.960715, + 0.622081, + 0.0642582, + 0.701219, + 0.752382, + 0.745689, + 0.729649, + 0.269776, + 2.03835, + 3.24949, + 0.867282, + 2.07603, + 0.0498733, + 1.64704, + 4.43003, + 0.143619, + 2.47764, + 0.600151, + 1.90588, + 0.067293, + 1.91732, + 0.0523616, + 1.52971, + 0.18974, + 1.45688, + 4.04197, + 0.280397, + 2.71847, + 1.72529, + 1.01732, + 1.19946, + 0.086969 + ], + "train_epoch_time": 5.054608583450317, + "train_loss": 2.8779535056186716, + "train_score": 0.20478165348085817, + "val_loss": 2.9066813320571603, + "val_score": 0.19806167483329773 + }, + { + "epoch": 4, + "grad_norm": 0.5887344479560852, + "learning_rate": 1.0, + "model_norm": 87.54537963867188, + "step_logs": { + "grad_norm": { + "216": 1.05914306640625, + "217": 1.1333253383636475, + "218": 2.3842105865478516, + "219": 0.8865511417388916, + "220": 2.2461509704589844, + "221": 1.1158198118209839, + "222": 4.969597816467285, + "223": 0.861678957939148, + "224": 1.3381102085113525, + "225": 0.9215143918991089, + "226": 5.811474800109863, + "227": 0.886582612991333, + "228": 1.4481470584869385, + "229": 1.5169901847839355, + "230": 4.161715507507324, + "231": 1.1239099502563477, + "232": 1.2122349739074707, + "233": 1.2336012125015259, + "234": 4.419249534606934, + "235": 1.3811534643173218, + "236": 2.5956661701202393, + "237": 2.1354124546051025, + "238": 0.7544280290603638, + "239": 1.8376989364624023, + "240": 1.5926493406295776, + "241": 2.820427417755127, + "242": 0.8177281022071838, + "243": 0.9029402732849121, + "244": 0.9629725813865662, + "245": 1.0313796997070312, + "246": 0.6563055515289307, + "247": 0.39263778924942017, + "248": 0.2830714285373688, + "249": 0.4418735206127167, + "250": 0.7193074226379395, + "251": 1.573967456817627, + "252": 0.8245394825935364, + "253": 1.0902609825134277, + "254": 0.8563145399093628, + "255": 0.5519356727600098, + "256": 0.9002417922019958, + "257": 0.9481765627861023, + "258": 0.5470548868179321, + "259": 0.39079615473747253, + "260": 0.426209956407547, + "261": 0.48951825499534607, + "262": 0.533036470413208, + "263": 0.6724916696548462, + "264": 0.6284416317939758, + "265": 0.4307269752025604, + "266": 0.3624004125595093, + "267": 0.4356800317764282, + "268": 0.5273780822753906, + "269": 0.5887344479560852 + }, + "loss": { + "216": 2.879425525665283, + "217": 2.860900402069092, + "218": 3.128995418548584, + "219": 2.9086966514587402, + "220": 2.8993587493896484, + "221": 2.8802409172058105, + "222": 3.5089950561523438, + "223": 2.8219335079193115, + "224": 2.8240935802459717, + "225": 2.8346526622772217, + "226": 3.576157569885254, + "227": 2.7567503452301025, + "228": 2.7820897102355957, + "229": 3.1573550701141357, + "230": 3.269378185272217, + "231": 2.991978883743286, + "232": 2.734656810760498, + "233": 2.941859006881714, + "234": 3.293694257736206, + "235": 2.987952709197998, + "236": 3.0175209045410156, + "237": 3.4019830226898193, + "238": 2.6543259620666504, + "239": 3.040815830230713, + "240": 2.9467356204986572, + "241": 4.197122573852539, + "242": 2.751098394393921, + "243": 2.747145175933838, + "244": 2.7478253841400146, + "245": 2.8338794708251953, + "246": 2.6444525718688965, + "247": 2.6249399185180664, + "248": 2.593257188796997, + "249": 2.569084405899048, + "250": 2.651834487915039, + "251": 2.801464080810547, + "252": 2.9775736331939697, + "253": 2.734055995941162, + "254": 2.927776336669922, + "255": 2.6600732803344727, + "256": 2.627235174179077, + "257": 2.8831098079681396, + "258": 2.6698570251464844, + "259": 2.5659618377685547, + "260": 2.569091796875, + "261": 2.577655792236328, + "262": 2.594210624694824, + "263": 2.6245179176330566, + "264": 2.700070858001709, + "265": 2.5859293937683105, + "266": 2.5591416358947754, + "267": 2.5564053058624268, + "268": 2.597079038619995, + "269": 2.5908541679382324 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 2.56683, + 2.22738, + 0.550447, + 3.70076, + 0.574677, + 2.31335, + 0.142082, + 3.80063, + 1.57723, + 3.33807, + 0.105887, + 3.50719, + 1.32662, + 1.37201, + 0.188765, + 2.36862, + 1.86093, + 1.93318, + 0.16865, + 1.56635, + 0.447871, + 0.746051, + 4.66357, + 0.900412, + 1.16172, + 0.527621, + 4.11423, + 3.36949, + 2.9632, + 2.66406, + 6.13937, + 17.0269, + 32.3634, + 13.1578, + 5.12528, + 1.13082, + 4.37965, + 2.3001, + 3.99274, + 8.73206, + 3.24176, + 3.20688, + 8.92126, + 16.8016, + 14.1427, + 10.7569, + 9.13043, + 5.80331, + 6.83668, + 13.9384, + 19.4857, + 13.4677, + 9.33772, + 7.47488 + ], + "train_epoch_time": 5.054750919342041, + "train_loss": 2.6228919218739275, + "train_score": 0.23670081599713055, + "val_loss": 2.665089219090038, + "val_score": 0.23133431426351023 + }, + { + "epoch": 5, + "grad_norm": 0.6827788352966309, + "learning_rate": 1.0, + "model_norm": 87.62759399414062, + "step_logs": { + "grad_norm": { + "270": 0.5555106997489929, + "271": 0.4655524492263794, + "272": 0.47277209162712097, + "273": 0.5839066505432129, + "274": 0.6063619256019592, + "275": 0.5545136332511902, + "276": 0.5252218246459961, + "277": 0.5745373368263245, + "278": 0.5235341787338257, + "279": 0.40295249223709106, + "280": 0.4501647651195526, + "281": 0.6298251152038574, + "282": 0.5882461071014404, + "283": 0.421758770942688, + "284": 0.44777485728263855, + "285": 0.5653491020202637, + "286": 0.676089882850647, + "287": 0.6589581370353699, + "288": 0.5578166842460632, + "289": 0.43333059549331665, + "290": 0.4672476351261139, + "291": 0.5668012499809265, + "292": 0.6152086853981018, + "293": 0.6537403464317322, + "294": 0.5736836194992065, + "295": 0.4108264446258545, + "296": 0.43720370531082153, + "297": 0.5664796829223633, + "298": 0.5703925490379333, + "299": 0.510169267654419, + "300": 0.5257142782211304, + "301": 0.6524471640586853, + "302": 0.6611528992652893, + "303": 0.501374363899231, + "304": 0.43720799684524536, + "305": 0.5381279587745667, + "306": 0.7145493030548096, + "307": 0.654240608215332, + "308": 0.537788450717926, + "309": 0.5183095335960388, + "310": 0.5211868286132812, + "311": 0.6392931938171387, + "312": 0.6911410689353943, + "313": 0.48801928758621216, + "314": 0.4185806214809418, + "315": 0.4073719084262848, + "316": 0.4472411572933197, + "317": 1.0167473554611206, + "318": 0.5887118577957153, + "319": 0.615260124206543, + "320": 0.6129778623580933, + "321": 0.6420866847038269, + "322": 0.711050271987915, + "323": 0.6827788352966309 + }, + "loss": { + "270": 2.629457950592041, + "271": 2.576857089996338, + "272": 2.579404354095459, + "273": 2.594836711883545, + "274": 2.625920295715332, + "275": 2.5772459506988525, + "276": 2.5971596240997314, + "277": 2.5823988914489746, + "278": 2.6025447845458984, + "279": 2.556942939758301, + "280": 2.539665699005127, + "281": 2.575623035430908, + "282": 2.6504249572753906, + "283": 2.5649685859680176, + "284": 2.5449976921081543, + "285": 2.556992530822754, + "286": 2.634875774383545, + "287": 2.6135270595550537, + "288": 2.610264539718628, + "289": 2.5462141036987305, + "290": 2.554551839828491, + "291": 2.5557708740234375, + "292": 2.5983874797821045, + "293": 2.5749433040618896, + "294": 2.6319751739501953, + "295": 2.5239522457122803, + "296": 2.5390303134918213, + "297": 2.5517003536224365, + "298": 2.6059937477111816, + "299": 2.5403308868408203, + "300": 2.5613574981689453, + "301": 2.5686450004577637, + "302": 2.6447978019714355, + "303": 2.5433998107910156, + "304": 2.5435404777526855, + "305": 2.532345771789551, + "306": 2.5997838973999023, + "307": 2.573017120361328, + "308": 2.5923736095428467, + "309": 2.560164451599121, + "310": 2.5676488876342773, + "311": 2.563610553741455, + "312": 2.6065878868103027, + "313": 2.5391602516174316, + "314": 2.5279202461242676, + "315": 2.4809811115264893, + "316": 2.4592719078063965, + "317": 2.528409242630005, + "318": 2.575133800506592, + "319": 2.543478488922119, + "320": 2.6020755767822266, + "321": 2.5485613346099854, + "322": 2.605581760406494, + "323": 2.583954334259033 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 8.52082, + 11.8892, + 11.5403, + 7.61068, + 7.14196, + 8.38169, + 9.41484, + 7.82324, + 9.49529, + 15.7476, + 12.5324, + 6.49295, + 7.65945, + 14.4196, + 12.6931, + 8.00011, + 5.76436, + 6.01882, + 8.38883, + 13.5599, + 11.7009, + 7.95537, + 6.86529, + 6.025, + 7.99718, + 14.9542, + 13.2831, + 7.95172, + 8.00987, + 9.76027, + 9.26767, + 6.03411, + 6.05046, + 10.1179, + 13.3065, + 8.74484, + 5.09182, + 6.01129, + 8.96344, + 9.52992, + 9.45255, + 6.27266, + 5.45681, + 10.6614, + 14.428, + 14.95, + 12.2948, + 2.4458, + 7.43009, + 6.71909, + 6.92517, + 6.1817, + 5.15352, + 5.54274 + ], + "train_epoch_time": 5.056915521621704, + "train_loss": 2.575000104671571, + "train_score": 0.2442005918220947, + "val_loss": 2.6129064505190582, + "val_score": 0.23873421397534905 + }, + { + "epoch": 6, + "grad_norm": 0.42222860455513, + "learning_rate": 1.0, + "model_norm": 87.7262191772461, + "step_logs": { + "grad_norm": { + "324": 0.589959442615509, + "325": 0.5029729008674622, + "326": 0.5285340547561646, + "327": 0.5284609794616699, + "328": 0.5149835348129272, + "329": 0.5487874746322632, + "330": 0.5941106677055359, + "331": 0.6051709055900574, + "332": 0.5953346490859985, + "333": 0.5666784644126892, + "334": 0.5870032906532288, + "335": 0.6597731113433838, + "336": 0.6418724060058594, + "337": 0.5051636695861816, + "338": 0.45763349533081055, + "339": 0.5128597021102905, + "340": 0.5366122722625732, + "341": 0.5496450066566467, + "342": 0.5769292712211609, + "343": 0.6236070990562439, + "344": 0.615502119064331, + "345": 0.600796639919281, + "346": 0.6764445304870605, + "347": 0.650355339050293, + "348": 0.7011405229568481, + "349": 0.6819117665290833, + "350": 0.6357214450836182, + "351": 0.6570878028869629, + "352": 0.7232521772384644, + "353": 0.6644817590713501, + "354": 0.5698176026344299, + "355": 0.5169979333877563, + "356": 0.5554744005203247, + "357": 0.6457024216651917, + "358": 0.6742708086967468, + "359": 0.5455997586250305, + "360": 0.5129860639572144, + "361": 0.8755683302879333, + "362": 0.7512721419334412, + "363": 0.7248756885528564, + "364": 0.6089893579483032, + "365": 0.8547742366790771, + "366": 0.5908450484275818, + "367": 0.6266611814498901, + "368": 0.6146601438522339, + "369": 0.6133776307106018, + "370": 0.519788920879364, + "371": 0.5844746828079224, + "372": 0.5966187119483948, + "373": 0.5463952422142029, + "374": 0.5837127566337585, + "375": 0.8275039196014404, + "376": 0.5807767510414124, + "377": 0.42222860455513 + }, + "loss": { + "324": 2.5863800048828125, + "325": 2.553689956665039, + "326": 2.531513214111328, + "327": 2.528355360031128, + "328": 2.549199104309082, + "329": 2.5063982009887695, + "330": 2.5663740634918213, + "331": 2.5246520042419434, + "332": 2.5476579666137695, + "333": 2.509037494659424, + "334": 2.5129401683807373, + "335": 2.552493095397949, + "336": 2.5522704124450684, + "337": 2.5181727409362793, + "338": 2.5297961235046387, + "339": 2.485309600830078, + "340": 2.5213050842285156, + "341": 2.4957408905029297, + "342": 2.5106847286224365, + "343": 2.481025218963623, + "344": 2.541317939758301, + "345": 2.4983444213867188, + "346": 2.4951438903808594, + "347": 2.530484676361084, + "348": 2.5371439456939697, + "349": 2.5106794834136963, + "350": 2.5177316665649414, + "351": 2.499049663543701, + "352": 2.5348687171936035, + "353": 2.542555332183838, + "354": 2.525477647781372, + "355": 2.4737844467163086, + "356": 2.497093677520752, + "357": 2.4768943786621094, + "358": 2.5111653804779053, + "359": 2.4554877281188965, + "360": 2.430851697921753, + "361": 2.435702085494995, + "362": 2.5544791221618652, + "363": 2.542387008666992, + "364": 2.5089550018310547, + "365": 2.503735303878784, + "366": 2.5334179401397705, + "367": 2.5242385864257812, + "368": 2.541548252105713, + "369": 2.5084705352783203, + "370": 2.4832043647766113, + "371": 2.461982011795044, + "372": 2.481531858444214, + "373": 2.4528322219848633, + "374": 2.436610221862793, + "375": 2.4492297172546387, + "376": 2.524195671081543, + "377": 2.4112091064453125 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 7.43101, + 10.0944, + 9.06221, + 9.05341, + 9.61207, + 8.32227, + 7.27085, + 6.89359, + 7.18818, + 7.81329, + 7.29291, + 5.86375, + 6.19483, + 9.86782, + 12.0795, + 9.44895, + 8.75597, + 8.26104, + 7.54305, + 6.37983, + 6.70811, + 6.92145, + 5.45295, + 5.98277, + 5.16101, + 5.39927, + 6.22982, + 5.78799, + 4.84592, + 5.75843, + 7.77807, + 9.25517, + 8.09295, + 5.94077, + 5.5234, + 8.24877, + 9.23735, + 3.1772, + 4.52593, + 4.83854, + 6.76509, + 3.42677, + 7.25704, + 6.42784, + 6.72711, + 6.66735, + 9.19091, + 7.20698, + 6.9715, + 8.21588, + 7.15134, + 3.57676, + 7.4835, + 13.5251 + ], + "train_epoch_time": 5.059398651123047, + "train_loss": 2.3843994223404477, + "train_score": 0.3101181403208668, + "val_loss": 2.412595060197412, + "val_score": 0.30106827647486184 + }, + { + "epoch": 7, + "grad_norm": 0.6233770847320557, + "learning_rate": 1.0, + "model_norm": 87.8237533569336, + "step_logs": { + "grad_norm": { + "378": 0.4476366937160492, + "379": 0.725091814994812, + "380": 0.5437142252922058, + "381": 0.4848865568637848, + "382": 0.6433197855949402, + "383": 0.5641137361526489, + "384": 0.7488631010055542, + "385": 0.7799643278121948, + "386": 0.5854859948158264, + "387": 0.5013403296470642, + "388": 0.6148808002471924, + "389": 0.9244084358215332, + "390": 0.6364380121231079, + "391": 0.6464278101921082, + "392": 0.6114261746406555, + "393": 0.5769841074943542, + "394": 0.5522688031196594, + "395": 0.5251255035400391, + "396": 0.49269723892211914, + "397": 0.47639235854148865, + "398": 0.5340412259101868, + "399": 0.6054332852363586, + "400": 0.6141825318336487, + "401": 0.6073479056358337, + "402": 0.7358940839767456, + "403": 0.5304068922996521, + "404": 0.5366724133491516, + "405": 1.4512220621109009, + "406": 0.5987162590026855, + "407": 0.6535307765007019, + "408": 0.44275209307670593, + "409": 0.3558882474899292, + "410": 0.4601386487483978, + "411": 0.5123399496078491, + "412": 0.5616330504417419, + "413": 0.7331110835075378, + "414": 0.6110520362854004, + "415": 0.5966317653656006, + "416": 0.6240831017494202, + "417": 0.7943153977394104, + "418": 0.824618935585022, + "419": 0.7633992433547974, + "420": 0.6250609159469604, + "421": 0.6731459498405457, + "422": 0.7580225467681885, + "423": 0.6872450113296509, + "424": 0.5986344814300537, + "425": 0.6646986603736877, + "426": 1.035421371459961, + "427": 0.6865931749343872, + "428": 0.6413306593894958, + "429": 0.605828583240509, + "430": 0.5741073489189148, + "431": 0.6233770847320557 + }, + "loss": { + "378": 2.4035091400146484, + "379": 2.4032530784606934, + "380": 2.475454330444336, + "381": 2.423172950744629, + "382": 2.44462513923645, + "383": 2.482314348220825, + "384": 2.4797093868255615, + "385": 2.5459303855895996, + "386": 2.4761035442352295, + "387": 2.403139591217041, + "388": 2.359795570373535, + "389": 2.468825340270996, + "390": 2.517343521118164, + "391": 2.4774680137634277, + "392": 2.460294246673584, + "393": 2.44753098487854, + "394": 2.4470326900482178, + "395": 2.4523162841796875, + "396": 2.406097173690796, + "397": 2.4421441555023193, + "398": 2.3985066413879395, + "399": 2.46334171295166, + "400": 2.429025650024414, + "401": 2.4523916244506836, + "402": 2.4385428428649902, + "403": 2.4708199501037598, + "404": 2.4242401123046875, + "405": 2.497091770172119, + "406": 2.50575852394104, + "407": 2.437063694000244, + "408": 2.4413199424743652, + "409": 2.3665173053741455, + "410": 2.3716275691986084, + "411": 2.410421848297119, + "412": 2.420224666595459, + "413": 2.4213624000549316, + "414": 2.443295955657959, + "415": 2.4177072048187256, + "416": 2.4265623092651367, + "417": 2.4684195518493652, + "418": 2.4558656215667725, + "419": 2.5177488327026367, + "420": 2.423591136932373, + "421": 2.3797426223754883, + "422": 2.4399895668029785, + "423": 2.467034339904785, + "424": 2.380239963531494, + "425": 2.3962860107421875, + "426": 2.4109139442443848, + "427": 2.4847571849823, + "428": 2.4449620246887207, + "429": 2.455070734024048, + "430": 2.3972742557525635, + "431": 2.411729335784912 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 11.9948, + 4.57102, + 8.37363, + 10.3063, + 5.90688, + 7.80052, + 4.42177, + 4.18502, + 7.2233, + 9.56123, + 6.24155, + 2.8891, + 6.21484, + 5.92882, + 6.58111, + 7.35192, + 8.02304, + 8.89304, + 9.91181, + 10.7607, + 8.40991, + 6.72035, + 6.43928, + 6.64836, + 4.50298, + 8.78259, + 8.417, + 1.18568, + 6.99032, + 5.70604, + 12.4538, + 18.6845, + 11.2013, + 9.18283, + 7.67274, + 4.50526, + 6.54364, + 6.79189, + 6.23027, + 3.91231, + 3.61159, + 4.32025, + 6.20318, + 5.25184, + 4.24643, + 5.22338, + 6.64198, + 5.42362, + 2.24878, + 5.2709, + 5.9444, + 6.68905, + 7.2733, + 6.20622 + ], + "train_epoch_time": 5.062114477157593, + "train_loss": 2.409191673079043, + "train_score": 0.3023863432910213, + "val_loss": 2.467650843542561, + "val_score": 0.290035699285678 + }, + { + "epoch": 8, + "grad_norm": 0.9071621298789978, + "learning_rate": 1.0, + "model_norm": 87.94059753417969, + "step_logs": { + "grad_norm": { + "432": 0.7765079140663147, + "433": 0.8220192193984985, + "434": 0.6735662221908569, + "435": 0.5537124276161194, + "436": 0.5728278160095215, + "437": 0.8567742109298706, + "438": 0.6464678049087524, + "439": 0.6067763566970825, + "440": 0.6834186911582947, + "441": 0.5651432275772095, + "442": 0.5057303309440613, + "443": 0.580076277256012, + "444": 0.713246762752533, + "445": 0.6217272877693176, + "446": 0.5551022291183472, + "447": 0.5998145341873169, + "448": 0.6619981527328491, + "449": 0.733294665813446, + "450": 0.6560757756233215, + "451": 0.6208107471466064, + "452": 0.598636269569397, + "453": 0.5802643895149231, + "454": 0.6702688336372375, + "455": 0.6370559930801392, + "456": 0.6309664845466614, + "457": 0.5908674597740173, + "458": 0.676193118095398, + "459": 0.7668659687042236, + "460": 0.7398245334625244, + "461": 0.6312540173530579, + "462": 0.6135047078132629, + "463": 0.5682210326194763, + "464": 0.6274297833442688, + "465": 0.6611471176147461, + "466": 0.7763511538505554, + "467": 0.5277438163757324, + "468": 0.46193280816078186, + "469": 0.5078476667404175, + "470": 0.5151538252830505, + "471": 0.5464355945587158, + "472": 0.8680021166801453, + "473": 0.6129276156425476, + "474": 0.5865321159362793, + "475": 0.5939337015151978, + "476": 0.6458668112754822, + "477": 0.6915168166160583, + "478": 0.6741195917129517, + "479": 0.7049943208694458, + "480": 0.6342566013336182, + "481": 0.9019966125488281, + "482": 0.6422078609466553, + "483": 0.5967493057250977, + "484": 0.5948537588119507, + "485": 0.9071621298789978 + }, + "loss": { + "432": 2.434274196624756, + "433": 2.4632740020751953, + "434": 2.4352469444274902, + "435": 2.392164945602417, + "436": 2.362865924835205, + "437": 2.415482759475708, + "438": 2.4197092056274414, + "439": 2.4119417667388916, + "440": 2.37589430809021, + "441": 2.394064426422119, + "442": 2.360340118408203, + "443": 2.345093250274658, + "444": 2.3945202827453613, + "445": 2.4090452194213867, + "446": 2.3488574028015137, + "447": 2.343813180923462, + "448": 2.370213031768799, + "449": 2.3920602798461914, + "450": 2.3988003730773926, + "451": 2.366093158721924, + "452": 2.334099769592285, + "453": 2.3401384353637695, + "454": 2.3362374305725098, + "455": 2.4077751636505127, + "456": 2.3257977962493896, + "457": 2.342228889465332, + "458": 2.3290772438049316, + "459": 2.382145881652832, + "460": 2.407740592956543, + "461": 2.388184070587158, + "462": 2.3617303371429443, + "463": 2.3404135704040527, + "464": 2.310192346572876, + "465": 2.36226224899292, + "466": 2.3162612915039062, + "467": 2.3913581371307373, + "468": 2.3197669982910156, + "469": 2.3052096366882324, + "470": 2.318777561187744, + "471": 2.315047264099121, + "472": 2.3315112590789795, + "473": 2.401728630065918, + "474": 2.354654312133789, + "475": 2.377084732055664, + "476": 2.3382976055145264, + "477": 2.3644192218780518, + "478": 2.368288516998291, + "479": 2.372807502746582, + "480": 2.378720283508301, + "481": 2.346017360687256, + "482": 2.3976635932922363, + "483": 2.3420934677124023, + "484": 2.3065268993377686, + "485": 2.352982997894287 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 4.03718, + 3.64543, + 5.36763, + 7.8023, + 7.20097, + 3.29057, + 5.78988, + 6.55103, + 5.0869, + 7.49581, + 9.22862, + 6.96931, + 4.70694, + 6.23225, + 7.62273, + 6.51462, + 5.40846, + 4.44852, + 5.57296, + 6.13922, + 6.51318, + 6.95008, + 5.20019, + 5.93281, + 5.84197, + 6.70887, + 5.09381, + 4.05069, + 4.39898, + 5.99321, + 6.27472, + 7.24866, + 5.86838, + 5.40421, + 3.84301, + 8.58615, + 10.8714, + 8.93807, + 8.73746, + 7.75322, + 3.09454, + 6.39301, + 6.84453, + 6.73858, + 5.60549, + 4.94446, + 5.21148, + 4.7741, + 5.91307, + 2.88351, + 5.81349, + 6.57689, + 6.51836, + 2.85923 + ], + "train_epoch_time": 5.063021659851074, + "train_loss": 2.4160673170896994, + "train_score": 0.3061323080377565, + "val_loss": 2.4691786831747375, + "val_score": 0.2923229407223987 + }, + { + "epoch": 9, + "grad_norm": 0.8479442000389099, + "learning_rate": 1.0, + "model_norm": 88.05618286132812, + "step_logs": { + "grad_norm": { + "486": 0.8017803430557251, + "487": 0.7247504591941833, + "488": 0.5997197031974792, + "489": 0.5661600828170776, + "490": 0.5384531021118164, + "491": 0.5412113666534424, + "492": 0.5752896070480347, + "493": 0.5827569365501404, + "494": 0.5506663918495178, + "495": 0.6922354698181152, + "496": 0.6688157320022583, + "497": 0.710025429725647, + "498": 0.640258252620697, + "499": 0.5868394374847412, + "500": 0.7574666738510132, + "501": 0.7814472913742065, + "502": 0.6115919947624207, + "503": 0.48716840147972107, + "504": 0.5875566601753235, + "505": 0.5984475016593933, + "506": 0.7445812225341797, + "507": 0.7910962700843811, + "508": 0.6796084642410278, + "509": 0.6982877254486084, + "510": 0.6632845401763916, + "511": 0.6065599322319031, + "512": 0.5757735371589661, + "513": 0.5152581930160522, + "514": 0.5502412915229797, + "515": 0.5284173488616943, + "516": 0.4957486689090729, + "517": 0.5644363164901733, + "518": 0.6596044898033142, + "519": 0.7322612404823303, + "520": 0.6808573007583618, + "521": 0.5945437550544739, + "522": 0.6843143701553345, + "523": 0.6302454471588135, + "524": 0.5569486021995544, + "525": 0.564264178276062, + "526": 0.7126025557518005, + "527": 0.7082458734512329, + "528": 0.5964210629463196, + "529": 0.5213846564292908, + "530": 0.5039574503898621, + "531": 0.6565131545066833, + "532": 0.6962807774543762, + "533": 1.044175148010254, + "534": 0.7827070951461792, + "535": 0.9594875574111938, + "536": 0.7189468741416931, + "537": 0.8180437088012695, + "538": 1.003755807876587, + "539": 0.8479442000389099 + }, + "loss": { + "486": 2.4348576068878174, + "487": 2.45119047164917, + "488": 2.3284833431243896, + "489": 2.297154664993286, + "490": 2.2644057273864746, + "491": 2.2953734397888184, + "492": 2.30232310295105, + "493": 2.317416191101074, + "494": 2.26430082321167, + "495": 2.276296854019165, + "496": 2.3626694679260254, + "497": 2.3176751136779785, + "498": 2.340026378631592, + "499": 2.286736011505127, + "500": 2.290950059890747, + "501": 2.36889910697937, + "502": 2.3355743885040283, + "503": 2.252765655517578, + "504": 2.2416040897369385, + "505": 2.2984800338745117, + "506": 2.2948336601257324, + "507": 2.3218183517456055, + "508": 2.341596841812134, + "509": 2.3350746631622314, + "510": 2.290396213531494, + "511": 2.3208940029144287, + "512": 2.281456470489502, + "513": 2.2863335609436035, + "514": 2.242624282836914, + "515": 2.2667813301086426, + "516": 2.24092960357666, + "517": 2.2533531188964844, + "518": 2.2849860191345215, + "519": 2.323093891143799, + "520": 2.297314405441284, + "521": 2.30865478515625, + "522": 2.2752223014831543, + "523": 2.3129215240478516, + "524": 2.2830934524536133, + "525": 2.2515766620635986, + "526": 2.279601573944092, + "527": 2.325507164001465, + "528": 2.2451751232147217, + "529": 2.2410316467285156, + "530": 2.230013370513916, + "531": 2.2327327728271484, + "532": 2.3034873008728027, + "533": 2.3184001445770264, + "534": 2.3564209938049316, + "535": 2.373034954071045, + "536": 2.393862724304199, + "537": 2.3292441368103027, + "538": 2.3392906188964844, + "539": 2.448216438293457 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 3.78759, + 4.66659, + 6.47406, + 7.16658, + 7.81013, + 7.83645, + 6.95654, + 6.82384, + 7.46719, + 4.7503, + 5.2819, + 4.59732, + 5.70835, + 6.64014, + 3.9929, + 3.87924, + 6.2441, + 9.492, + 6.49321, + 6.41784, + 4.1393, + 3.70996, + 5.06984, + 4.78886, + 5.20608, + 6.30824, + 6.88191, + 8.61172, + 7.40713, + 8.11812, + 9.11812, + 7.07293, + 5.2519, + 4.33246, + 4.95574, + 6.53118, + 4.85862, + 5.82293, + 7.36026, + 7.07167, + 4.48915, + 4.63607, + 6.31167, + 8.24388, + 8.78051, + 5.18024, + 4.75135, + 2.12638, + 3.8464, + 2.57766, + 4.63133, + 3.48066, + 2.32182, + 3.40499 + ], + "train_epoch_time": 5.057504653930664, + "train_loss": 2.341117527700394, + "train_score": 0.32564450320246574, + "val_loss": 2.4157500888395256, + "val_score": 0.3131502937230396 + }, + { + "epoch": 10, + "grad_norm": 0.6387036442756653, + "learning_rate": 1.0, + "model_norm": 88.17684173583984, + "step_logs": { + "grad_norm": { + "540": 0.7706964015960693, + "541": 0.565232515335083, + "542": 0.6389411687850952, + "543": 0.8183033466339111, + "544": 0.7362214922904968, + "545": 0.8955008387565613, + "546": 0.6621264219284058, + "547": 0.5685049295425415, + "548": 0.6059681177139282, + "549": 0.6288581490516663, + "550": 0.5820083022117615, + "551": 0.5389314889907837, + "552": 0.5582356452941895, + "553": 0.5509064793586731, + "554": 0.6221991181373596, + "555": 0.5400289297103882, + "556": 0.5090938806533813, + "557": 0.5750826597213745, + "558": 0.6969702243804932, + "559": 0.6738913059234619, + "560": 0.7683441042900085, + "561": 0.53670334815979, + "562": 0.48783236742019653, + "563": 0.486321359872818, + "564": 0.5148715972900391, + "565": 0.5636603832244873, + "566": 0.6192460060119629, + "567": 0.6551604270935059, + "568": 0.6902365684509277, + "569": 0.6826980113983154, + "570": 0.6609243154525757, + "571": 0.58481365442276, + "572": 0.5430092811584473, + "573": 0.5805642604827881, + "574": 0.6683855652809143, + "575": 0.6684220433235168, + "576": 0.6165861487388611, + "577": 0.5472785234451294, + "578": 0.5367283225059509, + "579": 0.5435999631881714, + "580": 0.5493052005767822, + "581": 0.5877703428268433, + "582": 0.6477825045585632, + "583": 0.608697772026062, + "584": 0.601865291595459, + "585": 0.6329501271247864, + "586": 0.6878306269645691, + "587": 0.6167069673538208, + "588": 0.5438553094863892, + "589": 0.5399678349494934, + "590": 0.5317276120185852, + "591": 0.6248087286949158, + "592": 0.6037768125534058, + "593": 0.6387036442756653 + }, + "loss": { + "540": 2.348557472229004, + "541": 2.277038812637329, + "542": 2.2487144470214844, + "543": 2.335327625274658, + "544": 2.3149187564849854, + "545": 2.3475818634033203, + "546": 2.354139804840088, + "547": 2.284546375274658, + "548": 2.249492645263672, + "549": 2.274181365966797, + "550": 2.254321813583374, + "551": 2.2341079711914062, + "552": 2.234658718109131, + "553": 2.251466751098633, + "554": 2.230208396911621, + "555": 2.2891364097595215, + "556": 2.2364907264709473, + "557": 2.2209420204162598, + "558": 2.21760892868042, + "559": 2.282057762145996, + "560": 2.2576422691345215, + "561": 2.2787747383117676, + "562": 2.2567410469055176, + "563": 2.24727201461792, + "564": 2.219088554382324, + "565": 2.210675001144409, + "566": 2.2498698234558105, + "567": 2.240264415740967, + "568": 2.2447094917297363, + "569": 2.2753264904022217, + "570": 2.279207944869995, + "571": 2.2346253395080566, + "572": 2.200122833251953, + "573": 2.188459873199463, + "574": 2.2267870903015137, + "575": 2.2667417526245117, + "576": 2.2027907371520996, + "577": 2.206265926361084, + "578": 2.1901350021362305, + "579": 2.207444667816162, + "580": 2.201704263687134, + "581": 2.2042269706726074, + "582": 2.2094364166259766, + "583": 2.1977386474609375, + "584": 2.2322731018066406, + "585": 2.206829309463501, + "586": 2.231187105178833, + "587": 2.2853989601135254, + "588": 2.2001137733459473, + "589": 2.1983747482299805, + "590": 2.1891374588012695, + "591": 2.1933391094207764, + "592": 2.214601993560791, + "593": 2.1937899589538574 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 3.95398, + 7.12715, + 5.50824, + 3.48754, + 4.27089, + 2.92745, + 5.36971, + 7.06857, + 6.12611, + 5.75069, + 6.65515, + 7.69196, + 7.17094, + 7.41839, + 5.76085, + 7.84942, + 8.62922, + 6.71547, + 4.56517, + 5.02513, + 3.82423, + 7.91103, + 9.48289, + 9.50187, + 8.37099, + 6.95809, + 5.8672, + 5.2192, + 4.71155, + 4.88187, + 5.21772, + 6.53386, + 7.46161, + 6.49289, + 4.98453, + 5.07342, + 5.7941, + 7.36616, + 7.60259, + 7.47018, + 7.29679, + 6.3803, + 5.2653, + 5.93161, + 6.16238, + 5.50845, + 4.71599, + 6.00903, + 7.43838, + 7.53991, + 7.74274, + 5.61839, + 6.07495, + 5.3777 + ], + "train_epoch_time": 5.056248188018799, + "train_loss": 2.235028208208563, + "train_score": 0.35139997319306326, + "val_loss": 2.3336211624977645, + "val_score": 0.3283761486530851 + }, + { + "epoch": 11, + "grad_norm": 0.6408677101135254, + "learning_rate": 1.0, + "model_norm": 88.31568145751953, + "step_logs": { + "grad_norm": { + "594": 0.7077268958091736, + "595": 0.6217532753944397, + "596": 0.5789417028427124, + "597": 0.5227370858192444, + "598": 0.5119795203208923, + "599": 0.6203845739364624, + "600": 0.6974325776100159, + "601": 0.6027323603630066, + "602": 0.5231297016143799, + "603": 0.5360603332519531, + "604": 0.5519902110099792, + "605": 0.6456120014190674, + "606": 0.6429061889648438, + "607": 0.576969563961029, + "608": 0.6324966549873352, + "609": 0.6383657455444336, + "610": 0.6201777458190918, + "611": 0.5834636688232422, + "612": 0.616550087928772, + "613": 0.7507140040397644, + "614": 0.934150755405426, + "615": 0.9501323699951172, + "616": 0.8356849551200867, + "617": 0.8668741583824158, + "618": 0.7063598036766052, + "619": 0.6286273002624512, + "620": 0.592766523361206, + "621": 0.5907629728317261, + "622": 0.5974923968315125, + "623": 0.6208613514900208, + "624": 0.6769024133682251, + "625": 0.6768881678581238, + "626": 0.6350733637809753, + "627": 0.5566412806510925, + "628": 0.48644495010375977, + "629": 0.556790828704834, + "630": 0.7378922700881958, + "631": 0.6458808183670044, + "632": 0.47657760977745056, + "633": 0.4891546368598938, + "634": 0.5827817320823669, + "635": 0.6533992886543274, + "636": 0.5813054442405701, + "637": 0.53339684009552, + "638": 0.5462914705276489, + "639": 0.5413225293159485, + "640": 0.5329384803771973, + "641": 0.522574782371521, + "642": 0.5853460431098938, + "643": 0.5998774170875549, + "644": 0.6070759892463684, + "645": 0.7032648921012878, + "646": 0.9001753926277161, + "647": 0.6408677101135254 + }, + "loss": { + "594": 2.222482681274414, + "595": 2.2153053283691406, + "596": 2.214951753616333, + "597": 2.1934876441955566, + "598": 2.1778922080993652, + "599": 2.13860821723938, + "600": 2.2235894203186035, + "601": 2.2297611236572266, + "602": 2.1694183349609375, + "603": 2.1711907386779785, + "604": 2.186601161956787, + "605": 2.2139649391174316, + "606": 2.2311410903930664, + "607": 2.224823474884033, + "608": 2.1895785331726074, + "609": 2.198668956756592, + "610": 2.163444757461548, + "611": 2.2034220695495605, + "612": 2.161508083343506, + "613": 2.2344391345977783, + "614": 2.248788356781006, + "615": 2.317734718322754, + "616": 2.298546314239502, + "617": 2.2629029750823975, + "618": 2.3032338619232178, + "619": 2.2257790565490723, + "620": 2.201134204864502, + "621": 2.1813535690307617, + "622": 2.1882834434509277, + "623": 2.1853721141815186, + "624": 2.208638906478882, + "625": 2.2136659622192383, + "626": 2.1800966262817383, + "627": 2.1558072566986084, + "628": 2.1377124786376953, + "629": 2.11506986618042, + "630": 2.1868984699249268, + "631": 2.213593006134033, + "632": 2.124800205230713, + "633": 2.1536502838134766, + "634": 2.147817373275757, + "635": 2.162109613418579, + "636": 2.1786575317382812, + "637": 2.1588425636291504, + "638": 2.1571078300476074, + "639": 2.129655361175537, + "640": 2.14258074760437, + "641": 2.134822368621826, + "642": 2.1243739128112793, + "643": 2.189227819442749, + "644": 2.143068313598633, + "645": 2.19210147857666, + "646": 2.2131314277648926, + "647": 2.2399368286132812 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 4.43718, + 5.73057, + 6.60837, + 8.02728, + 8.30866, + 5.5566, + 4.57141, + 6.13775, + 7.92729, + 7.55563, + 7.1764, + 5.31163, + 5.39799, + 6.68328, + 5.47323, + 5.39536, + 5.62488, + 6.47247, + 5.68617, + 3.96478, + 2.577, + 2.56741, + 3.2913, + 3.0113, + 4.61622, + 5.63243, + 6.2644, + 6.25028, + 6.12969, + 5.66939, + 4.82028, + 4.83146, + 5.4054, + 6.95759, + 9.03404, + 6.82245, + 4.01645, + 5.30631, + 9.35515, + 9.00084, + 6.32391, + 5.06431, + 6.44733, + 7.58787, + 7.22808, + 7.2677, + 7.54368, + 7.81745, + 6.2002, + 6.08367, + 5.815, + 4.43223, + 2.7312, + 5.4538 + ], + "train_epoch_time": 5.059988737106323, + "train_loss": 2.168833270202921, + "train_score": 0.3682758697221748, + "val_loss": 2.269371535830054, + "val_score": 0.340372954594692 + }, + { + "epoch": 12, + "grad_norm": 0.5115713477134705, + "learning_rate": 1.0, + "model_norm": 88.42173767089844, + "step_logs": { + "grad_norm": { + "648": 0.716665506362915, + "649": 0.8069600462913513, + "650": 0.7947304844856262, + "651": 0.7240526080131531, + "652": 0.7629228234291077, + "653": 1.1339279413223267, + "654": 0.8302299976348877, + "655": 0.7467823624610901, + "656": 0.6972697973251343, + "657": 0.5455514192581177, + "658": 0.47693899273872375, + "659": 0.49731290340423584, + "660": 0.48412537574768066, + "661": 0.5093258619308472, + "662": 0.5902482867240906, + "663": 0.5586665868759155, + "664": 0.46420934796333313, + "665": 0.4215879440307617, + "666": 0.4409480690956116, + "667": 0.43534886837005615, + "668": 0.4254956841468811, + "669": 0.4858226478099823, + "670": 0.5949127078056335, + "671": 0.601553201675415, + "672": 0.5546567440032959, + "673": 0.5125888586044312, + "674": 0.5321463942527771, + "675": 0.46987468004226685, + "676": 0.4241557717323303, + "677": 0.41045764088630676, + "678": 0.4471478760242462, + "679": 0.49451401829719543, + "680": 0.563727617263794, + "681": 0.5436035990715027, + "682": 0.45979243516921997, + "683": 0.41452208161354065, + "684": 0.44676491618156433, + "685": 0.571497917175293, + "686": 0.5866062641143799, + "687": 0.46033743023872375, + "688": 0.42913320660591125, + "689": 0.468587726354599, + "690": 0.4917983412742615, + "691": 0.5114935040473938, + "692": 0.48584261536598206, + "693": 0.42314836382865906, + "694": 0.4293260872364044, + "695": 0.43268439173698425, + "696": 0.43268081545829773, + "697": 0.4635193347930908, + "698": 0.503139317035675, + "699": 0.546288013458252, + "700": 0.5250817537307739, + "701": 0.5115713477134705 + }, + "loss": { + "648": 2.1549344062805176, + "649": 2.224435806274414, + "650": 2.2394330501556396, + "651": 2.1879374980926514, + "652": 2.2070116996765137, + "653": 2.235476016998291, + "654": 2.2922840118408203, + "655": 2.2481298446655273, + "656": 2.241356372833252, + "657": 2.1795616149902344, + "658": 2.1363630294799805, + "659": 2.11326265335083, + "660": 2.1178579330444336, + "661": 2.1301376819610596, + "662": 2.161309242248535, + "663": 2.144866943359375, + "664": 2.11765193939209, + "665": 2.0968875885009766, + "666": 2.0897750854492188, + "667": 2.0687460899353027, + "668": 2.066110372543335, + "669": 2.097501039505005, + "670": 2.0905919075012207, + "671": 2.0850939750671387, + "672": 2.1050355434417725, + "673": 2.1126999855041504, + "674": 2.0986886024475098, + "675": 2.0715060234069824, + "676": 2.0975241661071777, + "677": 2.052122116088867, + "678": 2.057093858718872, + "679": 2.1010711193084717, + "680": 2.079598903656006, + "681": 2.069915771484375, + "682": 2.0774035453796387, + "683": 2.0765464305877686, + "684": 2.0656509399414062, + "685": 2.065680503845215, + "686": 2.0785820484161377, + "687": 2.052807092666626, + "688": 2.0685455799102783, + "689": 2.0394287109375, + "690": 2.0377089977264404, + "691": 2.068427085876465, + "692": 2.034313678741455, + "693": 2.0061779022216797, + "694": 2.028668165206909, + "695": 2.0189952850341797, + "696": 2.0436224937438965, + "697": 2.015399932861328, + "698": 2.029754877090454, + "699": 2.022144079208374, + "700": 2.0621938705444336, + "701": 2.0341384410858154 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 4.19567, + 3.41598, + 3.54567, + 4.17344, + 3.79178, + 1.7386, + 3.32561, + 4.03119, + 4.61009, + 7.32315, + 9.39181, + 8.54464, + 9.0361, + 8.21138, + 6.20365, + 6.87219, + 9.82713, + 11.7977, + 10.7479, + 10.9152, + 11.412, + 8.88683, + 5.90694, + 5.76206, + 6.84244, + 8.0408, + 7.41115, + 9.38258, + 11.6589, + 12.1805, + 10.2885, + 8.59179, + 6.54396, + 7.00468, + 9.82646, + 12.085, + 10.349, + 6.32461, + 6.04051, + 9.68714, + 11.2326, + 9.2881, + 8.42496, + 7.90606, + 8.6184, + 11.2043, + 11.0062, + 10.7843, + 10.916, + 9.38049, + 8.01802, + 6.77593, + 7.47955, + 7.77263 + ], + "train_epoch_time": 5.061323404312134, + "train_loss": 2.0338356951901013, + "train_score": 0.40135513812602847, + "val_loss": 2.1551041594876486, + "val_score": 0.3673982853935999 + }, + { + "epoch": 13, + "grad_norm": 0.32616376876831055, + "learning_rate": 0.6666666666666667, + "model_norm": 88.48424530029297, + "step_logs": { + "grad_norm": { + "702": 0.4989427924156189, + "703": 0.5028764605522156, + "704": 0.48513442277908325, + "705": 0.4481595754623413, + "706": 0.3963972330093384, + "707": 0.37314093112945557, + "708": 0.4267770051956177, + "709": 0.49082428216934204, + "710": 0.48820096254348755, + "711": 0.4020784795284271, + "712": 0.3392620086669922, + "713": 0.3534849286079407, + "714": 0.38990235328674316, + "715": 0.4022107720375061, + "716": 0.4177328944206238, + "717": 0.4514225423336029, + "718": 0.44274264574050903, + "719": 0.4521128535270691, + "720": 0.4563765823841095, + "721": 0.4543166160583496, + "722": 0.4531152844429016, + "723": 0.4502907991409302, + "724": 0.43363961577415466, + "725": 0.44261205196380615, + "726": 0.4255932867527008, + "727": 0.3841048777103424, + "728": 0.3714306056499481, + "729": 0.3813962936401367, + "730": 0.3985673785209656, + "731": 0.4312443435192108, + "732": 0.4193502366542816, + "733": 0.3988642990589142, + "734": 0.39689362049102783, + "735": 0.4039759635925293, + "736": 0.36932554841041565, + "737": 0.3757588267326355, + "738": 0.38661423325538635, + "739": 0.3642880916595459, + "740": 0.3629930317401886, + "741": 0.37425899505615234, + "742": 0.37483593821525574, + "743": 0.3770025670528412, + "744": 0.36514976620674133, + "745": 0.32594454288482666, + "746": 0.30586183071136475, + "747": 0.3098975419998169, + "748": 0.35013002157211304, + "749": 0.38314712047576904, + "750": 0.3351095914840698, + "751": 0.3467327654361725, + "752": 0.36268121004104614, + "753": 0.36673134565353394, + "754": 0.3334101736545563, + "755": 0.32616376876831055 + }, + "loss": { + "702": 2.034492015838623, + "703": 2.0352673530578613, + "704": 2.0394086837768555, + "705": 2.008737564086914, + "706": 2.0163679122924805, + "707": 1.9963488578796387, + "708": 1.9911524057388306, + "709": 2.041245222091675, + "710": 2.034959316253662, + "711": 2.0070085525512695, + "712": 1.9800987243652344, + "713": 1.9934635162353516, + "714": 1.9859341382980347, + "715": 2.022761344909668, + "716": 1.9788706302642822, + "717": 1.96261465549469, + "718": 1.9863977432250977, + "719": 2.0154967308044434, + "720": 1.96248197555542, + "721": 1.974233627319336, + "722": 2.0144500732421875, + "723": 1.9809954166412354, + "724": 2.0080857276916504, + "725": 2.024318218231201, + "726": 1.9882514476776123, + "727": 1.985202670097351, + "728": 1.9838249683380127, + "729": 1.9879852533340454, + "730": 1.9706623554229736, + "731": 2.007246494293213, + "732": 1.9716395139694214, + "733": 1.9697849750518799, + "734": 1.9749248027801514, + "735": 1.9931504726409912, + "736": 1.9625194072723389, + "737": 1.9570355415344238, + "738": 1.9798170328140259, + "739": 1.987271785736084, + "740": 1.9576994180679321, + "741": 2.002255439758301, + "742": 1.9505881071090698, + "743": 1.9529694318771362, + "744": 1.9706934690475464, + "745": 1.9538817405700684, + "746": 1.9835524559020996, + "747": 1.9771671295166016, + "748": 1.931983232498169, + "749": 1.9355602264404297, + "750": 1.976599097251892, + "751": 1.9477629661560059, + "752": 1.9791746139526367, + "753": 1.979038953781128, + "754": 1.9702351093292236, + "755": 1.9630451202392578 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 8.17249, + 8.0482, + 8.66523, + 10.0013, + 12.8324, + 14.3381, + 10.9321, + 8.47311, + 8.53805, + 12.4145, + 17.2035, + 15.9539, + 13.0633, + 12.5037, + 11.3402, + 9.63094, + 10.1336, + 9.86026, + 9.42234, + 9.56492, + 9.81158, + 9.77006, + 10.6788, + 10.3331, + 10.977, + 13.4557, + 14.3796, + 13.6666, + 12.4053, + 10.7933, + 11.2118, + 12.3814, + 12.5373, + 12.2132, + 14.3878, + 13.8605, + 13.2455, + 14.975, + 14.8576, + 14.2947, + 13.883, + 13.7406, + 14.7801, + 18.3912, + 21.2028, + 20.5877, + 15.7596, + 13.1849, + 17.6013, + 16.2012, + 15.0464, + 14.7149, + 17.7239, + 18.4527 + ], + "train_epoch_time": 5.0623815059661865, + "train_loss": 1.9476230926116878, + "train_score": 0.42392283901251543, + "val_loss": 2.077079646212362, + "val_score": 0.38772782751804924 + }, + { + "epoch": 14, + "grad_norm": 0.26985040307044983, + "learning_rate": 0.33333333333333337, + "model_norm": 88.50469207763672, + "step_logs": { + "grad_norm": { + "756": 0.30478379130363464, + "757": 0.3027219772338867, + "758": 0.32425057888031006, + "759": 0.3415772616863251, + "760": 0.303385853767395, + "761": 0.30860263109207153, + "762": 0.3042046129703522, + "763": 0.28921687602996826, + "764": 0.301988810300827, + "765": 0.2913858890533447, + "766": 0.28916874527931213, + "767": 0.303390234708786, + "768": 0.31164324283599854, + "769": 0.31332913041114807, + "770": 0.3099716901779175, + "771": 0.2818314731121063, + "772": 0.27313002943992615, + "773": 0.30405449867248535, + "774": 0.3092050850391388, + "775": 0.33552536368370056, + "776": 0.30218085646629333, + "777": 0.2772131860256195, + "778": 0.2984389662742615, + "779": 0.2824351191520691, + "780": 0.28806403279304504, + "781": 0.28275948762893677, + "782": 0.2901439964771271, + "783": 0.2757797837257385, + "784": 0.28935128450393677, + "785": 0.2884388267993927, + "786": 0.2947910726070404, + "787": 0.28605902194976807, + "788": 0.29526323080062866, + "789": 0.2733495831489563, + "790": 0.28498467803001404, + "791": 0.2944028973579407, + "792": 0.27763620018959045, + "793": 0.27936363220214844, + "794": 0.2877821922302246, + "795": 0.26401326060295105, + "796": 0.2605854272842407, + "797": 0.2725641429424286, + "798": 0.2814179062843323, + "799": 0.27628862857818604, + "800": 0.28744468092918396, + "801": 0.2990172803401947, + "802": 0.27801597118377686, + "803": 0.28110939264297485, + "804": 0.27005496621131897, + "805": 0.28657811880111694, + "806": 0.2674529254436493, + "807": 0.28101804852485657, + "808": 0.26782262325286865, + "809": 0.26985040307044983 + }, + "loss": { + "756": 1.9605858325958252, + "757": 1.968909502029419, + "758": 1.9600588083267212, + "759": 1.9773964881896973, + "760": 1.9438071250915527, + "761": 1.9563130140304565, + "762": 1.949770212173462, + "763": 1.9656846523284912, + "764": 1.8893201351165771, + "765": 1.9340025186538696, + "766": 1.944157600402832, + "767": 1.9542779922485352, + "768": 1.9738895893096924, + "769": 1.918994426727295, + "770": 1.9660694599151611, + "771": 1.9126553535461426, + "772": 1.9468989372253418, + "773": 1.9277746677398682, + "774": 1.933929681777954, + "775": 1.922041416168213, + "776": 1.9419910907745361, + "777": 1.9250818490982056, + "778": 1.9404267072677612, + "779": 1.9482839107513428, + "780": 1.9292819499969482, + "781": 1.898986577987671, + "782": 1.946297287940979, + "783": 1.8935195207595825, + "784": 1.9474821090698242, + "785": 1.9518009424209595, + "786": 1.9261317253112793, + "787": 1.963820219039917, + "788": 1.9378907680511475, + "789": 1.8760712146759033, + "790": 1.905753254890442, + "791": 1.9465609788894653, + "792": 1.9615123271942139, + "793": 1.911255955696106, + "794": 1.93943190574646, + "795": 1.920114517211914, + "796": 1.9334185123443604, + "797": 1.8845516443252563, + "798": 1.92726731300354, + "799": 1.9357072114944458, + "800": 1.9099059104919434, + "801": 1.8913452625274658, + "802": 1.949580430984497, + "803": 1.9286179542541504, + "804": 1.9588322639465332, + "805": 1.9380176067352295, + "806": 1.9361475706100464, + "807": 1.9308364391326904, + "808": 1.9216241836547852, + "809": 1.9016042947769165 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 21.1058, + 21.4851, + 18.6426, + 16.9479, + 21.1185, + 20.5418, + 21.0694, + 23.4999, + 20.7169, + 22.7782, + 23.2503, + 21.2316, + 20.3239, + 19.5466, + 20.4623, + 24.0801, + 26.0978, + 20.8523, + 20.2277, + 17.0731, + 21.2673, + 25.0508, + 21.7864, + 24.4239, + 23.2497, + 23.7513, + 23.1197, + 24.8969, + 23.2607, + 23.46, + 22.1645, + 23.9988, + 22.2285, + 25.108, + 23.4652, + 22.4587, + 25.4471, + 24.4895, + 23.4178, + 27.5471, + 28.4725, + 25.3671, + 24.3354, + 25.3579, + 23.1155, + 21.1533, + 25.2233, + 24.4059, + 26.8592, + 23.5978, + 27.0672, + 24.4499, + 26.7901, + 26.114 + ], + "train_epoch_time": 5.064804315567017, + "train_loss": 1.9235311545121618, + "train_score": 0.4305830792511895, + "val_loss": 2.0592152544058835, + "val_score": 0.39383162282638245 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:37:42.127979", + "final_model_norm": 88.50469207763672, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:35:57.059600", + "step_scheduler_on_epoch": false + } + } +] \ No newline at end of file diff --git a/output/lr-stability/shakespeare-3.json b/output/lr-stability/shakespeare-3.json new file mode 100644 index 0000000..ca09a2a --- /dev/null +++ b/output/lr-stability/shakespeare-3.json @@ -0,0 +1,43214 @@ +[ + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 2.829242706298828, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.42960357666016, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 8.929902076721191, + "3": 5.186877250671387, + "4": 3.783720016479492, + "5": 3.923210382461548, + "6": 9.143267631530762, + "7": 23.94384002685547, + "8": 8.97022533416748, + "9": 3.847670555114746, + "10": 3.9274423122406006, + "11": 5.522656440734863, + "12": 4.651491641998291, + "13": 4.442531108856201, + "14": 4.289407730102539, + "15": 12.143924713134766, + "16": 5.79517126083374, + "17": 10.25547981262207, + "18": 3.454157590866089, + "19": 6.344359874725342, + "20": 3.7780702114105225, + "21": 7.477146148681641, + "22": 3.879782199859619, + "23": 6.381377220153809, + "24": 5.72065544128418, + "25": 4.4057769775390625, + "26": 6.91972541809082, + "27": 4.443119049072266, + "28": 37.84668731689453, + "29": 3.661571741104126, + "30": 3.1671783924102783, + "31": 3.988196611404419, + "32": 4.033689022064209, + "33": 3.5805203914642334, + "34": 2.4237263202667236, + "35": 2.7346973419189453, + "36": 3.0601348876953125, + "37": 2.965196132659912, + "38": 3.7202541828155518, + "39": 3.718553066253662, + "40": 2.521270751953125, + "41": 4.433636665344238, + "42": 3.129775047302246, + "43": 26.615100860595703, + "44": 1.878593921661377, + "45": 2.9344465732574463, + "46": 3.6754989624023438, + "47": 4.5123515129089355, + "48": 2.5713019371032715, + "49": 2.2343690395355225, + "50": 2.1086156368255615, + "51": 1.9976294040679932, + "52": 3.213641405105591, + "53": 2.829242706298828 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.9741530418395996, + "3": 3.7462706565856934, + "4": 3.6170592308044434, + "5": 3.5356454849243164, + "6": 3.550992727279663, + "7": 4.202289581298828, + "8": 3.9621734619140625, + "9": 3.4726767539978027, + "10": 3.3988418579101562, + "11": 3.424409866333008, + "12": 3.4638521671295166, + "13": 3.2228474617004395, + "14": 3.235687255859375, + "15": 3.296962261199951, + "16": 3.293919086456299, + "17": 3.6316699981689453, + "18": 3.214136838912964, + "19": 3.1056227684020996, + "20": 3.1635918617248535, + "21": 3.1394731998443604, + "22": 3.06063175201416, + "23": 3.1491594314575195, + "24": 3.1002345085144043, + "25": 3.1068034172058105, + "26": 2.945953845977783, + "27": 3.0315403938293457, + "28": 3.954380750656128, + "29": 2.937297821044922, + "30": 2.8964157104492188, + "31": 2.8965346813201904, + "32": 2.928769826889038, + "33": 2.9699316024780273, + "34": 2.7929859161376953, + "35": 2.7797155380249023, + "36": 2.8007261753082275, + "37": 2.807225227355957, + "38": 2.8082680702209473, + "39": 2.945089817047119, + "40": 2.779898166656494, + "41": 2.786756992340088, + "42": 2.7511987686157227, + "43": 3.5148537158966064, + "44": 2.698302984237671, + "45": 2.728705406188965, + "46": 2.8470048904418945, + "47": 3.057342052459717, + "48": 2.8441390991210938, + "49": 2.703319787979126, + "50": 2.7191898822784424, + "51": 2.660457134246826, + "52": 2.7339060306549072, + "53": 2.8816604614257812 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 1e-11, + 0.00179115, + 0.00384567, + 0.00587346, + 0.00787532, + 0.00978697, + 0.0105147, + 0.00716116, + 0.0137638, + 0.0173349, + 0.0191318, + 0.0200369, + 0.0223265, + 0.0240828, + 0.0259353, + 0.0179538, + 0.0275119, + 0.0227832, + 0.0337452, + 0.0304914, + 0.0366892, + 0.0305684, + 0.039704, + 0.0354551, + 0.0382976, + 0.0432452, + 0.0365529, + 0.0459253, + 0.0050259, + 0.0512201, + 0.0543529, + 0.052981, + 0.0543398, + 0.0577706, + 0.0634617, + 0.0639758, + 0.0642646, + 0.066315, + 0.0640119, + 0.0659279, + 0.0732958, + 0.0636051, + 0.0730728, + 0.00889719, + 0.0832114, + 0.0788087, + 0.0755167, + 0.0715912, + 0.0863633, + 0.0898677, + 0.0924422, + 0.0930235, + 0.0841129, + 0.0878049 + ], + "train_epoch_time": 5.0122058391571045, + "train_loss": 2.6955285815973706, + "train_score": 0.23732402259256782, + "val_loss": 2.722465205274685, + "val_score": 0.22831156707863856 + }, + { + "epoch": 1, + "grad_norm": 1.632340908050537, + "learning_rate": 0.1, + "model_norm": 87.44588470458984, + "step_logs": { + "grad_norm": { + "54": 2.167717695236206, + "55": 1.8843998908996582, + "56": 2.296365261077881, + "57": 2.6649978160858154, + "58": 2.9070963859558105, + "59": 2.2548794746398926, + "60": 1.5094093084335327, + "61": 1.9111990928649902, + "62": 3.303433895111084, + "63": 2.2870326042175293, + "64": 1.2310452461242676, + "65": 1.1800872087478638, + "66": 1.3149524927139282, + "67": 1.5462857484817505, + "68": 1.7933094501495361, + "69": 2.478090763092041, + "70": 2.4005751609802246, + "71": 2.347316265106201, + "72": 2.1841657161712646, + "73": 1.783528208732605, + "74": 1.9344581365585327, + "75": 2.398865222930908, + "76": 2.024362564086914, + "77": 1.452140212059021, + "78": 1.7936229705810547, + "79": 2.5270416736602783, + "80": 2.1543946266174316, + "81": 1.4391276836395264, + "82": 1.6454904079437256, + "83": 2.532956600189209, + "84": 2.4044623374938965, + "85": 1.7642533779144287, + "86": 1.7786751985549927, + "87": 1.9635628461837769, + "88": 2.08329439163208, + "89": 2.3949873447418213, + "90": 2.205061912536621, + "91": 1.7287977933883667, + "92": 1.8417333364486694, + "93": 2.232008218765259, + "94": 2.0573503971099854, + "95": 1.4013190269470215, + "96": 1.3763247728347778, + "97": 1.868039608001709, + "98": 1.8802298307418823, + "99": 1.754790186882019, + "100": 1.76875901222229, + "101": 1.8400527238845825, + "102": 1.7403810024261475, + "103": 1.7100898027420044, + "104": 1.7036994695663452, + "105": 1.6900243759155273, + "106": 1.6948237419128418, + "107": 1.632340908050537 + }, + "loss": { + "54": 2.697671890258789, + "55": 2.6580300331115723, + "56": 2.6626007556915283, + "57": 2.7512998580932617, + "58": 2.7369589805603027, + "59": 2.7951793670654297, + "60": 2.5983762741088867, + "61": 2.6475000381469727, + "62": 2.698931932449341, + "63": 2.8319039344787598, + "64": 2.590872287750244, + "65": 2.576648473739624, + "66": 2.5834898948669434, + "67": 2.590606689453125, + "68": 2.5987179279327393, + "69": 2.6423301696777344, + "70": 2.727649688720703, + "71": 2.6386632919311523, + "72": 2.726167917251587, + "73": 2.5826878547668457, + "74": 2.6456406116485596, + "75": 2.620405435562134, + "76": 2.695251226425171, + "77": 2.5784120559692383, + "78": 2.6063196659088135, + "79": 2.6195664405822754, + "80": 2.710402488708496, + "81": 2.5660197734832764, + "82": 2.5647058486938477, + "83": 2.611298084259033, + "84": 2.7177627086639404, + "85": 2.5913009643554688, + "86": 2.5952882766723633, + "87": 2.6092567443847656, + "88": 2.612041473388672, + "89": 2.6423397064208984, + "90": 2.696099042892456, + "91": 2.579451084136963, + "92": 2.6191582679748535, + "93": 2.592412233352661, + "94": 2.6643106937408447, + "95": 2.571406841278076, + "96": 2.5484509468078613, + "97": 2.553044319152832, + "98": 2.600273370742798, + "99": 2.5597410202026367, + "100": 2.584235906600952, + "101": 2.562354564666748, + "102": 2.6044912338256836, + "103": 2.558192491531372, + "104": 2.5852434635162354, + "105": 2.5465574264526367, + "106": 2.5729241371154785, + "107": 2.574094772338867 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 0.0919884, + 0.0937386, + 0.0909897, + 0.0885685, + 0.0866258, + 0.0916632, + 0.0958, + 0.0935468, + 0.0831832, + 0.0915458, + 0.0971585, + 0.0973688, + 0.0967619, + 0.0955888, + 0.094173, + 0.0895894, + 0.0904457, + 0.0905463, + 0.0919544, + 0.094199, + 0.0933949, + 0.0901061, + 0.0929348, + 0.0960715, + 0.0941871, + 0.0891354, + 0.0921131, + 0.0961209, + 0.094986, + 0.0890592, + 0.0903862, + 0.0943344, + 0.0942551, + 0.09312, + 0.0923294, + 0.0902088, + 0.0917286, + 0.0945239, + 0.0939185, + 0.0912338, + 0.0926412, + 0.0963221, + 0.0964167, + 0.093603, + 0.0936348, + 0.0943264, + 0.0942924, + 0.0938026, + 0.0945047, + 0.0945933, + 0.0946846, + 0.0946899, + 0.0947131, + 0.095079 + ], + "train_epoch_time": 4.843577861785889, + "train_loss": 2.578076574552692, + "train_score": 0.24235002684114312, + "val_loss": 2.6129268844420546, + "val_score": 0.24001686288691826 + }, + { + "epoch": 2, + "grad_norm": 1.6892448663711548, + "learning_rate": 0.1, + "model_norm": 87.45940399169922, + "step_logs": { + "grad_norm": { + "108": 1.6401013135910034, + "109": 1.7984172105789185, + "110": 1.7912962436676025, + "111": 1.6297528743743896, + "112": 1.71138596534729, + "113": 1.810578465461731, + "114": 1.8892784118652344, + "115": 1.8201881647109985, + "116": 1.6007473468780518, + "117": 1.6456505060195923, + "118": 1.6218377351760864, + "119": 1.6116821765899658, + "120": 1.6829594373703003, + "121": 1.7640148401260376, + "122": 1.6729094982147217, + "123": 1.4540841579437256, + "124": 1.4654661417007446, + "125": 1.5338268280029297, + "126": 1.582726001739502, + "127": 1.622740626335144, + "128": 1.735021948814392, + "129": 1.7508931159973145, + "130": 1.8040889501571655, + "131": 1.6388498544692993, + "132": 1.5618400573730469, + "133": 1.6743911504745483, + "134": 1.719663143157959, + "135": 1.9646096229553223, + "136": 1.7544358968734741, + "137": 1.4120299816131592, + "138": 1.4097856283187866, + "139": 1.6519172191619873, + "140": 1.753260612487793, + "141": 1.7085126638412476, + "142": 1.7379885911941528, + "143": 1.701741099357605, + "144": 1.6415828466415405, + "145": 1.4609078168869019, + "146": 1.487511157989502, + "147": 1.7455832958221436, + "148": 1.6841126680374146, + "149": 1.5068544149398804, + "150": 1.6761696338653564, + "151": 1.7969931364059448, + "152": 1.734042763710022, + "153": 1.5202258825302124, + "154": 1.3413509130477905, + "155": 1.2147680521011353, + "156": 1.26915442943573, + "157": 1.4844236373901367, + "158": 1.4759669303894043, + "159": 1.4076545238494873, + "160": 1.4967929124832153, + "161": 1.6892448663711548 + }, + "loss": { + "108": 2.5715224742889404, + "109": 2.532073497772217, + "110": 2.599666118621826, + "111": 2.557715892791748, + "112": 2.568601131439209, + "113": 2.5607824325561523, + "114": 2.580101728439331, + "115": 2.5469844341278076, + "116": 2.5666747093200684, + "117": 2.549964666366577, + "118": 2.5779125690460205, + "119": 2.5485129356384277, + "120": 2.5577869415283203, + "121": 2.543717384338379, + "122": 2.585871696472168, + "123": 2.5400009155273438, + "124": 2.549086093902588, + "125": 2.5163979530334473, + "126": 2.5393409729003906, + "127": 2.538783073425293, + "128": 2.5583629608154297, + "129": 2.5411787033081055, + "130": 2.552502155303955, + "131": 2.5548455715179443, + "132": 2.558537483215332, + "133": 2.5306711196899414, + "134": 2.5770304203033447, + "135": 2.560976505279541, + "136": 2.6058897972106934, + "137": 2.5130372047424316, + "138": 2.5369863510131836, + "139": 2.516374111175537, + "140": 2.5712404251098633, + "141": 2.5201947689056396, + "142": 2.555270195007324, + "143": 2.536632537841797, + "144": 2.580138921737671, + "145": 2.5126419067382812, + "146": 2.5284831523895264, + "147": 2.5315775871276855, + "148": 2.5695245265960693, + "149": 2.5148541927337646, + "150": 2.5509133338928223, + "151": 2.5566539764404297, + "152": 2.5582828521728516, + "153": 2.5279204845428467, + "154": 2.5321807861328125, + "155": 2.5014920234680176, + "156": 2.504667282104492, + "157": 2.5098249912261963, + "158": 2.528846263885498, + "159": 2.512179374694824, + "160": 2.5179286003112793, + "161": 2.5021111965179443 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 0.0950297, + 0.0939967, + 0.0941873, + 0.095064, + 0.0946063, + 0.0939843, + 0.0935304, + 0.0938932, + 0.0952457, + 0.0949576, + 0.0951459, + 0.095151, + 0.0947537, + 0.094236, + 0.0948664, + 0.0960042, + 0.0959578, + 0.0955342, + 0.0952994, + 0.0950696, + 0.0944436, + 0.0943112, + 0.0940065, + 0.0950062, + 0.0954498, + 0.0947515, + 0.0945737, + 0.0929925, + 0.0944234, + 0.0961844, + 0.0962306, + 0.0948567, + 0.0943596, + 0.0945258, + 0.0944193, + 0.0946, + 0.095037, + 0.095926, + 0.0958079, + 0.0943235, + 0.0947697, + 0.0956806, + 0.0947805, + 0.0940599, + 0.0944494, + 0.0956287, + 0.0965692, + 0.0971349, + 0.0968847, + 0.0957948, + 0.0958706, + 0.0962059, + 0.0957406, + 0.0946053 + ], + "train_epoch_time": 4.84375262260437, + "train_loss": 2.5431613441861343, + "train_score": 0.24829402800269929, + "val_loss": 2.585049842720601, + "val_score": 0.23902123997118388 + }, + { + "epoch": 3, + "grad_norm": 1.6417392492294312, + "learning_rate": 0.1, + "model_norm": 87.47151184082031, + "step_logs": { + "grad_norm": { + "162": 1.6463249921798706, + "163": 1.5342001914978027, + "164": 1.455183982849121, + "165": 1.5131183862686157, + "166": 1.6168954372406006, + "167": 1.5889735221862793, + "168": 1.5289008617401123, + "169": 1.4898327589035034, + "170": 1.570967197418213, + "171": 1.6290321350097656, + "172": 1.5947881937026978, + "173": 1.5506904125213623, + "174": 1.4376262426376343, + "175": 1.398522973060608, + "176": 1.4237021207809448, + "177": 1.4817471504211426, + "178": 1.4833542108535767, + "179": 1.4684853553771973, + "180": 1.453198790550232, + "181": 1.493291974067688, + "182": 1.573236346244812, + "183": 1.5547974109649658, + "184": 1.5328584909439087, + "185": 1.5478310585021973, + "186": 1.5275315046310425, + "187": 1.415902018547058, + "188": 1.3994611501693726, + "189": 1.393317461013794, + "190": 1.4230738878250122, + "191": 1.5195673704147339, + "192": 1.4761430025100708, + "193": 1.4361157417297363, + "194": 1.352280616760254, + "195": 1.5173275470733643, + "196": 1.5241765975952148, + "197": 1.4327393770217896, + "198": 1.4824446439743042, + "199": 1.5598995685577393, + "200": 1.4209989309310913, + "201": 1.2262513637542725, + "202": 1.3120940923690796, + "203": 1.444899559020996, + "204": 1.4460457563400269, + "205": 1.4080867767333984, + "206": 1.5145533084869385, + "207": 1.6775528192520142, + "208": 1.567270278930664, + "209": 1.400187611579895, + "210": 1.3919522762298584, + "211": 1.6053329706192017, + "212": 1.5757410526275635, + "213": 1.680374026298523, + "214": 1.709222435951233, + "215": 1.6417392492294312 + }, + "loss": { + "162": 2.5523223876953125, + "163": 2.5141079425811768, + "164": 2.5111641883850098, + "165": 2.5069899559020996, + "166": 2.541145086288452, + "167": 2.515294075012207, + "168": 2.5161244869232178, + "169": 2.5069851875305176, + "170": 2.526698589324951, + "171": 2.5056231021881104, + "172": 2.5440969467163086, + "173": 2.5141029357910156, + "174": 2.508448600769043, + "175": 2.475994825363159, + "176": 2.5205020904541016, + "177": 2.485654354095459, + "178": 2.499504327774048, + "179": 2.5099403858184814, + "180": 2.522557497024536, + "181": 2.5125904083251953, + "182": 2.512593984603882, + "183": 2.52034854888916, + "184": 2.530547618865967, + "185": 2.512237548828125, + "186": 2.529956102371216, + "187": 2.4996047019958496, + "188": 2.511016368865967, + "189": 2.5138168334960938, + "190": 2.5169854164123535, + "191": 2.500276565551758, + "192": 2.5048160552978516, + "193": 2.4937100410461426, + "194": 2.4996755123138428, + "195": 2.4886741638183594, + "196": 2.5265872478485107, + "197": 2.5009970664978027, + "198": 2.4965901374816895, + "199": 2.4987947940826416, + "200": 2.5282092094421387, + "201": 2.4793591499328613, + "202": 2.494969367980957, + "203": 2.4941344261169434, + "204": 2.508145570755005, + "205": 2.4973325729370117, + "206": 2.507845163345337, + "207": 2.503416061401367, + "208": 2.509850025177002, + "209": 2.4855904579162598, + "210": 2.486443281173706, + "211": 2.4937844276428223, + "212": 2.501904010772705, + "213": 2.5076377391815186, + "214": 2.536623477935791, + "215": 2.5081586837768555 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 0.0949581, + 0.0955282, + 0.0959543, + 0.0956331, + 0.0951076, + 0.0952209, + 0.0955611, + 0.0957608, + 0.0953437, + 0.0949707, + 0.0952394, + 0.095436, + 0.0960434, + 0.0962004, + 0.0961345, + 0.0957703, + 0.095784, + 0.0958811, + 0.0959824, + 0.0957511, + 0.0953059, + 0.0954237, + 0.0955634, + 0.0954488, + 0.0955918, + 0.0961444, + 0.0962466, + 0.0962822, + 0.0961326, + 0.0955862, + 0.0958317, + 0.0960289, + 0.0964713, + 0.095579, + 0.0956047, + 0.0960579, + 0.0957843, + 0.0953571, + 0.0961599, + 0.0970568, + 0.0966649, + 0.0959828, + 0.0959983, + 0.0961819, + 0.0956266, + 0.0946784, + 0.0953349, + 0.0962059, + 0.0962499, + 0.0950868, + 0.0952725, + 0.09467, + 0.094555, + 0.0949009 + ], + "train_epoch_time": 4.84400200843811, + "train_loss": 2.5096431797169885, + "train_score": 0.255805012468286, + "val_loss": 2.5529645710122844, + "val_score": 0.25108083377749446 + }, + { + "epoch": 4, + "grad_norm": 1.2616267204284668, + "learning_rate": 0.1, + "model_norm": 87.48371887207031, + "step_logs": { + "grad_norm": { + "216": 1.4753199815750122, + "217": 1.4843089580535889, + "218": 1.702085018157959, + "219": 1.9024754762649536, + "220": 1.770369529724121, + "221": 1.5003074407577515, + "222": 1.4227277040481567, + "223": 1.2950764894485474, + "224": 1.260401725769043, + "225": 1.4663050174713135, + "226": 1.4639695882797241, + "227": 1.3770688772201538, + "228": 1.3091496229171753, + "229": 1.3295845985412598, + "230": 1.4498988389968872, + "231": 1.5564842224121094, + "232": 1.4637154340744019, + "233": 1.3805773258209229, + "234": 1.4203565120697021, + "235": 1.3844490051269531, + "236": 1.404839277267456, + "237": 1.3583775758743286, + "238": 1.377422571182251, + "239": 1.4285433292388916, + "240": 1.3406805992126465, + "241": 1.3092323541641235, + "242": 1.4103388786315918, + "243": 1.447407841682434, + "244": 1.336208462715149, + "245": 1.4705170392990112, + "246": 1.558107614517212, + "247": 1.5607337951660156, + "248": 1.5305641889572144, + "249": 1.7422747611999512, + "250": 1.6681272983551025, + "251": 1.5193485021591187, + "252": 1.5240287780761719, + "253": 1.4833558797836304, + "254": 1.4607470035552979, + "255": 1.3434115648269653, + "256": 1.3210078477859497, + "257": 1.3474167585372925, + "258": 1.377573013305664, + "259": 1.197898507118225, + "260": 1.021098256111145, + "261": 1.1203125715255737, + "262": 1.2265043258666992, + "263": 1.2755136489868164, + "264": 1.2767614126205444, + "265": 1.0849404335021973, + "266": 1.154232144355774, + "267": 1.2829339504241943, + "268": 1.204483151435852, + "269": 1.2616267204284668 + }, + "loss": { + "216": 2.52264404296875, + "217": 2.498532295227051, + "218": 2.513336658477783, + "219": 2.5009446144104004, + "220": 2.5377063751220703, + "221": 2.4965500831604004, + "222": 2.4910073280334473, + "223": 2.4984376430511475, + "224": 2.4834978580474854, + "225": 2.476687431335449, + "226": 2.509334087371826, + "227": 2.4919402599334717, + "228": 2.496373176574707, + "229": 2.46787691116333, + "230": 2.4884681701660156, + "231": 2.4986793994903564, + "232": 2.5191092491149902, + "233": 2.4749345779418945, + "234": 2.513787269592285, + "235": 2.482914686203003, + "236": 2.4701218605041504, + "237": 2.4905314445495605, + "238": 2.4847304821014404, + "239": 2.469801425933838, + "240": 2.4805047512054443, + "241": 2.4836788177490234, + "242": 2.4825828075408936, + "243": 2.4821274280548096, + "244": 2.486481189727783, + "245": 2.4815030097961426, + "246": 2.4846348762512207, + "247": 2.485553741455078, + "248": 2.5009756088256836, + "249": 2.487590789794922, + "250": 2.5263524055480957, + "251": 2.4827191829681396, + "252": 2.4865167140960693, + "253": 2.4900436401367188, + "254": 2.483656883239746, + "255": 2.483293056488037, + "256": 2.4900450706481934, + "257": 2.4850914478302, + "258": 2.4637441635131836, + "259": 2.4640021324157715, + "260": 2.4561357498168945, + "261": 2.4686825275421143, + "262": 2.480060577392578, + "263": 2.45208740234375, + "264": 2.4610671997070312, + "265": 2.452866554260254, + "266": 2.444319248199463, + "267": 2.437253713607788, + "268": 2.4502177238464355, + "269": 2.4595770835876465 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 0.0958643, + 0.0957773, + 0.0945506, + 0.0932522, + 0.0941839, + 0.0956864, + 0.0960957, + 0.0967525, + 0.0969008, + 0.09584, + 0.0959044, + 0.0963346, + 0.0966812, + 0.0965422, + 0.0959473, + 0.0953763, + 0.095921, + 0.0962922, + 0.0961421, + 0.0962837, + 0.0961586, + 0.0964279, + 0.0963225, + 0.0960325, + 0.0965036, + 0.0966644, + 0.0961483, + 0.0959507, + 0.0965341, + 0.0958248, + 0.0953421, + 0.0953288, + 0.0955261, + 0.0942495, + 0.0947802, + 0.0955576, + 0.0955379, + 0.0957687, + 0.0958813, + 0.0964936, + 0.0966146, + 0.0964759, + 0.0962916, + 0.0971705, + 0.0979216, + 0.097521, + 0.0970565, + 0.0967891, + 0.0967944, + 0.0976568, + 0.0973471, + 0.0967337, + 0.0971246, + 0.0968657 + ], + "train_epoch_time": 4.843307971954346, + "train_loss": 2.4654785947786, + "train_score": 0.2695906564614694, + "val_loss": 2.5175748082170806, + "val_score": 0.26251255750792996 + }, + { + "epoch": 5, + "grad_norm": 1.1777217388153076, + "learning_rate": 0.1, + "model_norm": 87.49819946289062, + "step_logs": { + "grad_norm": { + "270": 1.5681922435760498, + "271": 1.7132329940795898, + "272": 1.6316648721694946, + "273": 1.48341965675354, + "274": 1.5266185998916626, + "275": 1.5643476247787476, + "276": 1.5164021253585815, + "277": 1.4735674858093262, + "278": 1.5165610313415527, + "279": 1.6102924346923828, + "280": 1.541800618171692, + "281": 1.5295721292495728, + "282": 1.5281633138656616, + "283": 1.4212607145309448, + "284": 1.3670010566711426, + "285": 1.557177186012268, + "286": 1.717246651649475, + "287": 1.7445170879364014, + "288": 1.616858959197998, + "289": 1.492230772972107, + "290": 1.4443650245666504, + "291": 1.4495494365692139, + "292": 1.5596765279769897, + "293": 1.5833477973937988, + "294": 1.6813318729400635, + "295": 1.5986870527267456, + "296": 1.3505122661590576, + "297": 1.276371717453003, + "298": 1.2337177991867065, + "299": 1.1896799802780151, + "300": 1.2713990211486816, + "301": 1.3051244020462036, + "302": 1.4924052953720093, + "303": 1.5375165939331055, + "304": 1.6132373809814453, + "305": 1.6318001747131348, + "306": 1.5559868812561035, + "307": 1.5971157550811768, + "308": 1.6797418594360352, + "309": 1.6264094114303589, + "310": 1.5401262044906616, + "311": 1.3944423198699951, + "312": 1.283914566040039, + "313": 1.3803130388259888, + "314": 1.4413127899169922, + "315": 1.4595688581466675, + "316": 1.8463311195373535, + "317": 1.648689866065979, + "318": 1.7266327142715454, + "319": 1.6444430351257324, + "320": 1.6546168327331543, + "321": 1.461954116821289, + "322": 1.2780884504318237, + "323": 1.1777217388153076 + }, + "loss": { + "270": 2.4524741172790527, + "271": 2.484706163406372, + "272": 2.4989864826202393, + "273": 2.4877777099609375, + "274": 2.4687724113464355, + "275": 2.480257511138916, + "276": 2.479813575744629, + "277": 2.490440607070923, + "278": 2.4598026275634766, + "279": 2.5062897205352783, + "280": 2.470792531967163, + "281": 2.4756789207458496, + "282": 2.4691925048828125, + "283": 2.4633398056030273, + "284": 2.4576454162597656, + "285": 2.466660261154175, + "286": 2.4862875938415527, + "287": 2.4686734676361084, + "288": 2.484574794769287, + "289": 2.455036163330078, + "290": 2.4594199657440186, + "291": 2.4611167907714844, + "292": 2.4461159706115723, + "293": 2.483973264694214, + "294": 2.445065975189209, + "295": 2.496243715286255, + "296": 2.4402995109558105, + "297": 2.43380069732666, + "298": 2.4325037002563477, + "299": 2.4196219444274902, + "300": 2.4435489177703857, + "301": 2.4557044506073, + "302": 2.4425840377807617, + "303": 2.4472739696502686, + "304": 2.4574999809265137, + "305": 2.461731433868408, + "306": 2.4328176975250244, + "307": 2.4578046798706055, + "308": 2.454918146133423, + "309": 2.4583582878112793, + "310": 2.4348974227905273, + "311": 2.4526233673095703, + "312": 2.407618284225464, + "313": 2.4238271713256836, + "314": 2.4122562408447266, + "315": 2.424379348754883, + "316": 2.4501571655273438, + "317": 2.455643653869629, + "318": 2.430037260055542, + "319": 2.4438700675964355, + "320": 2.4183945655822754, + "321": 2.437368869781494, + "322": 2.415073871612549, + "323": 2.396214485168457 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 0.0952256, + 0.0944229, + 0.0949426, + 0.0957646, + 0.0954927, + 0.0952986, + 0.0955691, + 0.0958226, + 0.0955337, + 0.0950814, + 0.0954103, + 0.095488, + 0.0954847, + 0.0960614, + 0.0963374, + 0.0953151, + 0.0944016, + 0.094194, + 0.095002, + 0.0956617, + 0.0959313, + 0.095906, + 0.0952632, + 0.0951961, + 0.0945351, + 0.09513, + 0.0963976, + 0.0967615, + 0.0969663, + 0.0971584, + 0.0967983, + 0.0966481, + 0.0956395, + 0.0953927, + 0.0949712, + 0.0948692, + 0.09526, + 0.0950668, + 0.0945656, + 0.0948946, + 0.0953554, + 0.0961871, + 0.0966899, + 0.0962183, + 0.0958719, + 0.0957913, + 0.0934959, + 0.0947557, + 0.0942203, + 0.0947575, + 0.0946429, + 0.0957997, + 0.0967287, + 0.0971872 + ], + "train_epoch_time": 4.843637228012085, + "train_loss": 2.3951509260207713, + "train_score": 0.29289813482949517, + "val_loss": 2.450149717068152, + "val_score": 0.2812141221097909 + }, + { + "epoch": 6, + "grad_norm": 1.392254114151001, + "learning_rate": 0.1, + "model_norm": 87.51399993896484, + "step_logs": { + "grad_norm": { + "324": 1.1860820055007935, + "325": 1.139835238456726, + "326": 1.2046810388565063, + "327": 1.3439661264419556, + "328": 1.4064558744430542, + "329": 1.3534471988677979, + "330": 1.3458380699157715, + "331": 1.2536165714263916, + "332": 1.443936824798584, + "333": 1.554681658744812, + "334": 1.6362686157226562, + "335": 1.7995811700820923, + "336": 1.8044660091400146, + "337": 1.9230691194534302, + "338": 1.793067455291748, + "339": 1.5272263288497925, + "340": 1.470845103263855, + "341": 1.485547661781311, + "342": 1.5559905767440796, + "343": 1.6723933219909668, + "344": 1.6789045333862305, + "345": 1.5583182573318481, + "346": 1.497838020324707, + "347": 1.481758952140808, + "348": 1.4876999855041504, + "349": 1.4353747367858887, + "350": 1.425061583518982, + "351": 1.5410263538360596, + "352": 1.6250354051589966, + "353": 1.6732479333877563, + "354": 1.62208092212677, + "355": 1.639390230178833, + "356": 1.4765866994857788, + "357": 1.2863820791244507, + "358": 1.2355879545211792, + "359": 1.235979437828064, + "360": 1.3444041013717651, + "361": 1.580484390258789, + "362": 1.5247668027877808, + "363": 1.3593993186950684, + "364": 1.3910149335861206, + "365": 1.698815107345581, + "366": 1.7982655763626099, + "367": 1.7412036657333374, + "368": 1.5068066120147705, + "369": 1.3631888628005981, + "370": 1.187957525253296, + "371": 1.3362401723861694, + "372": 1.4558069705963135, + "373": 1.4392685890197754, + "374": 1.3998827934265137, + "375": 1.2879066467285156, + "376": 1.3272349834442139, + "377": 1.392254114151001 + }, + "loss": { + "324": 2.3722052574157715, + "325": 2.411508798599243, + "326": 2.385117530822754, + "327": 2.400644063949585, + "328": 2.4109585285186768, + "329": 2.3783326148986816, + "330": 2.3828747272491455, + "331": 2.3994574546813965, + "332": 2.3886513710021973, + "333": 2.4140372276306152, + "334": 2.393425941467285, + "335": 2.4094622135162354, + "336": 2.4645755290985107, + "337": 2.4251999855041504, + "338": 2.4448819160461426, + "339": 2.4097023010253906, + "340": 2.398139476776123, + "341": 2.3962512016296387, + "342": 2.4052672386169434, + "343": 2.3844337463378906, + "344": 2.40040922164917, + "345": 2.394157886505127, + "346": 2.4045188426971436, + "347": 2.4006471633911133, + "348": 2.419200897216797, + "349": 2.3951902389526367, + "350": 2.3989980220794678, + "351": 2.407146453857422, + "352": 2.4095678329467773, + "353": 2.4075894355773926, + "354": 2.415912628173828, + "355": 2.387561321258545, + "356": 2.4027962684631348, + "357": 2.377953052520752, + "358": 2.4014976024627686, + "359": 2.3618595600128174, + "360": 2.361713409423828, + "361": 2.382688283920288, + "362": 2.3993608951568604, + "363": 2.388090133666992, + "364": 2.3624143600463867, + "365": 2.3993406295776367, + "366": 2.4330270290374756, + "367": 2.4033923149108887, + "368": 2.4099574089050293, + "369": 2.3658499717712402, + "370": 2.3675289154052734, + "371": 2.3777809143066406, + "372": 2.3918118476867676, + "373": 2.3724188804626465, + "374": 2.3873162269592285, + "375": 2.3658089637756348, + "376": 2.361781120300293, + "377": 2.367701768875122 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 0.0971202, + 0.0973769, + 0.0970475, + 0.0963744, + 0.0960593, + 0.0962918, + 0.0963385, + 0.096829, + 0.0958182, + 0.0952325, + 0.0947031, + 0.0937028, + 0.0938035, + 0.0929156, + 0.0938305, + 0.0953838, + 0.0956841, + 0.0955979, + 0.0952082, + 0.09446, + 0.0944543, + 0.0951734, + 0.0955427, + 0.095627, + 0.0956258, + 0.0958764, + 0.0959393, + 0.0952991, + 0.094805, + 0.0945051, + 0.0948358, + 0.0946716, + 0.0956599, + 0.0966376, + 0.0969193, + 0.0968673, + 0.0963145, + 0.0950192, + 0.095379, + 0.096275, + 0.0960659, + 0.0943271, + 0.0937686, + 0.0940669, + 0.0955013, + 0.0962211, + 0.0971059, + 0.0963812, + 0.0957575, + 0.0958168, + 0.0960575, + 0.0966132, + 0.0964048, + 0.0960676 + ], + "train_epoch_time": 4.843627691268921, + "train_loss": 2.366916924467046, + "train_score": 0.29962338593119014, + "val_loss": 2.4222577751435588, + "val_score": 0.2854970944735268 + }, + { + "epoch": 7, + "grad_norm": 1.669904112815857, + "learning_rate": 0.1, + "model_norm": 87.52935028076172, + "step_logs": { + "grad_norm": { + "378": 1.4751824140548706, + "379": 1.554626703262329, + "380": 1.6681768894195557, + "381": 1.383451223373413, + "382": 1.116145372390747, + "383": 1.165587067604065, + "384": 1.4156062602996826, + "385": 1.8636415004730225, + "386": 1.8343044519424438, + "387": 1.7075082063674927, + "388": 1.7049862146377563, + "389": 1.808707594871521, + "390": 1.6509422063827515, + "391": 1.3917895555496216, + "392": 1.5421075820922852, + "393": 1.5486865043640137, + "394": 1.4522162675857544, + "395": 1.6417620182037354, + "396": 1.6343942880630493, + "397": 1.2589577436447144, + "398": 1.207227110862732, + "399": 1.4624335765838623, + "400": 1.4087663888931274, + "401": 1.358626365661621, + "402": 1.3215059041976929, + "403": 1.2855933904647827, + "404": 1.4452123641967773, + "405": 1.6052252054214478, + "406": 1.5231980085372925, + "407": 1.3681321144104004, + "408": 1.5055698156356812, + "409": 1.5484745502471924, + "410": 1.7293670177459717, + "411": 1.4456441402435303, + "412": 1.3399046659469604, + "413": 1.1268364191055298, + "414": 1.0692131519317627, + "415": 1.0324090719223022, + "416": 0.9636962413787842, + "417": 1.118695855140686, + "418": 1.3339556455612183, + "419": 1.4107352495193481, + "420": 1.6140360832214355, + "421": 1.7020269632339478, + "422": 1.5629189014434814, + "423": 1.5723743438720703, + "424": 1.4688177108764648, + "425": 1.4020779132843018, + "426": 1.548861026763916, + "427": 1.7145603895187378, + "428": 1.7066810131072998, + "429": 1.66238272190094, + "430": 1.5904337167739868, + "431": 1.669904112815857 + }, + "loss": { + "378": 2.371570110321045, + "379": 2.367591619491577, + "380": 2.3902666568756104, + "381": 2.361928939819336, + "382": 2.3372890949249268, + "383": 2.3406941890716553, + "384": 2.3528828620910645, + "385": 2.374268054962158, + "386": 2.4097800254821777, + "387": 2.3714141845703125, + "388": 2.3969037532806396, + "389": 2.3816514015197754, + "390": 2.401012659072876, + "391": 2.3565120697021484, + "392": 2.353564977645874, + "393": 2.3646492958068848, + "394": 2.353878974914551, + "395": 2.3482301235198975, + "396": 2.3860223293304443, + "397": 2.3507814407348633, + "398": 2.3687691688537598, + "399": 2.3317477703094482, + "400": 2.36746883392334, + "401": 2.3564627170562744, + "402": 2.358987331390381, + "403": 2.350877523422241, + "404": 2.360438346862793, + "405": 2.370955228805542, + "406": 2.381950616836548, + "407": 2.3405771255493164, + "408": 2.3413681983947754, + "409": 2.377681016921997, + "410": 2.3699045181274414, + "411": 2.3864994049072266, + "412": 2.359920024871826, + "413": 2.3440327644348145, + "414": 2.335439682006836, + "415": 2.3359432220458984, + "416": 2.321115255355835, + "417": 2.324124813079834, + "418": 2.326892137527466, + "419": 2.365225076675415, + "420": 2.3420863151550293, + "421": 2.362074851989746, + "422": 2.3602867126464844, + "423": 2.3613364696502686, + "424": 2.359339475631714, + "425": 2.3359060287475586, + "426": 2.348653793334961, + "427": 2.3661108016967773, + "428": 2.3529343605041504, + "429": 2.3723464012145996, + "430": 2.3470795154571533, + "431": 2.3493247032165527 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 0.0956132, + 0.0951438, + 0.0944991, + 0.0961061, + 0.0974042, + 0.0971797, + 0.0959155, + 0.0931843, + 0.0934743, + 0.0942087, + 0.0942827, + 0.0935734, + 0.0946289, + 0.0960522, + 0.0951909, + 0.0951734, + 0.0957124, + 0.0945723, + 0.094699, + 0.0967388, + 0.0970155, + 0.095615, + 0.0959772, + 0.096231, + 0.0964306, + 0.0966042, + 0.0957632, + 0.0948461, + 0.0953559, + 0.0961552, + 0.0953829, + 0.0951998, + 0.0940647, + 0.0958051, + 0.0963356, + 0.0973629, + 0.0976109, + 0.0977694, + 0.0980387, + 0.0973782, + 0.0963172, + 0.0959627, + 0.0947315, + 0.0942222, + 0.09508, + 0.0950253, + 0.0956278, + 0.0959621, + 0.095141, + 0.0941512, + 0.0941712, + 0.0944961, + 0.094887, + 0.0943976 + ], + "train_epoch_time": 4.8406760692596436, + "train_loss": 2.3526640700472994, + "train_score": 0.30506411416287743, + "val_loss": 2.3984971511788262, + "val_score": 0.29477612008730936 + }, + { + "epoch": 8, + "grad_norm": 1.352026104927063, + "learning_rate": 0.1, + "model_norm": 87.54547882080078, + "step_logs": { + "grad_norm": { + "432": 1.6159244775772095, + "433": 1.5814199447631836, + "434": 1.7194830179214478, + "435": 1.6321874856948853, + "436": 1.5271730422973633, + "437": 1.5218703746795654, + "438": 1.6716121435165405, + "439": 1.7178452014923096, + "440": 1.6843918561935425, + "441": 1.4707714319229126, + "442": 1.4967799186706543, + "443": 1.7388938665390015, + "444": 1.769057273864746, + "445": 1.697637677192688, + "446": 1.6095563173294067, + "447": 1.444825530052185, + "448": 1.3334919214248657, + "449": 1.3897777795791626, + "450": 1.3749045133590698, + "451": 1.1873587369918823, + "452": 1.1644511222839355, + "453": 1.2821862697601318, + "454": 1.3721824884414673, + "455": 1.4244450330734253, + "456": 1.5382767915725708, + "457": 1.5794689655303955, + "458": 1.7272545099258423, + "459": 1.818613886833191, + "460": 1.7755937576293945, + "461": 1.6588903665542603, + "462": 1.5850425958633423, + "463": 1.5457450151443481, + "464": 1.5867552757263184, + "465": 1.4877558946609497, + "466": 1.206352710723877, + "467": 1.1839745044708252, + "468": 1.3052968978881836, + "469": 1.3289339542388916, + "470": 1.4100149869918823, + "471": 1.4546879529953003, + "472": 1.489277720451355, + "473": 1.5010201930999756, + "474": 1.4384151697158813, + "475": 1.5987670421600342, + "476": 1.5114128589630127, + "477": 1.5496670007705688, + "478": 1.685840368270874, + "479": 1.6992778778076172, + "480": 1.5443183183670044, + "481": 1.3526712656021118, + "482": 1.2337331771850586, + "483": 1.2588777542114258, + "484": 1.3107843399047852, + "485": 1.352026104927063 + }, + "loss": { + "432": 2.3697707653045654, + "433": 2.352079391479492, + "434": 2.352780818939209, + "435": 2.3681323528289795, + "436": 2.3505606651306152, + "437": 2.347172737121582, + "438": 2.354395866394043, + "439": 2.3686418533325195, + "440": 2.3710150718688965, + "441": 2.3649792671203613, + "442": 2.321746349334717, + "443": 2.356660842895508, + "444": 2.3641395568847656, + "445": 2.3779103755950928, + "446": 2.336223602294922, + "447": 2.325019121170044, + "448": 2.317532539367676, + "449": 2.3175973892211914, + "450": 2.33904767036438, + "451": 2.314915895462036, + "452": 2.297523021697998, + "453": 2.3162975311279297, + "454": 2.3309273719787598, + "455": 2.3304758071899414, + "456": 2.325908660888672, + "457": 2.350257635116577, + "458": 2.346595048904419, + "459": 2.3654356002807617, + "460": 2.338963508605957, + "461": 2.3616232872009277, + "462": 2.332193374633789, + "463": 2.3296713829040527, + "464": 2.3327627182006836, + "465": 2.3536195755004883, + "466": 2.315582275390625, + "467": 2.3132290840148926, + "468": 2.3035173416137695, + "469": 2.317366123199463, + "470": 2.302553176879883, + "471": 2.3296689987182617, + "472": 2.325779438018799, + "473": 2.3288848400115967, + "474": 2.3056187629699707, + "475": 2.317692756652832, + "476": 2.304396390914917, + "477": 2.3225655555725098, + "478": 2.3388824462890625, + "479": 2.3441243171691895, + "480": 2.3449299335479736, + "481": 2.31307053565979, + "482": 2.2947025299072266, + "483": 2.292106866836548, + "484": 2.3105478286743164, + "485": 2.3324155807495117 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 0.0947783, + 0.094952, + 0.0940882, + 0.0946748, + 0.0952734, + 0.0952982, + 0.0943982, + 0.094136, + 0.0943547, + 0.0956267, + 0.0953974, + 0.0939714, + 0.0937921, + 0.0942864, + 0.0947467, + 0.0957036, + 0.0963053, + 0.0959997, + 0.0961161, + 0.0970449, + 0.0971337, + 0.0965729, + 0.0961179, + 0.0958283, + 0.0951594, + 0.0949601, + 0.0940231, + 0.0934658, + 0.0936859, + 0.0944944, + 0.094889, + 0.0951221, + 0.0948797, + 0.095509, + 0.0969534, + 0.0970592, + 0.0964336, + 0.0963294, + 0.0958614, + 0.0956556, + 0.0954488, + 0.095386, + 0.0957057, + 0.0947739, + 0.0952775, + 0.0950843, + 0.0942723, + 0.0941982, + 0.0951608, + 0.0961953, + 0.0967899, + 0.0966585, + 0.0964152, + 0.0962291 + ], + "train_epoch_time": 4.8405914306640625, + "train_loss": 2.303981245470526, + "train_score": 0.32044252157211306, + "val_loss": 2.368876140509079, + "val_score": 0.3031357642087816 + }, + { + "epoch": 9, + "grad_norm": 1.4082812070846558, + "learning_rate": 0.1, + "model_norm": 87.56199645996094, + "step_logs": { + "grad_norm": { + "486": 1.2864775657653809, + "487": 1.2649035453796387, + "488": 1.3978171348571777, + "489": 1.4597502946853638, + "490": 1.3774306774139404, + "491": 1.245689034461975, + "492": 1.214050531387329, + "493": 1.2781760692596436, + "494": 1.3233747482299805, + "495": 1.5580949783325195, + "496": 1.7756626605987549, + "497": 1.6769715547561646, + "498": 1.3581280708312988, + "499": 1.22910475730896, + "500": 1.1415377855300903, + "501": 1.0999529361724854, + "502": 1.2983170747756958, + "503": 1.6977559328079224, + "504": 1.830295443534851, + "505": 1.7282837629318237, + "506": 1.6115379333496094, + "507": 1.5725210905075073, + "508": 1.5280147790908813, + "509": 1.4653064012527466, + "510": 1.4042085409164429, + "511": 1.4489576816558838, + "512": 1.5666319131851196, + "513": 1.597487211227417, + "514": 1.3567883968353271, + "515": 1.2098150253295898, + "516": 1.2113839387893677, + "517": 1.4553508758544922, + "518": 1.5904985666275024, + "519": 1.6239534616470337, + "520": 1.5390082597732544, + "521": 1.5543642044067383, + "522": 1.6086974143981934, + "523": 1.4266736507415771, + "524": 1.320531964302063, + "525": 1.3625422716140747, + "526": 1.5260429382324219, + "527": 1.6582518815994263, + "528": 1.6923820972442627, + "529": 1.572224497795105, + "530": 1.6293480396270752, + "531": 1.664021611213684, + "532": 1.5834639072418213, + "533": 1.4302923679351807, + "534": 1.3162269592285156, + "535": 1.4563848972320557, + "536": 1.493881106376648, + "537": 1.4727799892425537, + "538": 1.3968660831451416, + "539": 1.4082812070846558 + }, + "loss": { + "486": 2.305068016052246, + "487": 2.299149990081787, + "488": 2.285799503326416, + "489": 2.3291726112365723, + "490": 2.2877230644226074, + "491": 2.323141574859619, + "492": 2.287027359008789, + "493": 2.3029165267944336, + "494": 2.308023452758789, + "495": 2.322627544403076, + "496": 2.3242156505584717, + "497": 2.332921028137207, + "498": 2.310991048812866, + "499": 2.2614879608154297, + "500": 2.2862839698791504, + "501": 2.2918813228607178, + "502": 2.2997522354125977, + "503": 2.3261070251464844, + "504": 2.3541808128356934, + "505": 2.3150224685668945, + "506": 2.3013603687286377, + "507": 2.321082592010498, + "508": 2.3025972843170166, + "509": 2.3107120990753174, + "510": 2.306215286254883, + "511": 2.294032573699951, + "512": 2.305708408355713, + "513": 2.330054521560669, + "514": 2.3123176097869873, + "515": 2.281512975692749, + "516": 2.277214527130127, + "517": 2.293863296508789, + "518": 2.3103156089782715, + "519": 2.304133415222168, + "520": 2.3078079223632812, + "521": 2.295689582824707, + "522": 2.3106651306152344, + "523": 2.3192057609558105, + "524": 2.2956135272979736, + "525": 2.3030943870544434, + "526": 2.298799991607666, + "527": 2.288393497467041, + "528": 2.2966222763061523, + "529": 2.2845492362976074, + "530": 2.3210959434509277, + "531": 2.30497407913208, + "532": 2.30173397064209, + "533": 2.306532859802246, + "534": 2.261338949203491, + "535": 2.2612709999084473, + "536": 2.282104969024658, + "537": 2.2685036659240723, + "538": 2.269813060760498, + "539": 2.2653915882110596 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 0.0965345, + 0.0966375, + 0.0959012, + 0.0956258, + 0.0960184, + 0.0967682, + 0.0968783, + 0.0965744, + 0.0963447, + 0.0950335, + 0.093648, + 0.0943154, + 0.0961624, + 0.0967679, + 0.0972291, + 0.0974283, + 0.0964648, + 0.0941658, + 0.0933576, + 0.0939397, + 0.0946589, + 0.0949425, + 0.0951747, + 0.0955603, + 0.0959003, + 0.0956243, + 0.0949467, + 0.0948081, + 0.0961718, + 0.0968921, + 0.0968785, + 0.095587, + 0.0948094, + 0.094587, + 0.0951189, + 0.0950009, + 0.0946971, + 0.0957963, + 0.0963409, + 0.0961257, + 0.0951789, + 0.0943324, + 0.0941304, + 0.0948676, + 0.0945906, + 0.0943338, + 0.0948347, + 0.0957537, + 0.0963107, + 0.0955201, + 0.0953384, + 0.0954373, + 0.0958789, + 0.0958063 + ], + "train_epoch_time": 4.841122388839722, + "train_loss": 2.2861214772529546, + "train_score": 0.32931088598249975, + "val_loss": 2.3297110687851497, + "val_score": 0.3184513128036473 + }, + { + "epoch": 10, + "grad_norm": 1.5447317361831665, + "learning_rate": 0.1, + "model_norm": 87.57910919189453, + "step_logs": { + "grad_norm": { + "540": 1.4496456384658813, + "541": 1.428707480430603, + "542": 1.3890002965927124, + "543": 1.3327672481536865, + "544": 1.3495399951934814, + "545": 1.4422661066055298, + "546": 1.4840099811553955, + "547": 1.4972702264785767, + "548": 1.4662714004516602, + "549": 1.5260636806488037, + "550": 1.3724501132965088, + "551": 1.175660252571106, + "552": 1.1435974836349487, + "553": 1.265898585319519, + "554": 1.3222699165344238, + "555": 1.3145053386688232, + "556": 1.4351242780685425, + "557": 1.523789405822754, + "558": 1.6304676532745361, + "559": 1.8852672576904297, + "560": 2.408698320388794, + "561": 1.6329765319824219, + "562": 1.2162243127822876, + "563": 1.349126935005188, + "564": 1.541892170906067, + "565": 1.5408660173416138, + "566": 1.482460379600525, + "567": 1.35426926612854, + "568": 1.4242154359817505, + "569": 1.4016752243041992, + "570": 1.491642713546753, + "571": 1.603078842163086, + "572": 1.5918983221054077, + "573": 1.5547316074371338, + "574": 1.5848643779754639, + "575": 1.6563140153884888, + "576": 1.4822837114334106, + "577": 1.3219908475875854, + "578": 1.351645827293396, + "579": 1.2938183546066284, + "580": 1.182682752609253, + "581": 1.1695317029953003, + "582": 1.1982420682907104, + "583": 1.2221649885177612, + "584": 1.2715436220169067, + "585": 1.2286652326583862, + "586": 1.1889058351516724, + "587": 1.1929199695587158, + "588": 1.2656303644180298, + "589": 1.4840818643569946, + "590": 1.5940841436386108, + "591": 1.5778687000274658, + "592": 1.5546985864639282, + "593": 1.5447317361831665 + }, + "loss": { + "540": 2.2956197261810303, + "541": 2.275216579437256, + "542": 2.293718099594116, + "543": 2.2971489429473877, + "544": 2.2522454261779785, + "545": 2.2696895599365234, + "546": 2.2697768211364746, + "547": 2.2869839668273926, + "548": 2.2796876430511475, + "549": 2.2783961296081543, + "550": 2.290621280670166, + "551": 2.2746129035949707, + "552": 2.248746395111084, + "553": 2.2493629455566406, + "554": 2.258431911468506, + "555": 2.281113862991333, + "556": 2.268069267272949, + "557": 2.2675845623016357, + "558": 2.2552695274353027, + "559": 2.310277223587036, + "560": 2.27936053276062, + "561": 2.3375160694122314, + "562": 2.2768137454986572, + "563": 2.259498119354248, + "564": 2.2936599254608154, + "565": 2.2451322078704834, + "566": 2.255136489868164, + "567": 2.245969533920288, + "568": 2.272033452987671, + "569": 2.2828240394592285, + "570": 2.2702159881591797, + "571": 2.2861099243164062, + "572": 2.2712438106536865, + "573": 2.2878823280334473, + "574": 2.2461040019989014, + "575": 2.2610225677490234, + "576": 2.251922845840454, + "577": 2.249603271484375, + "578": 2.254251718521118, + "579": 2.2607345581054688, + "580": 2.2414402961730957, + "581": 2.241957187652588, + "582": 2.242112398147583, + "583": 2.219125270843506, + "584": 2.249091148376465, + "585": 2.229884624481201, + "586": 2.224472999572754, + "587": 2.2105965614318848, + "588": 2.231696605682373, + "589": 2.2200279235839844, + "590": 2.259990692138672, + "591": 2.258254289627075, + "592": 2.2463722229003906, + "593": 2.2557711601257324 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 0.0956232, + 0.0957068, + 0.0959641, + 0.0962777, + 0.0961139, + 0.0956184, + 0.0953731, + 0.0953277, + 0.0954969, + 0.0951377, + 0.0960508, + 0.0970513, + 0.0971743, + 0.0965604, + 0.0962734, + 0.0963508, + 0.0956568, + 0.0951295, + 0.0944342, + 0.0928572, + 0.08871, + 0.0946038, + 0.0968538, + 0.0961282, + 0.0950727, + 0.094978, + 0.0953538, + 0.0960772, + 0.0957269, + 0.0958743, + 0.0953285, + 0.0946785, + 0.094716, + 0.0949825, + 0.0947046, + 0.0942803, + 0.0953485, + 0.0962609, + 0.0961056, + 0.0964299, + 0.0969742, + 0.0970398, + 0.0968975, + 0.0967441, + 0.0965303, + 0.0967259, + 0.0969207, + 0.0968817, + 0.0965355, + 0.0952739, + 0.0946773, + 0.0947756, + 0.0948947, + 0.0949766 + ], + "train_epoch_time": 4.840698480606079, + "train_loss": 2.2450779760927175, + "train_score": 0.34053084642507425, + "val_loss": 2.3052828722514733, + "val_score": 0.3245595938576349 + }, + { + "epoch": 11, + "grad_norm": 1.2617815732955933, + "learning_rate": 0.1, + "model_norm": 87.59596252441406, + "step_logs": { + "grad_norm": { + "594": 1.588753581047058, + "595": 1.5247321128845215, + "596": 1.5911153554916382, + "597": 1.775584101676941, + "598": 1.8242466449737549, + "599": 1.8073973655700684, + "600": 1.5275120735168457, + "601": 1.3717238903045654, + "602": 1.3489559888839722, + "603": 1.3506227731704712, + "604": 1.3558343648910522, + "605": 1.2104554176330566, + "606": 1.1499735116958618, + "607": 1.1806302070617676, + "608": 1.1823066473007202, + "609": 1.1349818706512451, + "610": 1.1166205406188965, + "611": 1.16506826877594, + "612": 1.3397775888442993, + "613": 1.5055805444717407, + "614": 1.5274930000305176, + "615": 1.457973599433899, + "616": 1.5001298189163208, + "617": 1.4768356084823608, + "618": 1.4328608512878418, + "619": 1.4925471544265747, + "620": 1.7491965293884277, + "621": 1.9343494176864624, + "622": 1.8554956912994385, + "623": 1.6830286979675293, + "624": 1.617335557937622, + "625": 1.7890456914901733, + "626": 1.7231693267822266, + "627": 1.5459706783294678, + "628": 1.5641156435012817, + "629": 1.757225513458252, + "630": 1.7565991878509521, + "631": 1.540608525276184, + "632": 1.3137248754501343, + "633": 1.271006464958191, + "634": 1.3354873657226562, + "635": 1.3851101398468018, + "636": 1.3646599054336548, + "637": 1.2995293140411377, + "638": 1.2750593423843384, + "639": 1.447117805480957, + "640": 1.653588891029358, + "641": 1.9522098302841187, + "642": 1.6688272953033447, + "643": 1.3668769598007202, + "644": 1.4728426933288574, + "645": 1.5510808229446411, + "646": 1.4057600498199463, + "647": 1.2617815732955933 + }, + "loss": { + "594": 2.248753547668457, + "595": 2.270047187805176, + "596": 2.249763011932373, + "597": 2.2486512660980225, + "598": 2.2625999450683594, + "599": 2.257965087890625, + "600": 2.231330394744873, + "601": 2.2484304904937744, + "602": 2.23899507522583, + "603": 2.2469663619995117, + "604": 2.2197325229644775, + "605": 2.2334465980529785, + "606": 2.215475559234619, + "607": 2.2137811183929443, + "608": 2.1991682052612305, + "609": 2.2368717193603516, + "610": 2.2260210514068604, + "611": 2.194429397583008, + "612": 2.232891321182251, + "613": 2.225104570388794, + "614": 2.2434089183807373, + "615": 2.240908145904541, + "616": 2.2376346588134766, + "617": 2.262025833129883, + "618": 2.267366409301758, + "619": 2.251049518585205, + "620": 2.246934413909912, + "621": 2.285754442214966, + "622": 2.2500481605529785, + "623": 2.253274917602539, + "624": 2.2515296936035156, + "625": 2.238271713256836, + "626": 2.2604024410247803, + "627": 2.2659225463867188, + "628": 2.2380685806274414, + "629": 2.248012065887451, + "630": 2.2320353984832764, + "631": 2.2373127937316895, + "632": 2.2257747650146484, + "633": 2.21968936920166, + "634": 2.200162887573242, + "635": 2.205569267272949, + "636": 2.226161003112793, + "637": 2.232771158218384, + "638": 2.227863073348999, + "639": 2.191601037979126, + "640": 2.2179627418518066, + "641": 2.2556052207946777, + "642": 2.2579398155212402, + "643": 2.2348270416259766, + "644": 2.222165584564209, + "645": 2.2351973056793213, + "646": 2.2275376319885254, + "647": 2.2061872482299805 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 0.0946859, + 0.0951288, + 0.0946732, + 0.093449, + 0.0931497, + 0.0932543, + 0.0950313, + 0.0959838, + 0.0960951, + 0.0960991, + 0.0960239, + 0.096824, + 0.0971019, + 0.0969479, + 0.0969198, + 0.0972012, + 0.0972757, + 0.097, + 0.0961359, + 0.0951532, + 0.0950569, + 0.0954719, + 0.0952123, + 0.0954007, + 0.0956686, + 0.0952852, + 0.0936254, + 0.0924344, + 0.0928931, + 0.0940862, + 0.09451, + 0.0933272, + 0.0938367, + 0.0949904, + 0.0948177, + 0.0935734, + 0.0935347, + 0.0949629, + 0.0962677, + 0.0964888, + 0.0961047, + 0.095832, + 0.0959852, + 0.096356, + 0.0964797, + 0.0954402, + 0.0941938, + 0.09221, + 0.0941911, + 0.0959876, + 0.0953462, + 0.0948931, + 0.0957526, + 0.0965174 + ], + "train_epoch_time": 4.84069037437439, + "train_loss": 2.2040659528893753, + "train_score": 0.3523459916306363, + "val_loss": 2.2799425461809615, + "val_score": 0.335758109351387 + }, + { + "epoch": 12, + "grad_norm": 0.9351850748062134, + "learning_rate": 0.1, + "model_norm": 87.61099243164062, + "step_logs": { + "grad_norm": { + "648": 1.3094134330749512, + "649": 1.3486777544021606, + "650": 1.4072928428649902, + "651": 1.5142947435379028, + "652": 1.5094581842422485, + "653": 1.3887529373168945, + "654": 1.4224172830581665, + "655": 1.5083428621292114, + "656": 1.5954478979110718, + "657": 1.5511927604675293, + "658": 1.4117122888565063, + "659": 1.3409422636032104, + "660": 1.2231510877609253, + "661": 1.237666368484497, + "662": 1.2927314043045044, + "663": 1.2787615060806274, + "664": 1.2403388023376465, + "665": 1.2749487161636353, + "666": 1.264051914215088, + "667": 1.1782317161560059, + "668": 1.133363962173462, + "669": 1.0662480592727661, + "670": 1.0082484483718872, + "671": 0.9322500824928284, + "672": 0.9343487024307251, + "673": 1.0088428258895874, + "674": 1.0157840251922607, + "675": 0.8826058506965637, + "676": 0.8474686741828918, + "677": 0.8494150042533875, + "678": 0.9389997720718384, + "679": 1.0475910902023315, + "680": 1.0899783372879028, + "681": 1.17728590965271, + "682": 1.20793616771698, + "683": 1.1445024013519287, + "684": 1.1728129386901855, + "685": 1.3455970287322998, + "686": 1.2690045833587646, + "687": 1.0965877771377563, + "688": 1.0773963928222656, + "689": 1.0435794591903687, + "690": 1.0178760290145874, + "691": 1.0164440870285034, + "692": 1.0642462968826294, + "693": 1.0559436082839966, + "694": 0.9758284091949463, + "695": 1.0340592861175537, + "696": 1.075991153717041, + "697": 1.0048353672027588, + "698": 1.035723090171814, + "699": 1.1538376808166504, + "700": 1.0841097831726074, + "701": 0.9351850748062134 + }, + "loss": { + "648": 2.202803611755371, + "649": 2.2173171043395996, + "650": 2.181130886077881, + "651": 2.2152657508850098, + "652": 2.229952812194824, + "653": 2.2089314460754395, + "654": 2.208660840988159, + "655": 2.2242431640625, + "656": 2.226830005645752, + "657": 2.2219443321228027, + "658": 2.209901809692383, + "659": 2.211392879486084, + "660": 2.195700168609619, + "661": 2.1754508018493652, + "662": 2.2183332443237305, + "663": 2.2171554565429688, + "664": 2.198988437652588, + "665": 2.209177017211914, + "666": 2.1785616874694824, + "667": 2.16093373298645, + "668": 2.1865477561950684, + "669": 2.1781415939331055, + "670": 2.1762661933898926, + "671": 2.1845035552978516, + "672": 2.1780917644500732, + "673": 2.1550164222717285, + "674": 2.176767587661743, + "675": 2.1646058559417725, + "676": 2.134427547454834, + "677": 2.1593589782714844, + "678": 2.1512317657470703, + "679": 2.180830478668213, + "680": 2.173267364501953, + "681": 2.1815433502197266, + "682": 2.1676177978515625, + "683": 2.177546977996826, + "684": 2.179234504699707, + "685": 2.1747994422912598, + "686": 2.195272445678711, + "687": 2.203248977661133, + "688": 2.168781280517578, + "689": 2.156891345977783, + "690": 2.158818006515503, + "691": 2.1580328941345215, + "692": 2.140256404876709, + "693": 2.1329727172851562, + "694": 2.1519196033477783, + "695": 2.170727252960205, + "696": 2.1416468620300293, + "697": 2.178642749786377, + "698": 2.1451587677001953, + "699": 2.1795005798339844, + "700": 2.166288375854492, + "701": 2.142151355743408 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 0.096254, + 0.0954902, + 0.0945269, + 0.0934034, + 0.0929019, + 0.0929798, + 0.0922284, + 0.0912155, + 0.090163, + 0.0898497, + 0.0900187, + 0.0898066, + 0.0897611, + 0.0890904, + 0.0883188, + 0.0878027, + 0.0873691, + 0.0866528, + 0.0860829, + 0.0858378, + 0.0854541, + 0.0851039, + 0.08471, + 0.0843626, + 0.0837553, + 0.0829122, + 0.0823128, + 0.0821022, + 0.0815807, + 0.080988, + 0.0801432, + 0.0792517, + 0.0785245, + 0.077665, + 0.0769656, + 0.0765892, + 0.0759144, + 0.0747592, + 0.074453, + 0.0743847, + 0.0738209, + 0.073309, + 0.0727804, + 0.0721875, + 0.0714622, + 0.0708841, + 0.0704882, + 0.0697678, + 0.0690569, + 0.0686436, + 0.067961, + 0.067114, + 0.0666731, + 0.0663723 + ], + "train_epoch_time": 4.840441703796387, + "train_loss": 2.145929629012536, + "train_score": 0.36693754481790397, + "val_loss": 2.2230640298052853, + "val_score": 0.34471423012107166 + }, + { + "epoch": 13, + "grad_norm": 0.7406230568885803, + "learning_rate": 0.06666666666666668, + "model_norm": 87.62065124511719, + "step_logs": { + "grad_norm": { + "702": 0.9913109540939331, + "703": 0.9523283839225769, + "704": 0.8870086669921875, + "705": 0.7779407501220703, + "706": 0.7760103940963745, + "707": 0.8092449903488159, + "708": 0.8593055009841919, + "709": 0.8720701932907104, + "710": 0.7881453037261963, + "711": 0.813335120677948, + "712": 0.8608036041259766, + "713": 0.9568566679954529, + "714": 0.9770629405975342, + "715": 0.9285096526145935, + "716": 0.8567588925361633, + "717": 0.7760965824127197, + "718": 0.7677116990089417, + "719": 0.768796443939209, + "720": 0.8074935674667358, + "721": 0.8065921664237976, + "722": 0.8056923747062683, + "723": 0.7458757162094116, + "724": 0.7424354553222656, + "725": 0.8486406207084656, + "726": 0.8646981716156006, + "727": 0.902239978313446, + "728": 0.8873850107192993, + "729": 0.8135435581207275, + "730": 0.7287591695785522, + "731": 0.7761465311050415, + "732": 0.7969075441360474, + "733": 0.7741085290908813, + "734": 0.7060049772262573, + "735": 0.6913495063781738, + "736": 0.7907267212867737, + "737": 0.8001796007156372, + "738": 0.7330057621002197, + "739": 0.7804092764854431, + "740": 0.7430421113967896, + "741": 0.7955508828163147, + "742": 0.7392135262489319, + "743": 0.8314844369888306, + "744": 0.8225041031837463, + "745": 0.7510942816734314, + "746": 0.7440597414970398, + "747": 0.7147427201271057, + "748": 0.6901541352272034, + "749": 0.737075924873352, + "750": 0.7721534371376038, + "751": 0.7181875109672546, + "752": 0.7882216572761536, + "753": 0.7213799953460693, + "754": 0.7337266802787781, + "755": 0.7406230568885803 + }, + "loss": { + "702": 2.136104106903076, + "703": 2.1402642726898193, + "704": 2.127439498901367, + "705": 2.1252241134643555, + "706": 2.1404170989990234, + "707": 2.1388142108917236, + "708": 2.1538124084472656, + "709": 2.137284278869629, + "710": 2.139042854309082, + "711": 2.140145778656006, + "712": 2.143686294555664, + "713": 2.150625705718994, + "714": 2.1344122886657715, + "715": 2.156564950942993, + "716": 2.126880168914795, + "717": 2.1501851081848145, + "718": 2.144930362701416, + "719": 2.1238722801208496, + "720": 2.1252119541168213, + "721": 2.1325180530548096, + "722": 2.1329076290130615, + "723": 2.1131508350372314, + "724": 2.1258697509765625, + "725": 2.1209659576416016, + "726": 2.1325011253356934, + "727": 2.1062605381011963, + "728": 2.1219520568847656, + "729": 2.1355714797973633, + "730": 2.1436898708343506, + "731": 2.1131651401519775, + "732": 2.1354570388793945, + "733": 2.1211252212524414, + "734": 2.110975503921509, + "735": 2.1173393726348877, + "736": 2.122076988220215, + "737": 2.099818229675293, + "738": 2.115570068359375, + "739": 2.1291046142578125, + "740": 2.133324146270752, + "741": 2.11545467376709, + "742": 2.144829273223877, + "743": 2.1413776874542236, + "744": 2.1241583824157715, + "745": 2.1375081539154053, + "746": 2.114259719848633, + "747": 2.122079849243164, + "748": 2.10994291305542, + "749": 2.078666925430298, + "750": 2.1118717193603516, + "751": 2.103835344314575, + "752": 2.102637767791748, + "753": 2.1102163791656494, + "754": 2.105670928955078, + "755": 2.1023080348968506 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 0.0656598, + 0.0651378, + 0.0646499, + 0.0642221, + 0.063623, + 0.0629673, + 0.0622907, + 0.0616617, + 0.06118, + 0.0605393, + 0.0598679, + 0.059123, + 0.0584842, + 0.0579626, + 0.0574495, + 0.0569495, + 0.0563505, + 0.0557372, + 0.0550861, + 0.0544817, + 0.0538756, + 0.0533267, + 0.0527236, + 0.0520059, + 0.0513848, + 0.0507323, + 0.0501463, + 0.0496156, + 0.0490825, + 0.0484288, + 0.0478059, + 0.0472139, + 0.0466552, + 0.0460556, + 0.0453737, + 0.0447543, + 0.044195, + 0.0435541, + 0.0429696, + 0.0423229, + 0.0417521, + 0.0410837, + 0.0404781, + 0.0399121, + 0.0393029, + 0.0387077, + 0.038107, + 0.0374699, + 0.0368444, + 0.0362579, + 0.0356141, + 0.0350332, + 0.0344158, + 0.0338009 + ], + "train_epoch_time": 4.840989112854004, + "train_loss": 2.1088773102124074, + "train_score": 0.37540127322013617, + "val_loss": 2.190773714560455, + "val_score": 0.35245048770537746 + }, + { + "epoch": 14, + "grad_norm": 0.6430807709693909, + "learning_rate": 0.03333333333333334, + "model_norm": 87.62384033203125, + "step_logs": { + "grad_norm": { + "756": 0.7788354158401489, + "757": 0.7368332743644714, + "758": 0.7312880158424377, + "759": 0.7624044418334961, + "760": 0.7455044388771057, + "761": 0.6700214147567749, + "762": 0.6613816618919373, + "763": 0.6746414303779602, + "764": 0.7023569345474243, + "765": 0.7054687142372131, + "766": 0.7379094362258911, + "767": 0.6996170878410339, + "768": 0.7023919224739075, + "769": 0.6621960401535034, + "770": 0.6923285722732544, + "771": 0.6755625009536743, + "772": 0.6708892583847046, + "773": 0.7081537246704102, + "774": 0.6640799641609192, + "775": 0.6700623631477356, + "776": 0.6931478977203369, + "777": 0.6490085124969482, + "778": 0.6820791363716125, + "779": 0.6616635918617249, + "780": 0.6551328897476196, + "781": 0.6683677434921265, + "782": 0.7173438668251038, + "783": 0.7942193746566772, + "784": 0.6706562042236328, + "785": 0.657605767250061, + "786": 0.6927198171615601, + "787": 0.721331000328064, + "788": 0.7300679683685303, + "789": 0.6800004243850708, + "790": 0.6603149175643921, + "791": 0.684819757938385, + "792": 0.6196650266647339, + "793": 0.6337314248085022, + "794": 0.6710205078125, + "795": 0.6451219320297241, + "796": 0.6872656345367432, + "797": 0.6542031764984131, + "798": 0.6223916411399841, + "799": 0.6473522782325745, + "800": 0.6199365854263306, + "801": 0.6282517910003662, + "802": 0.6723541617393494, + "803": 0.6211723685264587, + "804": 0.6353791952133179, + "805": 0.6930897831916809, + "806": 0.6347803473472595, + "807": 0.6613824963569641, + "808": 0.7201979756355286, + "809": 0.6430807709693909 + }, + "loss": { + "756": 2.1183676719665527, + "757": 2.1099693775177, + "758": 2.113245964050293, + "759": 2.110524892807007, + "760": 2.0794217586517334, + "761": 2.114016056060791, + "762": 2.1083898544311523, + "763": 2.098907470703125, + "764": 2.09934139251709, + "765": 2.1003122329711914, + "766": 2.1237707138061523, + "767": 2.1085312366485596, + "768": 2.1284918785095215, + "769": 2.0959107875823975, + "770": 2.105087995529175, + "771": 2.086397647857666, + "772": 2.1112520694732666, + "773": 2.1046180725097656, + "774": 2.1233530044555664, + "775": 2.11547589302063, + "776": 2.0912961959838867, + "777": 2.093679904937744, + "778": 2.120704412460327, + "779": 2.1068384647369385, + "780": 2.110521078109741, + "781": 2.0921778678894043, + "782": 2.083437442779541, + "783": 2.1080901622772217, + "784": 2.100724697113037, + "785": 2.1047897338867188, + "786": 2.0874509811401367, + "787": 2.111093044281006, + "788": 2.0974903106689453, + "789": 2.122121810913086, + "790": 2.084350109100342, + "791": 2.115586519241333, + "792": 2.0995631217956543, + "793": 2.1061604022979736, + "794": 2.1131274700164795, + "795": 2.096116304397583, + "796": 2.096200466156006, + "797": 2.1056478023529053, + "798": 2.0977399349212646, + "799": 2.088071823120117, + "800": 2.08858585357666, + "801": 2.0939793586730957, + "802": 2.089242935180664, + "803": 2.0822339057922363, + "804": 2.08237886428833, + "805": 2.103977680206299, + "806": 2.0992512702941895, + "807": 2.084773540496826, + "808": 2.124077558517456, + "809": 2.1066696643829346 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 0.033175, + 0.0325789, + 0.0319689, + 0.0313456, + 0.0307374, + 0.0301501, + 0.0295388, + 0.0289214, + 0.0283006, + 0.0276867, + 0.0270663, + 0.0264617, + 0.0258483, + 0.0252418, + 0.0246221, + 0.0240109, + 0.0233983, + 0.0227775, + 0.0221711, + 0.0215555, + 0.0209372, + 0.0203287, + 0.0197104, + 0.0190978, + 0.0184837, + 0.0178671, + 0.0172471, + 0.0166252, + 0.0160219, + 0.0154077, + 0.0147896, + 0.0141727, + 0.0135569, + 0.0129447, + 0.0123298, + 0.0117132, + 0.0110998, + 0.0104833, + 0.00986616, + 0.00925076, + 0.00863357, + 0.00801815, + 0.00740235, + 0.0067855, + 0.00616934, + 0.00555265, + 0.00493563, + 0.00431926, + 0.00370237, + 0.00308533, + 0.00246855, + 0.00185149, + 0.00123438, + 0.000617247 + ], + "train_epoch_time": 4.840726137161255, + "train_loss": 2.096882079287273, + "train_score": 0.37770355095822294, + "val_loss": 2.181603769210395, + "val_score": 0.3543924005652952 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:21:58.660459", + "final_model_norm": 87.62384033203125, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:20:16.888493", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.7124191522598267, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.42977142333984, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 9.073990821838379, + "3": 5.383543014526367, + "4": 4.2085280418396, + "5": 7.449563980102539, + "6": 21.270666122436523, + "7": 8.745401382446289, + "8": 5.525934219360352, + "9": 4.254435062408447, + "10": 3.7144956588745117, + "11": 6.272667407989502, + "12": 6.959863185882568, + "13": 3.8103017807006836, + "14": 5.224253177642822, + "15": 6.990877628326416, + "16": 3.912477970123291, + "17": 5.975802898406982, + "18": 16.006542205810547, + "19": 4.558985710144043, + "20": 26.973201751708984, + "21": 4.577878952026367, + "22": 7.547962188720703, + "23": 6.881000518798828, + "24": 3.721916437149048, + "25": 3.037304401397705, + "26": 3.674651622772217, + "27": 3.39523983001709, + "28": 4.316905975341797, + "29": 4.059088706970215, + "30": 3.932001829147339, + "31": 2.15552020072937, + "32": 2.463045358657837, + "33": 3.3573660850524902, + "34": 3.1214869022369385, + "35": 2.7118217945098877, + "36": 3.5217769145965576, + "37": 4.170468807220459, + "38": 4.609957218170166, + "39": 3.713710069656372, + "40": 25.984375, + "41": 2.281721830368042, + "42": 3.208026647567749, + "43": 3.073312520980835, + "44": 3.6890244483947754, + "45": 3.754546642303467, + "46": 3.4417636394500732, + "47": 2.5035364627838135, + "48": 2.6139848232269287, + "49": 2.6245594024658203, + "50": 2.4692258834838867, + "51": 3.0870068073272705, + "52": 2.605820894241333, + "53": 1.7124191522598267 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.969463348388672, + "3": 3.7336039543151855, + "4": 3.6111388206481934, + "5": 3.5463061332702637, + "6": 3.959855556488037, + "7": 3.9910166263580322, + "8": 3.5273513793945312, + "9": 3.487791061401367, + "10": 3.4190897941589355, + "11": 3.400611400604248, + "12": 3.5444602966308594, + "13": 3.259803056716919, + "14": 3.2384300231933594, + "15": 3.2085137367248535, + "16": 3.2316131591796875, + "17": 3.1274282932281494, + "18": 3.285050630569458, + "19": 3.1542654037475586, + "20": 3.476672649383545, + "21": 3.187765598297119, + "22": 3.2425570487976074, + "23": 3.4823873043060303, + "24": 3.005009174346924, + "25": 2.9901604652404785, + "26": 2.8855042457580566, + "27": 2.97104811668396, + "28": 2.924663543701172, + "29": 2.910982131958008, + "30": 3.047665596008301, + "31": 2.7814574241638184, + "32": 2.8077807426452637, + "33": 2.804180860519409, + "34": 2.8649051189422607, + "35": 2.7817745208740234, + "36": 2.811476230621338, + "37": 2.8675894737243652, + "38": 2.974514961242676, + "39": 2.815462112426758, + "40": 3.4331846237182617, + "41": 2.7526211738586426, + "42": 2.756105422973633, + "43": 2.779527187347412, + "44": 2.8949084281921387, + "45": 2.8271660804748535, + "46": 2.934957981109619, + "47": 2.736778736114502, + "48": 2.7559447288513184, + "49": 2.750572919845581, + "50": 2.771946430206299, + "51": 2.7134382724761963, + "52": 2.85939359664917, + "53": 2.663601875305176 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 1e-11, + 0.00179479, + 0.00384067, + 0.00586345, + 0.00784607, + 0.00927433, + 0.00711937, + 0.0123441, + 0.0149637, + 0.0171968, + 0.0192242, + 0.0195161, + 0.0206186, + 0.024577, + 0.025045, + 0.0244204, + 0.0297456, + 0.028473, + 0.0149759, + 0.0337719, + 0.00771405, + 0.036905, + 0.0317337, + 0.0350418, + 0.0432185, + 0.0464197, + 0.0463594, + 0.0488794, + 0.0475215, + 0.0498222, + 0.0520748, + 0.0589475, + 0.0598612, + 0.0582705, + 0.0609518, + 0.0640716, + 0.0621324, + 0.060437, + 0.0597722, + 0.0654888, + 0.00902262, + 0.0760988, + 0.0726122, + 0.0750357, + 0.0729175, + 0.0735068, + 0.0775939, + 0.0848653, + 0.0857903, + 0.0872887, + 0.0900919, + 0.0850629, + 0.0893865, + 0.0947827 + ], + "train_epoch_time": 4.84323263168335, + "train_loss": 2.63499828616379, + "train_score": 0.25480967533468685, + "val_loss": 2.6631805685987153, + "val_score": 0.250995623018383 + }, + { + "epoch": 1, + "grad_norm": 1.8427996635437012, + "learning_rate": 0.1, + "model_norm": 87.4466781616211, + "step_logs": { + "grad_norm": { + "54": 1.7451666593551636, + "55": 2.056056022644043, + "56": 2.4911372661590576, + "57": 3.8065311908721924, + "58": 2.7564525604248047, + "59": 1.779683232307434, + "60": 1.465343952178955, + "61": 1.544689655303955, + "62": 2.3603627681732178, + "63": 2.182406187057495, + "64": 1.9513694047927856, + "65": 2.2473866939544678, + "66": 2.59765625, + "67": 2.127577781677246, + "68": 1.5903594493865967, + "69": 1.9788583517074585, + "70": 2.8934824466705322, + "71": 2.4394099712371826, + "72": 1.713663101196289, + "73": 1.8603097200393677, + "74": 2.2458744049072266, + "75": 2.239811658859253, + "76": 2.100458860397339, + "77": 1.9803333282470703, + "78": 1.9503154754638672, + "79": 2.045192241668701, + "80": 1.9893498420715332, + "81": 1.8273718357086182, + "82": 1.7364015579223633, + "83": 1.7854942083358765, + "84": 1.8950395584106445, + "85": 1.85012686252594, + "86": 1.5653562545776367, + "87": 1.734098196029663, + "88": 2.142047166824341, + "89": 2.030580759048462, + "90": 1.7446129322052002, + "91": 1.810481071472168, + "92": 1.9859819412231445, + "93": 1.8290988206863403, + "94": 1.5478321313858032, + "95": 1.6295554637908936, + "96": 2.0320796966552734, + "97": 2.0534605979919434, + "98": 1.738858699798584, + "99": 1.7014224529266357, + "100": 1.842415452003479, + "101": 1.8293942213058472, + "102": 2.0938055515289307, + "103": 1.8832848072052002, + "104": 1.3503742218017578, + "105": 1.5346651077270508, + "106": 2.1024107933044434, + "107": 1.8427996635437012 + }, + "loss": { + "54": 2.6397061347961426, + "55": 2.6529054641723633, + "56": 2.7125747203826904, + "57": 2.7431840896606445, + "58": 2.9066057205200195, + "59": 2.655151844024658, + "60": 2.6396517753601074, + "61": 2.588855504989624, + "62": 2.634075164794922, + "63": 2.72806978225708, + "64": 2.6072847843170166, + "65": 2.684499740600586, + "66": 2.6587204933166504, + "67": 2.7239718437194824, + "68": 2.5890772342681885, + "69": 2.6363325119018555, + "70": 2.653327703475952, + "71": 2.7713708877563477, + "72": 2.61672306060791, + "73": 2.6043858528137207, + "74": 2.624462127685547, + "75": 2.6916556358337402, + "76": 2.6168556213378906, + "77": 2.6732101440429688, + "78": 2.5770888328552246, + "79": 2.65824031829834, + "80": 2.5798745155334473, + "81": 2.6446685791015625, + "82": 2.5722827911376953, + "83": 2.595245361328125, + "84": 2.584846019744873, + "85": 2.6358859539031982, + "86": 2.5438292026519775, + "87": 2.5701117515563965, + "88": 2.5886170864105225, + "89": 2.6534128189086914, + "90": 2.5985097885131836, + "91": 2.5908732414245605, + "92": 2.6019034385681152, + "93": 2.5985682010650635, + "94": 2.554375648498535, + "95": 2.5828542709350586, + "96": 2.591590404510498, + "97": 2.619809150695801, + "98": 2.5770506858825684, + "99": 2.5957045555114746, + "100": 2.574801445007324, + "101": 2.597097396850586, + "102": 2.5735721588134766, + "103": 2.620408535003662, + "104": 2.5321273803710938, + "105": 2.5520882606506348, + "106": 2.5506420135498047, + "107": 2.63661527633667 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 0.0945458, + 0.0926205, + 0.0897353, + 0.0791075, + 0.0884406, + 0.0943713, + 0.0960917, + 0.0955947, + 0.090436, + 0.0919714, + 0.0931946, + 0.0914016, + 0.088739, + 0.0923286, + 0.095343, + 0.0930867, + 0.086373, + 0.0903048, + 0.0946868, + 0.0937699, + 0.091233, + 0.0914753, + 0.0922255, + 0.0931661, + 0.0931273, + 0.0927062, + 0.0928764, + 0.0940617, + 0.0944637, + 0.0942134, + 0.0935046, + 0.0939029, + 0.0954051, + 0.0944732, + 0.0918589, + 0.0927904, + 0.0944674, + 0.0940506, + 0.0929547, + 0.0939519, + 0.0955205, + 0.0951108, + 0.0926211, + 0.0925517, + 0.0944586, + 0.0947183, + 0.0938159, + 0.0939469, + 0.0921511, + 0.0936614, + 0.0965244, + 0.0955893, + 0.0920262, + 0.0939497 + ], + "train_epoch_time": 4.839537858963013, + "train_loss": 2.53486723133618, + "train_score": 0.25927636303552765, + "val_loss": 2.5695702014095336, + "val_score": 0.25444442485421725 + }, + { + "epoch": 2, + "grad_norm": 1.6176575422286987, + "learning_rate": 0.1, + "model_norm": 87.46025085449219, + "step_logs": { + "grad_norm": { + "108": 1.3904390335083008, + "109": 1.5651144981384277, + "110": 1.8946231603622437, + "111": 1.9631491899490356, + "112": 2.0436851978302, + "113": 1.9051377773284912, + "114": 1.6877081394195557, + "115": 1.7305121421813965, + "116": 1.7928662300109863, + "117": 1.7490925788879395, + "118": 1.9426746368408203, + "119": 1.9647597074508667, + "120": 1.7538739442825317, + "121": 1.7286008596420288, + "122": 1.7098926305770874, + "123": 1.7366149425506592, + "124": 1.7650038003921509, + "125": 1.6990960836410522, + "126": 1.5044797658920288, + "127": 1.4832885265350342, + "128": 1.6062524318695068, + "129": 1.4627171754837036, + "130": 1.320543885231018, + "131": 1.363011360168457, + "132": 1.5314080715179443, + "133": 1.6076903343200684, + "134": 1.5600913763046265, + "135": 1.6323246955871582, + "136": 1.8003880977630615, + "137": 1.782508373260498, + "138": 1.7450324296951294, + "139": 1.6873091459274292, + "140": 1.5233711004257202, + "141": 1.6176234483718872, + "142": 1.7869263887405396, + "143": 1.7575515508651733, + "144": 1.6200042963027954, + "145": 1.588592529296875, + "146": 1.48301362991333, + "147": 1.372484803199768, + "148": 1.4073752164840698, + "149": 1.5164399147033691, + "150": 1.606526494026184, + "151": 1.5409373044967651, + "152": 1.6272214651107788, + "153": 1.6304783821105957, + "154": 1.5988147258758545, + "155": 1.5830514430999756, + "156": 1.49552583694458, + "157": 1.4493581056594849, + "158": 1.743516445159912, + "159": 1.7905282974243164, + "160": 1.6307268142700195, + "161": 1.6176575422286987 + }, + "loss": { + "108": 2.539473533630371, + "109": 2.5351099967956543, + "110": 2.5705795288085938, + "111": 2.613246440887451, + "112": 2.556335926055908, + "113": 2.5967390537261963, + "114": 2.546572685241699, + "115": 2.5725255012512207, + "116": 2.581244945526123, + "117": 2.5693461894989014, + "118": 2.565413475036621, + "119": 2.603050708770752, + "120": 2.553065299987793, + "121": 2.5703039169311523, + "122": 2.535938024520874, + "123": 2.574498414993286, + "124": 2.5499210357666016, + "125": 2.5706706047058105, + "126": 2.546192169189453, + "127": 2.5553641319274902, + "128": 2.531917095184326, + "129": 2.5682835578918457, + "130": 2.5035579204559326, + "131": 2.531642436981201, + "132": 2.5284790992736816, + "133": 2.5431647300720215, + "134": 2.545612335205078, + "135": 2.5161311626434326, + "136": 2.537336826324463, + "137": 2.5615382194519043, + "138": 2.5330071449279785, + "139": 2.5430996417999268, + "140": 2.5300376415252686, + "141": 2.5412092208862305, + "142": 2.5488288402557373, + "143": 2.566938877105713, + "144": 2.5280909538269043, + "145": 2.5661096572875977, + "146": 2.533201217651367, + "147": 2.521578311920166, + "148": 2.5038695335388184, + "149": 2.5267040729522705, + "150": 2.513009548187256, + "151": 2.5295653343200684, + "152": 2.5126407146453857, + "153": 2.5354621410369873, + "154": 2.5191593170166016, + "155": 2.5349979400634766, + "156": 2.5124080181121826, + "157": 2.51773738861084, + "158": 2.5108580589294434, + "159": 2.560356616973877, + "160": 2.509505033493042, + "161": 2.5461692810058594 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 0.096333, + 0.0953913, + 0.0934736, + 0.0931325, + 0.0924478, + 0.0934678, + 0.0947037, + 0.0944997, + 0.0941386, + 0.094381, + 0.0931485, + 0.0930969, + 0.094318, + 0.0945067, + 0.0945496, + 0.094467, + 0.0942432, + 0.0946834, + 0.0957444, + 0.0958727, + 0.095152, + 0.0960012, + 0.0966345, + 0.0964607, + 0.095568, + 0.0951641, + 0.0954376, + 0.0949715, + 0.0939961, + 0.0941602, + 0.0943299, + 0.0946992, + 0.0956149, + 0.0951036, + 0.0941054, + 0.0943246, + 0.0950656, + 0.0953132, + 0.0958396, + 0.0963993, + 0.0961952, + 0.0956475, + 0.0951157, + 0.0955169, + 0.0949947, + 0.0950186, + 0.0951714, + 0.0952899, + 0.0957386, + 0.0959954, + 0.0942921, + 0.0941081, + 0.0949682, + 0.0951124 + ], + "train_epoch_time": 4.839670896530151, + "train_loss": 2.51940996534, + "train_score": 0.24754976687506589, + "val_loss": 2.560662880830732, + "val_score": 0.24263149395533737 + }, + { + "epoch": 3, + "grad_norm": 1.4900684356689453, + "learning_rate": 0.1, + "model_norm": 87.47274017333984, + "step_logs": { + "grad_norm": { + "162": 1.598097324371338, + "163": 1.525174617767334, + "164": 1.6116458177566528, + "165": 1.6965845823287964, + "166": 1.6861199140548706, + "167": 1.5659401416778564, + "168": 1.4150835275650024, + "169": 1.340960144996643, + "170": 1.3616056442260742, + "171": 1.38835608959198, + "172": 1.3906810283660889, + "173": 1.3606905937194824, + "174": 1.4589627981185913, + "175": 1.536238670349121, + "176": 1.693096399307251, + "177": 1.6456454992294312, + "178": 1.5588834285736084, + "179": 1.5264568328857422, + "180": 1.4997258186340332, + "181": 1.420569896697998, + "182": 1.3450911045074463, + "183": 1.334816575050354, + "184": 1.4066290855407715, + "185": 1.4533334970474243, + "186": 1.52266263961792, + "187": 1.6736797094345093, + "188": 1.664509654045105, + "189": 1.6850454807281494, + "190": 1.5338938236236572, + "191": 1.357615351676941, + "192": 1.2970796823501587, + "193": 1.2680583000183105, + "194": 1.1891568899154663, + "195": 1.2309643030166626, + "196": 1.3531808853149414, + "197": 1.4047720432281494, + "198": 1.4748094081878662, + "199": 1.4856345653533936, + "200": 1.4025931358337402, + "201": 1.3785326480865479, + "202": 1.4687319993972778, + "203": 1.4839376211166382, + "204": 1.5141972303390503, + "205": 1.5908265113830566, + "206": 1.474566102027893, + "207": 1.4316205978393555, + "208": 1.5760974884033203, + "209": 1.6366726160049438, + "210": 1.7205854654312134, + "211": 1.639279842376709, + "212": 1.4056683778762817, + "213": 1.4341869354248047, + "214": 1.4404417276382446, + "215": 1.4900684356689453 + }, + "loss": { + "162": 2.525996208190918, + "163": 2.519139289855957, + "164": 2.5197999477386475, + "165": 2.5566163063049316, + "166": 2.517716407775879, + "167": 2.5122735500335693, + "168": 2.5209250450134277, + "169": 2.5298314094543457, + "170": 2.5019795894622803, + "171": 2.515258550643921, + "172": 2.482245683670044, + "173": 2.5298373699188232, + "174": 2.502087116241455, + "175": 2.5351507663726807, + "176": 2.502654790878296, + "177": 2.552912473678589, + "178": 2.510620594024658, + "179": 2.5179388523101807, + "180": 2.514465808868408, + "181": 2.525336742401123, + "182": 2.501659870147705, + "183": 2.504927635192871, + "184": 2.503681182861328, + "185": 2.5129146575927734, + "186": 2.4964075088500977, + "187": 2.522034168243408, + "188": 2.5041115283966064, + "189": 2.5342724323272705, + "190": 2.5131492614746094, + "191": 2.489699363708496, + "192": 2.4914677143096924, + "193": 2.487210512161255, + "194": 2.4756979942321777, + "195": 2.499216318130493, + "196": 2.4841837882995605, + "197": 2.4931082725524902, + "198": 2.500441074371338, + "199": 2.513913154602051, + "200": 2.4952261447906494, + "201": 2.4986376762390137, + "202": 2.4906091690063477, + "203": 2.498720407485962, + "204": 2.476853609085083, + "205": 2.5053882598876953, + "206": 2.474944591522217, + "207": 2.5047898292541504, + "208": 2.491452693939209, + "209": 2.503051280975342, + "210": 2.5131893157958984, + "211": 2.5290403366088867, + "212": 2.478907585144043, + "213": 2.4828474521636963, + "214": 2.4856977462768555, + "215": 2.4963371753692627 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 0.095188, + 0.0955868, + 0.0950986, + 0.0946707, + 0.0946557, + 0.0953467, + 0.09618, + 0.096568, + 0.0964274, + 0.0963097, + 0.0962504, + 0.0964699, + 0.09592, + 0.0955524, + 0.0945832, + 0.0949631, + 0.0953837, + 0.0955777, + 0.095719, + 0.096158, + 0.0965101, + 0.0965657, + 0.0961988, + 0.0959668, + 0.0955624, + 0.0947387, + 0.0947579, + 0.0946952, + 0.0955283, + 0.0964306, + 0.0967339, + 0.0968687, + 0.0972234, + 0.0970577, + 0.0964455, + 0.096193, + 0.0958319, + 0.0957948, + 0.0962074, + 0.0963365, + 0.0958491, + 0.0957796, + 0.0955763, + 0.0951922, + 0.0957921, + 0.0960696, + 0.0952515, + 0.0949209, + 0.0944378, + 0.0949553, + 0.0961673, + 0.0960226, + 0.0959936, + 0.0957422 + ], + "train_epoch_time": 4.839991569519043, + "train_loss": 2.497765347673698, + "train_score": 0.25919453903626505, + "val_loss": 2.547653200432025, + "val_score": 0.25313486673801827 + }, + { + "epoch": 4, + "grad_norm": 1.4493815898895264, + "learning_rate": 0.1, + "model_norm": 87.48473358154297, + "step_logs": { + "grad_norm": { + "216": 1.5207141637802124, + "217": 1.4831293821334839, + "218": 1.548401951789856, + "219": 1.4990978240966797, + "220": 1.658570408821106, + "221": 1.7461599111557007, + "222": 1.7208502292633057, + "223": 1.4546067714691162, + "224": 1.3064782619476318, + "225": 1.4333893060684204, + "226": 1.8529034852981567, + "227": 1.876185655593872, + "228": 1.4407918453216553, + "229": 1.469433069229126, + "230": 1.4858814477920532, + "231": 1.4008532762527466, + "232": 1.3361468315124512, + "233": 1.3507158756256104, + "234": 1.3724346160888672, + "235": 1.3959327936172485, + "236": 1.5130374431610107, + "237": 1.386102318763733, + "238": 1.3643090724945068, + "239": 1.2458302974700928, + "240": 1.2737303972244263, + "241": 1.3065357208251953, + "242": 1.3514872789382935, + "243": 1.368172526359558, + "244": 1.4374116659164429, + "245": 1.5546752214431763, + "246": 1.527165174484253, + "247": 1.428321123123169, + "248": 1.4025307893753052, + "249": 1.4584320783615112, + "250": 1.633088231086731, + "251": 1.6576405763626099, + "252": 1.3490997552871704, + "253": 1.2448540925979614, + "254": 1.2974003553390503, + "255": 1.337821125984192, + "256": 1.4290159940719604, + "257": 1.5032222270965576, + "258": 1.4525079727172852, + "259": 1.4253003597259521, + "260": 1.4548730850219727, + "261": 1.4741255044937134, + "262": 1.469092607498169, + "263": 1.3693534135818481, + "264": 1.367107629776001, + "265": 1.367874264717102, + "266": 1.3529359102249146, + "267": 1.3481435775756836, + "268": 1.4297680854797363, + "269": 1.4493815898895264 + }, + "loss": { + "216": 2.5005550384521484, + "217": 2.486264705657959, + "218": 2.504789352416992, + "219": 2.5058765411376953, + "220": 2.49249267578125, + "221": 2.526111125946045, + "222": 2.4997825622558594, + "223": 2.516441822052002, + "224": 2.492928981781006, + "225": 2.4929747581481934, + "226": 2.508449077606201, + "227": 2.526498317718506, + "228": 2.498701810836792, + "229": 2.4934847354888916, + "230": 2.499743938446045, + "231": 2.475876569747925, + "232": 2.487055540084839, + "233": 2.4964940547943115, + "234": 2.4587535858154297, + "235": 2.485304832458496, + "236": 2.4922542572021484, + "237": 2.4945578575134277, + "238": 2.4734184741973877, + "239": 2.496652603149414, + "240": 2.4686832427978516, + "241": 2.47457218170166, + "242": 2.4911718368530273, + "243": 2.4890055656433105, + "244": 2.481781482696533, + "245": 2.5030057430267334, + "246": 2.467012882232666, + "247": 2.4917941093444824, + "248": 2.4658961296081543, + "249": 2.4795992374420166, + "250": 2.4773926734924316, + "251": 2.496230125427246, + "252": 2.4655301570892334, + "253": 2.466240406036377, + "254": 2.4619393348693848, + "255": 2.485466480255127, + "256": 2.4605367183685303, + "257": 2.4908394813537598, + "258": 2.4808998107910156, + "259": 2.4751126766204834, + "260": 2.475172519683838, + "261": 2.4922049045562744, + "262": 2.485206127166748, + "263": 2.4630305767059326, + "264": 2.4606151580810547, + "265": 2.474362373352051, + "266": 2.4694809913635254, + "267": 2.4504642486572266, + "268": 2.4557876586914062, + "269": 2.4724512100219727 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 0.0955803, + 0.0957637, + 0.0954327, + 0.0957084, + 0.0947703, + 0.0943084, + 0.0944081, + 0.0959655, + 0.0966899, + 0.0960423, + 0.0935949, + 0.0934874, + 0.0960118, + 0.0958499, + 0.0957706, + 0.0961881, + 0.0965352, + 0.0964748, + 0.096311, + 0.0962276, + 0.0956089, + 0.0962919, + 0.0963738, + 0.0969854, + 0.0968186, + 0.0966659, + 0.0964637, + 0.0963759, + 0.0960037, + 0.0953942, + 0.0954865, + 0.0960674, + 0.0961644, + 0.0958874, + 0.0948923, + 0.0947833, + 0.0964404, + 0.096954, + 0.0966945, + 0.0965247, + 0.0960157, + 0.0956609, + 0.0959214, + 0.096058, + 0.0958996, + 0.0958224, + 0.0958385, + 0.096333, + 0.0963412, + 0.0963568, + 0.0964263, + 0.0964242, + 0.0960042, + 0.0959249 + ], + "train_epoch_time": 4.839824199676514, + "train_loss": 2.4656357970436127, + "train_score": 0.27237042672664913, + "val_loss": 2.513917387010857, + "val_score": 0.26844144644200735 + }, + { + "epoch": 5, + "grad_norm": 1.6595345735549927, + "learning_rate": 0.1, + "model_norm": 87.49935913085938, + "step_logs": { + "grad_norm": { + "270": 1.3668824434280396, + "271": 1.4833353757858276, + "272": 1.4340763092041016, + "273": 1.3308309316635132, + "274": 1.565377116203308, + "275": 1.5734424591064453, + "276": 1.3325433731079102, + "277": 1.2003980875015259, + "278": 1.1305818557739258, + "279": 1.1756256818771362, + "280": 1.2804739475250244, + "281": 1.681312918663025, + "282": 1.7337186336517334, + "283": 1.660081386566162, + "284": 1.6165975332260132, + "285": 1.638747215270996, + "286": 1.7784677743911743, + "287": 1.625307321548462, + "288": 1.4921928644180298, + "289": 1.213752031326294, + "290": 1.1863501071929932, + "291": 1.3044021129608154, + "292": 1.3800262212753296, + "293": 1.5274262428283691, + "294": 1.5698614120483398, + "295": 1.3262922763824463, + "296": 1.1837708950042725, + "297": 1.2319022417068481, + "298": 1.3980967998504639, + "299": 1.4667826890945435, + "300": 1.5803438425064087, + "301": 1.622593879699707, + "302": 1.4403170347213745, + "303": 1.2268928289413452, + "304": 1.438249111175537, + "305": 1.7772581577301025, + "306": 1.8345683813095093, + "307": 1.652672529220581, + "308": 1.6674407720565796, + "309": 1.690192699432373, + "310": 1.7400174140930176, + "311": 1.5748227834701538, + "312": 1.4441848993301392, + "313": 1.6087982654571533, + "314": 1.8951371908187866, + "315": 1.578356146812439, + "316": 1.3722999095916748, + "317": 1.3869855403900146, + "318": 1.3587955236434937, + "319": 1.3930338621139526, + "320": 1.4942517280578613, + "321": 1.5070290565490723, + "322": 1.7591979503631592, + "323": 1.6595345735549927 + }, + "loss": { + "270": 2.4582738876342773, + "271": 2.4680166244506836, + "272": 2.4740357398986816, + "273": 2.4807915687561035, + "274": 2.469590663909912, + "275": 2.4804065227508545, + "276": 2.463829517364502, + "277": 2.467266082763672, + "278": 2.4413952827453613, + "279": 2.4508750438690186, + "280": 2.450596332550049, + "281": 2.4681344032287598, + "282": 2.5184402465820312, + "283": 2.4834656715393066, + "284": 2.470820188522339, + "285": 2.4697139263153076, + "286": 2.489670753479004, + "287": 2.459014415740967, + "288": 2.46140193939209, + "289": 2.43727970123291, + "290": 2.455320358276367, + "291": 2.4319419860839844, + "292": 2.421353340148926, + "293": 2.4347152709960938, + "294": 2.462702751159668, + "295": 2.456295967102051, + "296": 2.43804931640625, + "297": 2.421156406402588, + "298": 2.468761444091797, + "299": 2.4393293857574463, + "300": 2.4439313411712646, + "301": 2.451199531555176, + "302": 2.4367570877075195, + "303": 2.447511672973633, + "304": 2.4106650352478027, + "305": 2.4494874477386475, + "306": 2.4421024322509766, + "307": 2.475184917449951, + "308": 2.4579808712005615, + "309": 2.439710855484009, + "310": 2.4429256916046143, + "311": 2.4578781127929688, + "312": 2.416517972946167, + "313": 2.4365131855010986, + "314": 2.442086696624756, + "315": 2.453134298324585, + "316": 2.4242966175079346, + "317": 2.428849220275879, + "318": 2.427621841430664, + "319": 2.419334888458252, + "320": 2.4207675457000732, + "321": 2.4236717224121094, + "322": 2.436763286590576, + "323": 2.4410738945007324 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 0.096339, + 0.0957326, + 0.0960095, + 0.0965534, + 0.0952733, + 0.0952467, + 0.0965219, + 0.0971627, + 0.097449, + 0.0972577, + 0.096763, + 0.0945836, + 0.0943685, + 0.0947432, + 0.0949771, + 0.0948435, + 0.0940273, + 0.0949025, + 0.0956726, + 0.0970665, + 0.0972138, + 0.0966201, + 0.0962161, + 0.0954279, + 0.0952348, + 0.0965431, + 0.0972064, + 0.0969612, + 0.0961919, + 0.0957763, + 0.0951388, + 0.0949033, + 0.0959171, + 0.0970166, + 0.0958861, + 0.093943, + 0.0935534, + 0.0947711, + 0.094647, + 0.0944691, + 0.0941648, + 0.0951972, + 0.0958631, + 0.0949565, + 0.0931503, + 0.0951678, + 0.0962612, + 0.0961907, + 0.0963366, + 0.0961442, + 0.0955916, + 0.0955244, + 0.094029, + 0.0946601 + ], + "train_epoch_time": 4.839600563049316, + "train_loss": 2.406728516897478, + "train_score": 0.29554900462008277, + "val_loss": 2.463793483311492, + "val_score": 0.2832188221671962 + }, + { + "epoch": 6, + "grad_norm": 1.5348509550094604, + "learning_rate": 0.1, + "model_norm": 87.51490783691406, + "step_logs": { + "grad_norm": { + "324": 1.5204179286956787, + "325": 1.475035309791565, + "326": 1.524332880973816, + "327": 1.5180416107177734, + "328": 1.5215808153152466, + "329": 1.5502034425735474, + "330": 1.4987207651138306, + "331": 1.413873314857483, + "332": 1.4547117948532104, + "333": 1.5063804388046265, + "334": 1.4725627899169922, + "335": 1.4559494256973267, + "336": 1.4342765808105469, + "337": 1.4491323232650757, + "338": 1.4730716943740845, + "339": 1.505921483039856, + "340": 1.585554838180542, + "341": 1.4796698093414307, + "342": 1.3740936517715454, + "343": 1.4039183855056763, + "344": 1.491923451423645, + "345": 1.477642297744751, + "346": 1.431505560874939, + "347": 1.4288281202316284, + "348": 1.5642822980880737, + "349": 1.5053727626800537, + "350": 1.5076608657836914, + "351": 1.4128873348236084, + "352": 1.454950213432312, + "353": 1.487735390663147, + "354": 1.5694859027862549, + "355": 1.595165491104126, + "356": 1.593092679977417, + "357": 1.603345513343811, + "358": 1.4710255861282349, + "359": 1.508878231048584, + "360": 1.6141520738601685, + "361": 1.6575957536697388, + "362": 1.5069397687911987, + "363": 1.4971407651901245, + "364": 1.5202319622039795, + "365": 1.4801868200302124, + "366": 1.5096116065979004, + "367": 1.4805908203125, + "368": 1.4322978258132935, + "369": 1.4010130167007446, + "370": 1.3681437969207764, + "371": 1.3306944370269775, + "372": 1.3294380903244019, + "373": 1.613341212272644, + "374": 1.9168848991394043, + "375": 2.206291913986206, + "376": 1.8911081552505493, + "377": 1.5348509550094604 + }, + "loss": { + "324": 2.4078660011291504, + "325": 2.4143972396850586, + "326": 2.4101521968841553, + "327": 2.4316511154174805, + "328": 2.3986916542053223, + "329": 2.42291259765625, + "330": 2.409928798675537, + "331": 2.42907452583313, + "332": 2.386866569519043, + "333": 2.410284996032715, + "334": 2.3986454010009766, + "335": 2.3882853984832764, + "336": 2.386298656463623, + "337": 2.3972373008728027, + "338": 2.3928301334381104, + "339": 2.419609546661377, + "340": 2.3975677490234375, + "341": 2.4221301078796387, + "342": 2.3810880184173584, + "343": 2.393470048904419, + "344": 2.398592233657837, + "345": 2.407968282699585, + "346": 2.405282974243164, + "347": 2.38531494140625, + "348": 2.3865158557891846, + "349": 2.4027490615844727, + "350": 2.3786582946777344, + "351": 2.4028944969177246, + "352": 2.3642420768737793, + "353": 2.3801767826080322, + "354": 2.4026432037353516, + "355": 2.3965651988983154, + "356": 2.368370532989502, + "357": 2.4082484245300293, + "358": 2.3832249641418457, + "359": 2.402068614959717, + "360": 2.384875535964966, + "361": 2.4020755290985107, + "362": 2.3848319053649902, + "363": 2.394064426422119, + "364": 2.364562511444092, + "365": 2.3977670669555664, + "366": 2.377725124359131, + "367": 2.3808393478393555, + "368": 2.3722097873687744, + "369": 2.3877806663513184, + "370": 2.3650426864624023, + "371": 2.3636343479156494, + "372": 2.3600821495056152, + "373": 2.3635053634643555, + "374": 2.3816933631896973, + "375": 2.448493480682373, + "376": 2.439532518386841, + "377": 2.3987228870391846 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 0.0954196, + 0.0956885, + 0.0954013, + 0.0954759, + 0.0953962, + 0.0952751, + 0.0955473, + 0.0960478, + 0.0957552, + 0.0955043, + 0.0956753, + 0.0957507, + 0.0958678, + 0.0958038, + 0.0956624, + 0.0955235, + 0.0950184, + 0.0956758, + 0.0961864, + 0.0960454, + 0.0955659, + 0.0956629, + 0.0959142, + 0.0958962, + 0.0951233, + 0.0954966, + 0.0954399, + 0.0960118, + 0.095715, + 0.095557, + 0.0951238, + 0.0949589, + 0.0949145, + 0.0949331, + 0.0956573, + 0.0954754, + 0.0948204, + 0.0945901, + 0.0954553, + 0.0955281, + 0.0953407, + 0.0956309, + 0.0954269, + 0.0955989, + 0.0958552, + 0.0960521, + 0.0961934, + 0.0963894, + 0.0963908, + 0.094781, + 0.0928385, + 0.0909585, + 0.0931707, + 0.0953194 + ], + "train_epoch_time": 4.839699745178223, + "train_loss": 2.3774107679232976, + "train_score": 0.2882577115100631, + "val_loss": 2.4424089657315977, + "val_score": 0.27687284719930316 + }, + { + "epoch": 7, + "grad_norm": 1.401110053062439, + "learning_rate": 0.1, + "model_norm": 87.53044891357422, + "step_logs": { + "grad_norm": { + "378": 1.5238767862319946, + "379": 1.5971482992172241, + "380": 1.5722147226333618, + "381": 1.4197204113006592, + "382": 1.5557610988616943, + "383": 1.5613852739334106, + "384": 1.4575474262237549, + "385": 1.3418397903442383, + "386": 1.3571803569793701, + "387": 1.2810419797897339, + "388": 1.4499485492706299, + "389": 1.4625566005706787, + "390": 1.4669203758239746, + "391": 1.4268333911895752, + "392": 1.4231853485107422, + "393": 1.4888262748718262, + "394": 1.5273696184158325, + "395": 1.4988442659378052, + "396": 1.532185673713684, + "397": 1.6217350959777832, + "398": 1.5982587337493896, + "399": 1.559019923210144, + "400": 1.4788126945495605, + "401": 1.3030979633331299, + "402": 1.3073769807815552, + "403": 1.4971922636032104, + "404": 1.3816274404525757, + "405": 1.3173834085464478, + "406": 1.3925862312316895, + "407": 1.3758513927459717, + "408": 1.3136552572250366, + "409": 1.3630038499832153, + "410": 1.5459775924682617, + "411": 1.6935113668441772, + "412": 1.7326537370681763, + "413": 1.6022096872329712, + "414": 1.4557265043258667, + "415": 1.253517985343933, + "416": 1.0845822095870972, + "417": 1.2073835134506226, + "418": 1.3733117580413818, + "419": 1.694291591644287, + "420": 1.775342583656311, + "421": 1.5067014694213867, + "422": 1.3718053102493286, + "423": 1.4382741451263428, + "424": 1.4876372814178467, + "425": 1.4780538082122803, + "426": 1.5483167171478271, + "427": 1.597402811050415, + "428": 1.4621162414550781, + "429": 1.4470614194869995, + "430": 1.4325398206710815, + "431": 1.401110053062439 + }, + "loss": { + "378": 2.3827290534973145, + "379": 2.374232053756714, + "380": 2.3913118839263916, + "381": 2.3736956119537354, + "382": 2.397104263305664, + "383": 2.3694705963134766, + "384": 2.3797054290771484, + "385": 2.3538684844970703, + "386": 2.3716933727264404, + "387": 2.366917610168457, + "388": 2.360433578491211, + "389": 2.3798165321350098, + "390": 2.3500328063964844, + "391": 2.3685245513916016, + "392": 2.376107931137085, + "393": 2.3573555946350098, + "394": 2.387089252471924, + "395": 2.3546972274780273, + "396": 2.3808541297912598, + "397": 2.355680227279663, + "398": 2.4044454097747803, + "399": 2.378586769104004, + "400": 2.3672568798065186, + "401": 2.3572709560394287, + "402": 2.344491481781006, + "403": 2.3278932571411133, + "404": 2.355198860168457, + "405": 2.3390891551971436, + "406": 2.3366992473602295, + "407": 2.3351902961730957, + "408": 2.3390352725982666, + "409": 2.343677043914795, + "410": 2.36517596244812, + "411": 2.369183301925659, + "412": 2.3717522621154785, + "413": 2.365341901779175, + "414": 2.3643646240234375, + "415": 2.3370747566223145, + "416": 2.320711374282837, + "417": 2.323245048522949, + "418": 2.331423759460449, + "419": 2.347210168838501, + "420": 2.3825855255126953, + "421": 2.3443851470947266, + "422": 2.354055404663086, + "423": 2.3360114097595215, + "424": 2.3482937812805176, + "425": 2.3313653469085693, + "426": 2.365417957305908, + "427": 2.3623032569885254, + "428": 2.347109079360962, + "429": 2.344539165496826, + "430": 2.3299427032470703, + "431": 2.3400559425354004 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 0.0953534, + 0.0949019, + 0.0950856, + 0.0959272, + 0.0951941, + 0.0951072, + 0.0957271, + 0.0963163, + 0.096262, + 0.0966495, + 0.0957366, + 0.0956991, + 0.0956221, + 0.0958794, + 0.0959121, + 0.0955097, + 0.0953412, + 0.0954469, + 0.0953015, + 0.0947128, + 0.094956, + 0.0951391, + 0.0955849, + 0.0965235, + 0.096483, + 0.0954065, + 0.0961053, + 0.0964229, + 0.0960157, + 0.0961048, + 0.0964423, + 0.0961877, + 0.0951904, + 0.0942928, + 0.0940479, + 0.0948529, + 0.0957108, + 0.0967476, + 0.0975283, + 0.0969581, + 0.0961125, + 0.0942374, + 0.093796, + 0.0953819, + 0.0961566, + 0.09576, + 0.0955, + 0.0955244, + 0.095177, + 0.0948759, + 0.0956443, + 0.0957252, + 0.0957819, + 0.0959743 + ], + "train_epoch_time": 4.839696645736694, + "train_loss": 2.341660093953312, + "train_score": 0.30697296437095195, + "val_loss": 2.4016604872439675, + "val_score": 0.2907756896845646 + }, + { + "epoch": 8, + "grad_norm": 1.2714836597442627, + "learning_rate": 0.1, + "model_norm": 87.5464096069336, + "step_logs": { + "grad_norm": { + "432": 1.3507100343704224, + "433": 1.3469970226287842, + "434": 1.6031737327575684, + "435": 1.6274340152740479, + "436": 1.561687707901001, + "437": 1.4427980184555054, + "438": 1.3580224514007568, + "439": 1.2925304174423218, + "440": 1.2868298292160034, + "441": 1.2257243394851685, + "442": 1.2014594078063965, + "443": 1.2329719066619873, + "444": 1.2841089963912964, + "445": 1.3346121311187744, + "446": 1.3809603452682495, + "447": 1.3230992555618286, + "448": 1.4006848335266113, + "449": 1.6567952632904053, + "450": 1.7916064262390137, + "451": 1.8325364589691162, + "452": 2.1758933067321777, + "453": 1.8093030452728271, + "454": 1.6594533920288086, + "455": 1.5646799802780151, + "456": 1.4760572910308838, + "457": 1.4579269886016846, + "458": 1.5229400396347046, + "459": 1.7276986837387085, + "460": 1.8374427556991577, + "461": 1.677546501159668, + "462": 1.6864469051361084, + "463": 1.5372563600540161, + "464": 1.479378581047058, + "465": 1.3358135223388672, + "466": 1.3171627521514893, + "467": 1.2679260969161987, + "468": 1.2107120752334595, + "469": 1.259350061416626, + "470": 1.3164029121398926, + "471": 1.4148625135421753, + "472": 1.5306681394577026, + "473": 1.6661498546600342, + "474": 1.5870965719223022, + "475": 1.4273427724838257, + "476": 1.3611736297607422, + "477": 1.4415041208267212, + "478": 1.5811823606491089, + "479": 1.6015987396240234, + "480": 1.605069875717163, + "481": 1.63101065158844, + "482": 1.6104609966278076, + "483": 1.3696142435073853, + "484": 1.2675796747207642, + "485": 1.2714836597442627 + }, + "loss": { + "432": 2.3457813262939453, + "433": 2.3262274265289307, + "434": 2.3438339233398438, + "435": 2.348609685897827, + "436": 2.3572678565979004, + "437": 2.3277664184570312, + "438": 2.3194456100463867, + "439": 2.3039093017578125, + "440": 2.3299155235290527, + "441": 2.3128645420074463, + "442": 2.313471794128418, + "443": 2.3038887977600098, + "444": 2.3367955684661865, + "445": 2.2974181175231934, + "446": 2.313506603240967, + "447": 2.3145275115966797, + "448": 2.3211581707000732, + "449": 2.34653902053833, + "450": 2.3669865131378174, + "451": 2.356989860534668, + "452": 2.3780863285064697, + "453": 2.3617024421691895, + "454": 2.3487439155578613, + "455": 2.33650803565979, + "456": 2.323024034500122, + "457": 2.3067703247070312, + "458": 2.3273158073425293, + "459": 2.335646152496338, + "460": 2.352998733520508, + "461": 2.3597097396850586, + "462": 2.346374034881592, + "463": 2.331382989883423, + "464": 2.311239719390869, + "465": 2.328648567199707, + "466": 2.3075618743896484, + "467": 2.3089752197265625, + "468": 2.3243298530578613, + "469": 2.312345027923584, + "470": 2.313727617263794, + "471": 2.341294288635254, + "472": 2.337461471557617, + "473": 2.3379034996032715, + "474": 2.3516769409179688, + "475": 2.3145625591278076, + "476": 2.3131296634674072, + "477": 2.333616256713867, + "478": 2.3307414054870605, + "479": 2.3015410900115967, + "480": 2.340886354446411, + "481": 2.316655158996582, + "482": 2.3463101387023926, + "483": 2.342644214630127, + "484": 2.3145699501037598, + "485": 2.322575092315674 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 0.0962568, + 0.0962465, + 0.0948022, + 0.0946624, + 0.0950814, + 0.09572, + 0.0961764, + 0.0965012, + 0.0965683, + 0.0968543, + 0.0969746, + 0.0968061, + 0.096592, + 0.0962682, + 0.0960416, + 0.0963561, + 0.0959452, + 0.0944742, + 0.0936501, + 0.0933499, + 0.0909468, + 0.0935187, + 0.0944624, + 0.0950217, + 0.0955206, + 0.0955957, + 0.0952536, + 0.0939938, + 0.093306, + 0.0943726, + 0.0942857, + 0.0951763, + 0.0954794, + 0.09631, + 0.096377, + 0.0966358, + 0.0969432, + 0.0966844, + 0.0963903, + 0.0959002, + 0.0952275, + 0.0943957, + 0.0949167, + 0.0957845, + 0.0961493, + 0.0957376, + 0.0949096, + 0.0947215, + 0.0947843, + 0.0945703, + 0.0947625, + 0.0961504, + 0.0966455, + 0.0966367 + ], + "train_epoch_time": 4.839443922042847, + "train_loss": 2.3077662786076023, + "train_score": 0.31930259139500183, + "val_loss": 2.3614000239410577, + "val_score": 0.3028666761657811 + }, + { + "epoch": 9, + "grad_norm": 1.468070387840271, + "learning_rate": 0.1, + "model_norm": 87.5635757446289, + "step_logs": { + "grad_norm": { + "486": 1.3421059846878052, + "487": 1.5410593748092651, + "488": 1.6748708486557007, + "489": 1.628523349761963, + "490": 1.585997223854065, + "491": 1.5138816833496094, + "492": 1.4490344524383545, + "493": 1.5533103942871094, + "494": 1.6497552394866943, + "495": 1.6603319644927979, + "496": 1.6849194765090942, + "497": 1.6944397687911987, + "498": 1.6065895557403564, + "499": 1.6698063611984253, + "500": 1.6528459787368774, + "501": 1.3871923685073853, + "502": 1.2377095222473145, + "503": 1.3084832429885864, + "504": 1.5100948810577393, + "505": 1.624717116355896, + "506": 1.595871925354004, + "507": 1.3191120624542236, + "508": 1.2762936353683472, + "509": 1.3916237354278564, + "510": 1.4488248825073242, + "511": 1.4460535049438477, + "512": 1.5222827196121216, + "513": 1.4093161821365356, + "514": 1.328999400138855, + "515": 1.2952208518981934, + "516": 1.382433533668518, + "517": 1.4634521007537842, + "518": 1.6107661724090576, + "519": 1.571621298789978, + "520": 1.5592676401138306, + "521": 1.6456609964370728, + "522": 1.6851128339767456, + "523": 1.666506290435791, + "524": 1.5055006742477417, + "525": 1.4418994188308716, + "526": 1.3877149820327759, + "527": 1.3972231149673462, + "528": 1.399568796157837, + "529": 1.3434568643569946, + "530": 1.3118952512741089, + "531": 1.3026467561721802, + "532": 1.411010980606079, + "533": 1.413327693939209, + "534": 1.3558568954467773, + "535": 1.2408007383346558, + "536": 1.1616082191467285, + "537": 1.165681004524231, + "538": 1.3844928741455078, + "539": 1.468070387840271 + }, + "loss": { + "486": 2.3154544830322266, + "487": 2.3100531101226807, + "488": 2.343207836151123, + "489": 2.333224296569824, + "490": 2.3237388134002686, + "491": 2.3195114135742188, + "492": 2.321809768676758, + "493": 2.326282501220703, + "494": 2.32395601272583, + "495": 2.31506609916687, + "496": 2.330772876739502, + "497": 2.3274450302124023, + "498": 2.316956043243408, + "499": 2.3209829330444336, + "500": 2.3378071784973145, + "501": 2.302727222442627, + "502": 2.2809460163116455, + "503": 2.300558567047119, + "504": 2.3031578063964844, + "505": 2.3129429817199707, + "506": 2.3283753395080566, + "507": 2.2861313819885254, + "508": 2.2917866706848145, + "509": 2.29498028755188, + "510": 2.2883615493774414, + "511": 2.3073911666870117, + "512": 2.297729969024658, + "513": 2.3086354732513428, + "514": 2.3066201210021973, + "515": 2.2947752475738525, + "516": 2.2801079750061035, + "517": 2.2713325023651123, + "518": 2.3072500228881836, + "519": 2.301114082336426, + "520": 2.2939233779907227, + "521": 2.2943079471588135, + "522": 2.2893242835998535, + "523": 2.298323631286621, + "524": 2.3116707801818848, + "525": 2.3140971660614014, + "526": 2.288111448287964, + "527": 2.2836203575134277, + "528": 2.3037538528442383, + "529": 2.293445587158203, + "530": 2.3005857467651367, + "531": 2.2702369689941406, + "532": 2.2708632946014404, + "533": 2.270054340362549, + "534": 2.2814066410064697, + "535": 2.2814159393310547, + "536": 2.276214599609375, + "537": 2.2891175746917725, + "538": 2.2826390266418457, + "539": 2.2869319915771484 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 0.096256, + 0.095111, + 0.0943523, + 0.0946223, + 0.0948655, + 0.0952922, + 0.0956739, + 0.0950698, + 0.0944682, + 0.0943807, + 0.0942595, + 0.0941904, + 0.0947238, + 0.0943337, + 0.0944797, + 0.0959893, + 0.096751, + 0.0964124, + 0.0952829, + 0.0946017, + 0.0948145, + 0.0963338, + 0.0965681, + 0.0959516, + 0.0956147, + 0.0956652, + 0.0951994, + 0.0958758, + 0.0963125, + 0.0964736, + 0.0959777, + 0.0954977, + 0.0946767, + 0.0949064, + 0.0949672, + 0.0944269, + 0.0941603, + 0.0943024, + 0.0953267, + 0.0957009, + 0.0959618, + 0.0959008, + 0.0959221, + 0.0962141, + 0.0963944, + 0.0963974, + 0.0958004, + 0.0957857, + 0.0961271, + 0.0967359, + 0.0971213, + 0.0971176, + 0.0959705, + 0.0955 + ], + "train_epoch_time": 4.840027809143066, + "train_loss": 2.276319026810196, + "train_score": 0.3284500538105602, + "val_loss": 2.3410527150474927, + "val_score": 0.31462130432971996 + }, + { + "epoch": 10, + "grad_norm": 1.7734715938568115, + "learning_rate": 0.1, + "model_norm": 87.58110046386719, + "step_logs": { + "grad_norm": { + "540": 1.3855658769607544, + "541": 1.3699744939804077, + "542": 1.5931072235107422, + "543": 1.757995843887329, + "544": 2.2765657901763916, + "545": 2.043253183364868, + "546": 2.891676425933838, + "547": 2.6693274974823, + "548": 2.9742679595947266, + "549": 2.0444986820220947, + "550": 4.142254829406738, + "551": 2.08813214302063, + "552": 2.854707956314087, + "553": 1.805907130241394, + "554": 2.043489933013916, + "555": 1.8520097732543945, + "556": 1.669492483139038, + "557": 1.502358317375183, + "558": 1.2358567714691162, + "559": 1.2964330911636353, + "560": 1.280662178993225, + "561": 1.2584073543548584, + "562": 1.2414016723632812, + "563": 1.2102011442184448, + "564": 1.237259864807129, + "565": 1.2826976776123047, + "566": 1.3327841758728027, + "567": 1.3804187774658203, + "568": 1.5000768899917603, + "569": 1.5731277465820312, + "570": 1.5431245565414429, + "571": 1.4646320343017578, + "572": 1.3928265571594238, + "573": 1.3860430717468262, + "574": 1.4848979711532593, + "575": 1.4542369842529297, + "576": 1.4103590250015259, + "577": 1.4892785549163818, + "578": 1.4899351596832275, + "579": 1.3885960578918457, + "580": 1.2535943984985352, + "581": 1.1883963346481323, + "582": 1.166892170906067, + "583": 1.1567583084106445, + "584": 1.0995984077453613, + "585": 1.2123874425888062, + "586": 1.283324122428894, + "587": 1.437957525253296, + "588": 1.3781952857971191, + "589": 1.2244057655334473, + "590": 1.1463439464569092, + "591": 1.3378748893737793, + "592": 1.6402194499969482, + "593": 1.7734715938568115 + }, + "loss": { + "540": 2.286694049835205, + "541": 2.2679097652435303, + "542": 2.2743968963623047, + "543": 2.3044919967651367, + "544": 2.3004279136657715, + "545": 2.3538618087768555, + "546": 2.3604750633239746, + "547": 2.371980905532837, + "548": 2.3429999351501465, + "549": 2.3255491256713867, + "550": 2.4214377403259277, + "551": 2.3725318908691406, + "552": 2.339686393737793, + "553": 2.3350212574005127, + "554": 2.3282346725463867, + "555": 2.320429563522339, + "556": 2.3140487670898438, + "557": 2.299532413482666, + "558": 2.24477481842041, + "559": 2.264753818511963, + "560": 2.2702298164367676, + "561": 2.2654409408569336, + "562": 2.268239736557007, + "563": 2.2638630867004395, + "564": 2.2540338039398193, + "565": 2.253985643386841, + "566": 2.2684760093688965, + "567": 2.2714717388153076, + "568": 2.2746026515960693, + "569": 2.276184320449829, + "570": 2.279322624206543, + "571": 2.2680301666259766, + "572": 2.2682130336761475, + "573": 2.246882438659668, + "574": 2.2479441165924072, + "575": 2.264397144317627, + "576": 2.2778656482696533, + "577": 2.2779603004455566, + "578": 2.256025791168213, + "579": 2.281663179397583, + "580": 2.2363791465759277, + "581": 2.2800374031066895, + "582": 2.237774610519409, + "583": 2.2278127670288086, + "584": 2.2422704696655273, + "585": 2.2439115047454834, + "586": 2.2704453468322754, + "587": 2.2174038887023926, + "588": 2.227482318878174, + "589": 2.2529821395874023, + "590": 2.2411389350891113, + "591": 2.2315709590911865, + "592": 2.2589306831359863, + "593": 2.2638437747955322 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 0.0959714, + 0.0960266, + 0.0947154, + 0.0937159, + 0.0898757, + 0.0918542, + 0.084953, + 0.0869416, + 0.0841198, + 0.091754, + 0.0738389, + 0.0915842, + 0.0851676, + 0.0934724, + 0.0917702, + 0.0931179, + 0.0943197, + 0.0953219, + 0.0967099, + 0.0964221, + 0.0965137, + 0.0966229, + 0.0967145, + 0.0968667, + 0.0967158, + 0.0964787, + 0.0962323, + 0.0959743, + 0.0952867, + 0.0948441, + 0.0950358, + 0.0954845, + 0.095899, + 0.0959002, + 0.095325, + 0.0955386, + 0.0958165, + 0.0953577, + 0.0953108, + 0.0959459, + 0.0966058, + 0.096996, + 0.0970474, + 0.0970844, + 0.0973746, + 0.0968286, + 0.0965001, + 0.0955452, + 0.0959108, + 0.0967801, + 0.0971517, + 0.0961442, + 0.0943798, + 0.0935046 + ], + "train_epoch_time": 4.839099884033203, + "train_loss": 2.2669085151666892, + "train_score": 0.32776856176993424, + "val_loss": 2.320276185100306, + "val_score": 0.31862173572473496 + }, + { + "epoch": 11, + "grad_norm": 1.5081864595413208, + "learning_rate": 0.1, + "model_norm": 87.59841918945312, + "step_logs": { + "grad_norm": { + "594": 1.6634496450424194, + "595": 1.619261384010315, + "596": 1.6665533781051636, + "597": 1.7308590412139893, + "598": 1.7479417324066162, + "599": 1.6680026054382324, + "600": 1.5196536779403687, + "601": 1.3531891107559204, + "602": 1.228021264076233, + "603": 1.3028531074523926, + "604": 1.2772009372711182, + "605": 1.1832040548324585, + "606": 1.2076756954193115, + "607": 1.4356175661087036, + "608": 1.580788254737854, + "609": 1.5609453916549683, + "610": 1.387777328491211, + "611": 1.3024975061416626, + "612": 1.23203706741333, + "613": 1.25006902217865, + "614": 1.2798360586166382, + "615": 1.4011482000350952, + "616": 1.327973484992981, + "617": 1.3721123933792114, + "618": 1.467121958732605, + "619": 1.5741314888000488, + "620": 1.461991310119629, + "621": 1.1995444297790527, + "622": 0.9937043786048889, + "623": 1.0829651355743408, + "624": 1.353103518486023, + "625": 1.424338936805725, + "626": 1.445154070854187, + "627": 1.4511809349060059, + "628": 1.35936439037323, + "629": 1.2982182502746582, + "630": 1.1945499181747437, + "631": 1.144911289215088, + "632": 1.2437695264816284, + "633": 1.581872582435608, + "634": 1.6544958353042603, + "635": 1.4697126150131226, + "636": 1.4244686365127563, + "637": 1.4537724256515503, + "638": 1.4634218215942383, + "639": 1.4124324321746826, + "640": 1.3953214883804321, + "641": 1.372344732284546, + "642": 1.384291172027588, + "643": 1.3955241441726685, + "644": 1.4376662969589233, + "645": 1.3689109086990356, + "646": 1.4320026636123657, + "647": 1.5081864595413208 + }, + "loss": { + "594": 2.2812418937683105, + "595": 2.2569174766540527, + "596": 2.2505056858062744, + "597": 2.2952072620391846, + "598": 2.2475779056549072, + "599": 2.2572970390319824, + "600": 2.264566421508789, + "601": 2.246230125427246, + "602": 2.244356155395508, + "603": 2.2344260215759277, + "604": 2.2239279747009277, + "605": 2.225612163543701, + "606": 2.243206262588501, + "607": 2.238051652908325, + "608": 2.250304937362671, + "609": 2.2463605403900146, + "610": 2.2210922241210938, + "611": 2.242798328399658, + "612": 2.217909812927246, + "613": 2.2258362770080566, + "614": 2.2233424186706543, + "615": 2.2090396881103516, + "616": 2.243584632873535, + "617": 2.212984561920166, + "618": 2.2275378704071045, + "619": 2.2615268230438232, + "620": 2.23750638961792, + "621": 2.2293214797973633, + "622": 2.218424081802368, + "623": 2.2181177139282227, + "624": 2.227792263031006, + "625": 2.226933717727661, + "626": 2.2210681438446045, + "627": 2.2508931159973145, + "628": 2.22235107421875, + "629": 2.2165441513061523, + "630": 2.200906991958618, + "631": 2.210453510284424, + "632": 2.208587646484375, + "633": 2.228396415710449, + "634": 2.2476110458374023, + "635": 2.2339937686920166, + "636": 2.2140073776245117, + "637": 2.219712734222412, + "638": 2.227468490600586, + "639": 2.229064464569092, + "640": 2.2381138801574707, + "641": 2.227644443511963, + "642": 2.2352161407470703, + "643": 2.197134494781494, + "644": 2.2155399322509766, + "645": 2.2270984649658203, + "646": 2.2132015228271484, + "647": 2.2277960777282715 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 0.094282, + 0.0945101, + 0.094188, + 0.0938735, + 0.0936357, + 0.094195, + 0.0951485, + 0.0960836, + 0.0967496, + 0.0963406, + 0.0964623, + 0.0969508, + 0.0968515, + 0.0955982, + 0.0947397, + 0.0948557, + 0.0958446, + 0.0963557, + 0.0966913, + 0.0966087, + 0.0964473, + 0.0957455, + 0.0962185, + 0.0959198, + 0.0953912, + 0.0948062, + 0.0954414, + 0.0968737, + 0.0978229, + 0.0974244, + 0.096053, + 0.0956434, + 0.0955096, + 0.0955311, + 0.0960085, + 0.0963374, + 0.0968601, + 0.0971203, + 0.0966164, + 0.0946839, + 0.0942601, + 0.0953884, + 0.0956183, + 0.0954557, + 0.0954132, + 0.0957168, + 0.0958318, + 0.0959443, + 0.0958897, + 0.0957562, + 0.0955434, + 0.0959628, + 0.0955724, + 0.0951429 + ], + "train_epoch_time": 4.839369297027588, + "train_loss": 2.2234227234527064, + "train_score": 0.34212921445646793, + "val_loss": 2.303082422602464, + "val_score": 0.3244564433322298 + }, + { + "epoch": 12, + "grad_norm": 1.0092487335205078, + "learning_rate": 0.1, + "model_norm": 87.61363220214844, + "step_logs": { + "grad_norm": { + "648": 1.5923807621002197, + "649": 1.4241580963134766, + "650": 1.380035400390625, + "651": 1.3389984369277954, + "652": 1.377500295639038, + "653": 1.415576457977295, + "654": 1.3388686180114746, + "655": 1.251926302909851, + "656": 1.3301371335983276, + "657": 1.42147696018219, + "658": 1.340073585510254, + "659": 1.2445037364959717, + "660": 1.1605985164642334, + "661": 1.02606999874115, + "662": 1.0356559753417969, + "663": 1.0613659620285034, + "664": 1.2157117128372192, + "665": 1.2903574705123901, + "666": 1.2684019804000854, + "667": 1.2516309022903442, + "668": 1.2222882509231567, + "669": 1.1940480470657349, + "670": 1.1551145315170288, + "671": 1.160732626914978, + "672": 1.1861381530761719, + "673": 1.07341730594635, + "674": 1.0808777809143066, + "675": 1.1456283330917358, + "676": 1.2443689107894897, + "677": 1.315761685371399, + "678": 1.279556155204773, + "679": 1.284864068031311, + "680": 1.2411867380142212, + "681": 1.1781779527664185, + "682": 1.0981937646865845, + "683": 1.0269218683242798, + "684": 0.9819169640541077, + "685": 0.9329985976219177, + "686": 0.9042189717292786, + "687": 1.0235469341278076, + "688": 1.0084115266799927, + "689": 0.9927877187728882, + "690": 0.995064377784729, + "691": 1.0337671041488647, + "692": 1.0011017322540283, + "693": 0.9842377305030823, + "694": 0.8962352275848389, + "695": 0.8242790699005127, + "696": 0.844769299030304, + "697": 0.8928088545799255, + "698": 0.9202558994293213, + "699": 0.8769364953041077, + "700": 0.9151707887649536, + "701": 1.0092487335205078 + }, + "loss": { + "648": 2.240493059158325, + "649": 2.242720603942871, + "650": 2.2161097526550293, + "651": 2.2214736938476562, + "652": 2.208665370941162, + "653": 2.20377254486084, + "654": 2.2091503143310547, + "655": 2.2058968544006348, + "656": 2.2032339572906494, + "657": 2.2002978324890137, + "658": 2.2271029949188232, + "659": 2.183682441711426, + "660": 2.1819634437561035, + "661": 2.184974431991577, + "662": 2.181095600128174, + "663": 2.1750638484954834, + "664": 2.191298007965088, + "665": 2.210533618927002, + "666": 2.1872763633728027, + "667": 2.208545684814453, + "668": 2.1755294799804688, + "669": 2.187657356262207, + "670": 2.1991405487060547, + "671": 2.1800997257232666, + "672": 2.157829761505127, + "673": 2.1626474857330322, + "674": 2.175178050994873, + "675": 2.174002170562744, + "676": 2.167180299758911, + "677": 2.186819553375244, + "678": 2.1927907466888428, + "679": 2.150287628173828, + "680": 2.161402940750122, + "681": 2.182260274887085, + "682": 2.2066738605499268, + "683": 2.160205364227295, + "684": 2.160770893096924, + "685": 2.163475275039673, + "686": 2.1615588665008545, + "687": 2.171290874481201, + "688": 2.163928985595703, + "689": 2.157331943511963, + "690": 2.13478422164917, + "691": 2.1531004905700684, + "692": 2.163862466812134, + "693": 2.1611220836639404, + "694": 2.160479784011841, + "695": 2.1799941062927246, + "696": 2.155761241912842, + "697": 2.1345863342285156, + "698": 2.1607890129089355, + "699": 2.1383707523345947, + "700": 2.1592798233032227, + "701": 2.1568689346313477 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 0.0946443, + 0.0951086, + 0.0947446, + 0.0944089, + 0.0936091, + 0.0928237, + 0.0926756, + 0.0925337, + 0.0915667, + 0.090519, + 0.0904072, + 0.0902274, + 0.0900198, + 0.0899814, + 0.0893509, + 0.0886575, + 0.0874652, + 0.0865874, + 0.086075, + 0.085592, + 0.0850933, + 0.0846366, + 0.084212, + 0.0835863, + 0.0828834, + 0.0827047, + 0.0820997, + 0.0812886, + 0.0803419, + 0.0795147, + 0.079076, + 0.0784296, + 0.0780158, + 0.0776628, + 0.0773424, + 0.0769231, + 0.0764511, + 0.075981, + 0.075451, + 0.0745602, + 0.0739992, + 0.0734383, + 0.0728231, + 0.0721417, + 0.0716312, + 0.0710718, + 0.0706643, + 0.070211, + 0.0695602, + 0.0688563, + 0.0682117, + 0.0676846, + 0.0670187, + 0.0662317 + ], + "train_epoch_time": 4.839498043060303, + "train_loss": 2.151258555238524, + "train_score": 0.3657482963124854, + "val_loss": 2.2226866170267834, + "val_score": 0.34736473925078093 + }, + { + "epoch": 13, + "grad_norm": 0.7189573049545288, + "learning_rate": 0.06666666666666668, + "model_norm": 87.6231689453125, + "step_logs": { + "grad_norm": { + "702": 1.0570307970046997, + "703": 0.9686579704284668, + "704": 0.9260565638542175, + "705": 0.8788942098617554, + "706": 0.9802384376525879, + "707": 1.0322258472442627, + "708": 0.9872215390205383, + "709": 0.950273871421814, + "710": 0.9797484278678894, + "711": 1.0293323993682861, + "712": 0.957111120223999, + "713": 1.0317163467407227, + "714": 1.1893115043640137, + "715": 1.1397957801818848, + "716": 0.9731723666191101, + "717": 0.9121640920639038, + "718": 0.8991885781288147, + "719": 0.8456013798713684, + "720": 0.8160295486450195, + "721": 0.774200439453125, + "722": 0.7564179301261902, + "723": 0.6807435750961304, + "724": 0.680670976638794, + "725": 0.7307837009429932, + "726": 0.7699684500694275, + "727": 0.7705569267272949, + "728": 0.7109479904174805, + "729": 0.703255295753479, + "730": 0.8134910464286804, + "731": 0.7388712763786316, + "732": 0.7542753219604492, + "733": 0.7013862729072571, + "734": 0.7000980973243713, + "735": 0.7420291900634766, + "736": 0.6798243522644043, + "737": 0.6653239727020264, + "738": 0.6890830993652344, + "739": 0.6895594000816345, + "740": 0.7794739007949829, + "741": 0.869519829750061, + "742": 0.7716234922409058, + "743": 0.6729789972305298, + "744": 0.746738612651825, + "745": 0.720713198184967, + "746": 0.7666577100753784, + "747": 0.7970916032791138, + "748": 0.7638863921165466, + "749": 0.8001917004585266, + "750": 0.7377480268478394, + "751": 0.7141143083572388, + "752": 0.7794457674026489, + "753": 0.797374963760376, + "754": 0.7309591770172119, + "755": 0.7189573049545288 + }, + "loss": { + "702": 2.147902488708496, + "703": 2.143873691558838, + "704": 2.146829128265381, + "705": 2.138852119445801, + "706": 2.1529009342193604, + "707": 2.130876064300537, + "708": 2.13419771194458, + "709": 2.122539520263672, + "710": 2.1355533599853516, + "711": 2.1318490505218506, + "712": 2.115267753601074, + "713": 2.1396265029907227, + "714": 2.1589672565460205, + "715": 2.1399919986724854, + "716": 2.141000986099243, + "717": 2.1422321796417236, + "718": 2.1371140480041504, + "719": 2.152160167694092, + "720": 2.1370935440063477, + "721": 2.123014450073242, + "722": 2.1325559616088867, + "723": 2.1401262283325195, + "724": 2.1458566188812256, + "725": 2.120518207550049, + "726": 2.114623546600342, + "727": 2.1158604621887207, + "728": 2.1445391178131104, + "729": 2.116807460784912, + "730": 2.1336774826049805, + "731": 2.138178825378418, + "732": 2.132021427154541, + "733": 2.121915340423584, + "734": 2.134155750274658, + "735": 2.139400005340576, + "736": 2.110149383544922, + "737": 2.126380205154419, + "738": 2.1152446269989014, + "739": 2.1401095390319824, + "740": 2.112760066986084, + "741": 2.111144542694092, + "742": 2.1270744800567627, + "743": 2.1188018321990967, + "744": 2.125664234161377, + "745": 2.112760066986084, + "746": 2.1231720447540283, + "747": 2.110657215118408, + "748": 2.1248974800109863, + "749": 2.1240930557250977, + "750": 2.1287522315979004, + "751": 2.1012113094329834, + "752": 2.1266796588897705, + "753": 2.1155855655670166, + "754": 2.1015982627868652, + "755": 2.107381582260132 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 0.0655304, + 0.0651083, + 0.064588, + 0.064065, + 0.0632908, + 0.0625854, + 0.0620706, + 0.0615297, + 0.0608837, + 0.060197, + 0.0597117, + 0.0589978, + 0.0581308, + 0.0576164, + 0.0572895, + 0.0567745, + 0.0561865, + 0.0556535, + 0.0550788, + 0.0545155, + 0.053928, + 0.0533933, + 0.0527839, + 0.0521247, + 0.0514777, + 0.0508689, + 0.0503171, + 0.0497096, + 0.0490074, + 0.0484637, + 0.0478408, + 0.0472704, + 0.0466622, + 0.0460221, + 0.0454517, + 0.0448514, + 0.0442238, + 0.0436148, + 0.0429431, + 0.0422702, + 0.0417302, + 0.041176, + 0.0405242, + 0.0399265, + 0.0392913, + 0.0386626, + 0.0380715, + 0.0374418, + 0.0368625, + 0.0362595, + 0.0356203, + 0.0350001, + 0.0344167, + 0.0338098 + ], + "train_epoch_time": 4.840372085571289, + "train_loss": 2.1106098669673314, + "train_score": 0.37544050399679024, + "val_loss": 2.1894215573947, + "val_score": 0.3537510763148079 + }, + { + "epoch": 14, + "grad_norm": 0.6357522010803223, + "learning_rate": 0.03333333333333334, + "model_norm": 87.6263427734375, + "step_logs": { + "grad_norm": { + "756": 0.7133497595787048, + "757": 0.6459547281265259, + "758": 0.7396783828735352, + "759": 0.6765357851982117, + "760": 0.7040878534317017, + "761": 0.7207474708557129, + "762": 0.6932805776596069, + "763": 0.6927089691162109, + "764": 0.7085327506065369, + "765": 0.6976022124290466, + "766": 0.719830334186554, + "767": 0.700122594833374, + "768": 0.6259276270866394, + "769": 0.6756634712219238, + "770": 0.6431193947792053, + "771": 0.6716915369033813, + "772": 0.6533259153366089, + "773": 0.6473256945610046, + "774": 0.6735907196998596, + "775": 0.6981966495513916, + "776": 0.7065365314483643, + "777": 0.635334312915802, + "778": 0.6984379291534424, + "779": 0.6666753888130188, + "780": 0.6803655624389648, + "781": 0.6826092600822449, + "782": 0.662533164024353, + "783": 0.6707337498664856, + "784": 0.6607283353805542, + "785": 0.6483444571495056, + "786": 0.7350357174873352, + "787": 0.6047378182411194, + "788": 0.6693747639656067, + "789": 0.6867666244506836, + "790": 0.6892209649085999, + "791": 0.7210102677345276, + "792": 0.6311251521110535, + "793": 0.632441520690918, + "794": 0.6815193891525269, + "795": 0.6448667645454407, + "796": 0.6682194471359253, + "797": 0.6627563834190369, + "798": 0.6034180521965027, + "799": 0.6212030053138733, + "800": 0.6476743817329407, + "801": 0.6449694633483887, + "802": 0.6639724373817444, + "803": 0.6212056875228882, + "804": 0.6693212389945984, + "805": 0.6246767640113831, + "806": 0.6212588548660278, + "807": 0.6836837530136108, + "808": 0.6258649826049805, + "809": 0.6357522010803223 + }, + "loss": { + "756": 2.0989303588867188, + "757": 2.103926658630371, + "758": 2.120269775390625, + "759": 2.1012096405029297, + "760": 2.079378366470337, + "761": 2.121035099029541, + "762": 2.118846893310547, + "763": 2.1070432662963867, + "764": 2.1176600456237793, + "765": 2.0965466499328613, + "766": 2.115776538848877, + "767": 2.127931833267212, + "768": 2.1014981269836426, + "769": 2.083810806274414, + "770": 2.1252427101135254, + "771": 2.1187243461608887, + "772": 2.120224952697754, + "773": 2.101409435272217, + "774": 2.110621929168701, + "775": 2.10959529876709, + "776": 2.1071276664733887, + "777": 2.112398624420166, + "778": 2.0872693061828613, + "779": 2.108560562133789, + "780": 2.0847537517547607, + "781": 2.0925183296203613, + "782": 2.112980365753174, + "783": 2.1092426776885986, + "784": 2.1252660751342773, + "785": 2.0984835624694824, + "786": 2.101619243621826, + "787": 2.1003973484039307, + "788": 2.1352851390838623, + "789": 2.102938652038574, + "790": 2.100388526916504, + "791": 2.1139585971832275, + "792": 2.109753370285034, + "793": 2.099212884902954, + "794": 2.0989956855773926, + "795": 2.116032361984253, + "796": 2.0871188640594482, + "797": 2.1162281036376953, + "798": 2.102323055267334, + "799": 2.0910918712615967, + "800": 2.1121444702148438, + "801": 2.1059091091156006, + "802": 2.0819010734558105, + "803": 2.0846638679504395, + "804": 2.0995519161224365, + "805": 2.098008155822754, + "806": 2.0978121757507324, + "807": 2.107565402984619, + "808": 2.0821597576141357, + "809": 2.0969314575195312 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 0.0331992, + 0.0326103, + 0.0319664, + 0.0313739, + 0.0307511, + 0.0301353, + 0.0295304, + 0.0289168, + 0.0282998, + 0.0276885, + 0.0270705, + 0.0264623, + 0.0258634, + 0.0252387, + 0.0246322, + 0.0240125, + 0.0234015, + 0.0227876, + 0.0221693, + 0.0215511, + 0.0209356, + 0.0203308, + 0.0197076, + 0.0190973, + 0.0184805, + 0.0178656, + 0.017253, + 0.0166371, + 0.016023, + 0.0154083, + 0.0147867, + 0.01418, + 0.0135609, + 0.0129441, + 0.0123285, + 0.0117115, + 0.0110995, + 0.0104833, + 0.00986576, + 0.00925084, + 0.00863399, + 0.00801801, + 0.00740266, + 0.00678587, + 0.00616906, + 0.00555251, + 0.00493569, + 0.00431926, + 0.00370224, + 0.00308553, + 0.00246858, + 0.00185147, + 0.00123442, + 0.000617247 + ], + "train_epoch_time": 4.842392444610596, + "train_loss": 2.099011701490821, + "train_score": 0.37810930776459245, + "val_loss": 2.1825786824888804, + "val_score": 0.3543071898404 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:23:40.336308", + "final_model_norm": 87.6263427734375, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:21:58.820654", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.1, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 2.7014002799987793, + "learning_rate": 1.0000000000000001e-11, + "model_norm": 87.42987823486328, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 9.138082504272461, + "3": 5.208849906921387, + "4": 3.945364475250244, + "5": 3.708055019378662, + "6": 6.398460865020752, + "7": 16.664430618286133, + "8": 10.699326515197754, + "9": 5.082013130187988, + "10": 4.138754844665527, + "11": 4.742880344390869, + "12": 7.428307056427002, + "13": 6.356147766113281, + "14": 13.46587085723877, + "15": 2.582383871078491, + "16": 3.6345374584198, + "17": 56.4307861328125, + "18": 3.446540117263794, + "19": 5.41510534286499, + "20": 5.19655179977417, + "21": 8.504855155944824, + "22": 3.3510003089904785, + "23": 48.02460479736328, + "24": 4.429149150848389, + "25": 6.106604099273682, + "26": 4.597447395324707, + "27": 3.7286911010742188, + "28": 4.605628967285156, + "29": 3.0380897521972656, + "30": 5.887907028198242, + "31": 2.898404121398926, + "32": 3.8490893840789795, + "33": 2.5686304569244385, + "34": 4.083178520202637, + "35": 4.642446994781494, + "36": 4.855794429779053, + "37": 2.6388304233551025, + "38": 3.178757905960083, + "39": 3.434710741043091, + "40": 3.305979013442993, + "41": 3.1685378551483154, + "42": 2.536409378051758, + "43": 2.8201422691345215, + "44": 2.798776865005493, + "45": 2.7589268684387207, + "46": 2.7870497703552246, + "47": 2.6443395614624023, + "48": 2.780182123184204, + "49": 2.5823721885681152, + "50": 1.9752962589263916, + "51": 2.321368455886841, + "52": 3.3420050144195557, + "53": 2.7014002799987793 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.9599180221557617, + "3": 3.7437262535095215, + "4": 3.5936968326568604, + "5": 3.540834903717041, + "6": 3.4970359802246094, + "7": 3.7328438758850098, + "8": 4.164545059204102, + "9": 3.5597293376922607, + "10": 3.3924622535705566, + "11": 3.439350128173828, + "12": 3.373410701751709, + "13": 3.5318424701690674, + "14": 3.3596181869506836, + "15": 3.2627570629119873, + "16": 3.201747179031372, + "17": 3.860020637512207, + "18": 3.183465003967285, + "19": 3.1514649391174316, + "20": 3.1900830268859863, + "21": 3.2659409046173096, + "22": 2.9773917198181152, + "23": 4.214755535125732, + "24": 2.9944722652435303, + "25": 3.2068495750427246, + "26": 3.121352195739746, + "27": 2.948528289794922, + "28": 2.9719438552856445, + "29": 2.868144989013672, + "30": 2.9442081451416016, + "31": 2.866201877593994, + "32": 2.8892412185668945, + "33": 2.8153820037841797, + "34": 2.853144407272339, + "35": 2.9503626823425293, + "36": 3.1002845764160156, + "37": 2.7691664695739746, + "38": 2.807326316833496, + "39": 2.853483200073242, + "40": 2.817741870880127, + "41": 2.907651901245117, + "42": 2.7306299209594727, + "43": 2.809755563735962, + "44": 2.7774600982666016, + "45": 2.7897768020629883, + "46": 2.745231866836548, + "47": 2.8025074005126953, + "48": 2.712313413619995, + "49": 2.822211503982544, + "50": 2.6848225593566895, + "51": 2.700636863708496, + "52": 2.7530298233032227, + "53": 2.882366418838501 + }, + "lr": { + "0": 1.0000000000000001e-11, + "1": 0.0020000000098000003, + "2": 0.0040000000096000006, + "3": 0.0060000000094, + "4": 0.0080000000092, + "5": 0.010000000009, + "6": 0.0120000000088, + "7": 0.014000000008600001, + "8": 0.0160000000084, + "9": 0.018000000008200002, + "10": 0.020000000008000004, + "11": 0.022000000007800002, + "12": 0.0240000000076, + "13": 0.0260000000074, + "14": 0.0280000000072, + "15": 0.030000000007, + "16": 0.0320000000068, + "17": 0.034000000006599994, + "18": 0.0360000000064, + "19": 0.038000000006200005, + "20": 0.04000000000600001, + "21": 0.0420000000058, + "22": 0.044000000005600004, + "23": 0.046000000005400006, + "24": 0.0480000000052, + "25": 0.05000000000499999, + "26": 0.0520000000048, + "27": 0.05400000000460001, + "28": 0.0560000000044, + "29": 0.0580000000042, + "30": 0.060000000004, + "31": 0.06200000000380001, + "32": 0.0640000000036, + "33": 0.06600000000339999, + "34": 0.06800000000319999, + "35": 0.07000000000300001, + "36": 0.0720000000028, + "37": 0.0740000000026, + "38": 0.07600000000240001, + "39": 0.0780000000022, + "40": 0.08000000000200001, + "41": 0.08200000000180001, + "42": 0.0840000000016, + "43": 0.08600000000140001, + "44": 0.08800000000120001, + "45": 0.090000000001, + "46": 0.09200000000080001, + "47": 0.0940000000006, + "48": 0.0960000000004, + "49": 0.0980000000002, + "50": 0.1, + "51": 0.1, + "52": 0.1, + "53": 0.1 + } + }, + "step_size_list": [ + 1e-11, + 0.00178359, + 0.00383813, + 0.00587232, + 0.00786375, + 0.00980954, + 0.0112124, + 0.00920591, + 0.0131158, + 0.0168967, + 0.0190387, + 0.0205234, + 0.0200621, + 0.0226342, + 0.0159487, + 0.0291076, + 0.0300184, + 0.00226295, + 0.0337343, + 0.0322913, + 0.0342085, + 0.028667, + 0.0406289, + 0.00338587, + 0.0414784, + 0.0387384, + 0.0442154, + 0.0479015, + 0.0466727, + 0.0530492, + 0.0443379, + 0.0568359, + 0.0549786, + 0.0612622, + 0.0567291, + 0.055747, + 0.0565241, + 0.067701, + 0.0668558, + 0.0671696, + 0.0692549, + 0.0718312, + 0.0764364, + 0.0766684, + 0.0782855, + 0.0801583, + 0.0814046, + 0.0841337, + 0.0844485, + 0.0878307, + 0.0932259, + 0.0909283, + 0.0831359, + 0.0887635 + ], + "train_epoch_time": 4.842854261398315, + "train_loss": 2.6698590013867984, + "train_score": 0.2439685707027293, + "val_loss": 2.700050770346798, + "val_score": 0.2380704654275892 + }, + { + "epoch": 1, + "grad_norm": 1.7721285820007324, + "learning_rate": 0.1, + "model_norm": 87.4465103149414, + "step_logs": { + "grad_norm": { + "54": 1.965592622756958, + "55": 2.045867919921875, + "56": 2.3545002937316895, + "57": 2.237187623977661, + "58": 1.8276700973510742, + "59": 1.9295403957366943, + "60": 2.125190019607544, + "61": 2.9536099433898926, + "62": 2.3407278060913086, + "63": 1.7702795267105103, + "64": 1.8679486513137817, + "65": 2.803359031677246, + "66": 2.3363051414489746, + "67": 1.460381269454956, + "68": 1.6297032833099365, + "69": 2.2212111949920654, + "70": 2.2825076580047607, + "71": 2.4100303649902344, + "72": 2.243866205215454, + "73": 1.718739628791809, + "74": 1.857200026512146, + "75": 2.276668071746826, + "76": 2.097562789916992, + "77": 1.6709188222885132, + "78": 1.8551037311553955, + "79": 2.5150721073150635, + "80": 2.029993772506714, + "81": 1.244290828704834, + "82": 1.4587386846542358, + "83": 2.289640426635742, + "84": 2.1908371448516846, + "85": 1.6843329668045044, + "86": 1.716336727142334, + "87": 2.178058385848999, + "88": 1.9921116828918457, + "89": 1.5284374952316284, + "90": 1.803568959236145, + "91": 2.30283784866333, + "92": 2.015841484069824, + "93": 1.4610092639923096, + "94": 1.5536742210388184, + "95": 1.9504945278167725, + "96": 2.265597343444824, + "97": 2.180148124694824, + "98": 1.6076425313949585, + "99": 1.3786959648132324, + "100": 1.5268253087997437, + "101": 2.073302745819092, + "102": 2.0648410320281982, + "103": 1.7442132234573364, + "104": 1.7094382047653198, + "105": 1.555678129196167, + "106": 1.550230860710144, + "107": 1.7721285820007324 + }, + "loss": { + "54": 2.662569522857666, + "55": 2.6708662509918213, + "56": 2.645061492919922, + "57": 2.718660354614258, + "58": 2.633521318435669, + "59": 2.6217260360717773, + "60": 2.653656482696533, + "61": 2.696911096572876, + "62": 2.7883663177490234, + "63": 2.622990131378174, + "64": 2.6221072673797607, + "65": 2.6512012481689453, + "66": 2.76615834236145, + "67": 2.582188129425049, + "68": 2.585904121398926, + "69": 2.6308398246765137, + "70": 2.7060368061065674, + "71": 2.647770881652832, + "72": 2.708874225616455, + "73": 2.6287479400634766, + "74": 2.6195075511932373, + "75": 2.6110892295837402, + "76": 2.6809592247009277, + "77": 2.590961456298828, + "78": 2.6345272064208984, + "79": 2.626781463623047, + "80": 2.7129602432250977, + "81": 2.5486814975738525, + "82": 2.5531840324401855, + "83": 2.6010518074035645, + "84": 2.6774182319641113, + "85": 2.584240674972534, + "86": 2.594029188156128, + "87": 2.5646281242370605, + "88": 2.6595239639282227, + "89": 2.5524184703826904, + "90": 2.587097406387329, + "91": 2.602904796600342, + "92": 2.6498537063598633, + "93": 2.553509473800659, + "94": 2.564358711242676, + "95": 2.5807104110717773, + "96": 2.6363778114318848, + "97": 2.6329097747802734, + "98": 2.5728330612182617, + "99": 2.562537908554077, + "100": 2.5657973289489746, + "101": 2.579831600189209, + "102": 2.6441688537597656, + "103": 2.5644607543945312, + "104": 2.5824155807495117, + "105": 2.552157402038574, + "106": 2.5535764694213867, + "107": 2.552483081817627 + }, + "lr": { + "54": 0.1, + "55": 0.1, + "56": 0.1, + "57": 0.1, + "58": 0.1, + "59": 0.1, + "60": 0.1, + "61": 0.1, + "62": 0.1, + "63": 0.1, + "64": 0.1, + "65": 0.1, + "66": 0.1, + "67": 0.1, + "68": 0.1, + "69": 0.1, + "70": 0.1, + "71": 0.1, + "72": 0.1, + "73": 0.1, + "74": 0.1, + "75": 0.1, + "76": 0.1, + "77": 0.1, + "78": 0.1, + "79": 0.1, + "80": 0.1, + "81": 0.1, + "82": 0.1, + "83": 0.1, + "84": 0.1, + "85": 0.1, + "86": 0.1, + "87": 0.1, + "88": 0.1, + "89": 0.1, + "90": 0.1, + "91": 0.1, + "92": 0.1, + "93": 0.1, + "94": 0.1, + "95": 0.1, + "96": 0.1, + "97": 0.1, + "98": 0.1, + "99": 0.1, + "100": 0.1, + "101": 0.1, + "102": 0.1, + "103": 0.1, + "104": 0.1, + "105": 0.1, + "106": 0.1, + "107": 0.1 + } + }, + "step_size_list": [ + 0.0932355, + 0.0927337, + 0.0905147, + 0.091571, + 0.0940362, + 0.0933702, + 0.0921576, + 0.086078, + 0.0910541, + 0.0943629, + 0.0937616, + 0.0870919, + 0.0910198, + 0.0960341, + 0.0951154, + 0.0914271, + 0.091219, + 0.0901159, + 0.0914968, + 0.0946801, + 0.093823, + 0.0909708, + 0.0924167, + 0.0948875, + 0.0938691, + 0.0892534, + 0.0929413, + 0.0970522, + 0.0959995, + 0.090845, + 0.0917739, + 0.0947966, + 0.094627, + 0.0915342, + 0.0930571, + 0.095624, + 0.0940851, + 0.090755, + 0.0928784, + 0.0959881, + 0.0955049, + 0.0931351, + 0.0911288, + 0.0917211, + 0.0952175, + 0.0964238, + 0.0956546, + 0.0923096, + 0.0925393, + 0.0944005, + 0.0946451, + 0.0954733, + 0.0955059, + 0.0942048 + ], + "train_epoch_time": 4.839518308639526, + "train_loss": 2.58684149985676, + "train_score": 0.22548870155681325, + "val_loss": 2.622239226451834, + "val_score": 0.2215843856437335 + }, + { + "epoch": 2, + "grad_norm": 1.5319663286209106, + "learning_rate": 0.1, + "model_norm": 87.46024322509766, + "step_logs": { + "grad_norm": { + "108": 1.7222405672073364, + "109": 1.7508620023727417, + "110": 1.6630184650421143, + "111": 1.5427573919296265, + "112": 1.613377571105957, + "113": 1.6383252143859863, + "114": 1.6512948274612427, + "115": 1.6589441299438477, + "116": 1.666428565979004, + "117": 1.7925798892974854, + "118": 1.8243111371994019, + "119": 1.7605955600738525, + "120": 1.8316779136657715, + "121": 1.771317958831787, + "122": 1.799514651298523, + "123": 1.9349422454833984, + "124": 1.7726531028747559, + "125": 1.4533581733703613, + "126": 1.5049976110458374, + "127": 1.6781474351882935, + "128": 1.6998845338821411, + "129": 1.590766191482544, + "130": 1.7025343179702759, + "131": 1.8275318145751953, + "132": 1.8172345161437988, + "133": 1.591578722000122, + "134": 1.588600516319275, + "135": 1.678156852722168, + "136": 1.7168233394622803, + "137": 1.606050968170166, + "138": 1.5711708068847656, + "139": 1.653882622718811, + "140": 1.618391513824463, + "141": 1.7055188417434692, + "142": 1.6396424770355225, + "143": 1.4647818803787231, + "144": 1.417738914489746, + "145": 1.555832028388977, + "146": 1.672027587890625, + "147": 1.7019164562225342, + "148": 1.7026259899139404, + "149": 1.9225009679794312, + "150": 1.7311534881591797, + "151": 1.4409128427505493, + "152": 1.4107351303100586, + "153": 1.4797552824020386, + "154": 1.5716218948364258, + "155": 1.4705257415771484, + "156": 1.36956787109375, + "157": 1.4272806644439697, + "158": 1.42551589012146, + "159": 1.589177131652832, + "160": 1.6364829540252686, + "161": 1.5319663286209106 + }, + "loss": { + "108": 2.5938098430633545, + "109": 2.5373518466949463, + "110": 2.5869998931884766, + "111": 2.5499298572540283, + "112": 2.5595407485961914, + "113": 2.51430606842041, + "114": 2.590421676635742, + "115": 2.5450260639190674, + "116": 2.580981731414795, + "117": 2.535814046859741, + "118": 2.5899620056152344, + "119": 2.5390784740448, + "120": 2.5932376384735107, + "121": 2.5510153770446777, + "122": 2.5700395107269287, + "123": 2.563209056854248, + "124": 2.615906000137329, + "125": 2.53306245803833, + "126": 2.522599697113037, + "127": 2.527998924255371, + "128": 2.578871250152588, + "129": 2.544523239135742, + "130": 2.5542256832122803, + "131": 2.5489180088043213, + "132": 2.5765740871429443, + "133": 2.5239710807800293, + "134": 2.5494332313537598, + "135": 2.5302553176879883, + "136": 2.556654214859009, + "137": 2.539095878601074, + "138": 2.545562267303467, + "139": 2.538005828857422, + "140": 2.542201042175293, + "141": 2.5334973335266113, + "142": 2.5553481578826904, + "143": 2.524021863937378, + "144": 2.528252601623535, + "145": 2.5096750259399414, + "146": 2.537395477294922, + "147": 2.5060198307037354, + "148": 2.5493154525756836, + "149": 2.52825927734375, + "150": 2.5793089866638184, + "151": 2.5261688232421875, + "152": 2.5134482383728027, + "153": 2.4945778846740723, + "154": 2.523097515106201, + "155": 2.526151657104492, + "156": 2.515076160430908, + "157": 2.529634475708008, + "158": 2.515226125717163, + "159": 2.523855686187744, + "160": 2.5593619346618652, + "161": 2.5114336013793945 + }, + "lr": { + "108": 0.1, + "109": 0.1, + "110": 0.1, + "111": 0.1, + "112": 0.1, + "113": 0.1, + "114": 0.1, + "115": 0.1, + "116": 0.1, + "117": 0.1, + "118": 0.1, + "119": 0.1, + "120": 0.1, + "121": 0.1, + "122": 0.1, + "123": 0.1, + "124": 0.1, + "125": 0.1, + "126": 0.1, + "127": 0.1, + "128": 0.1, + "129": 0.1, + "130": 0.1, + "131": 0.1, + "132": 0.1, + "133": 0.1, + "134": 0.1, + "135": 0.1, + "136": 0.1, + "137": 0.1, + "138": 0.1, + "139": 0.1, + "140": 0.1, + "141": 0.1, + "142": 0.1, + "143": 0.1, + "144": 0.1, + "145": 0.1, + "146": 0.1, + "147": 0.1, + "148": 0.1, + "149": 0.1, + "150": 0.1, + "151": 0.1, + "152": 0.1, + "153": 0.1, + "154": 0.1, + "155": 0.1, + "156": 0.1, + "157": 0.1, + "158": 0.1, + "159": 0.1, + "160": 0.1, + "161": 0.1 + } + }, + "step_size_list": [ + 0.0945916, + 0.0943033, + 0.094926, + 0.0955411, + 0.0951612, + 0.0949328, + 0.095, + 0.0948705, + 0.0948949, + 0.0940416, + 0.0939629, + 0.0942472, + 0.0939242, + 0.0942066, + 0.0940734, + 0.0931937, + 0.0943342, + 0.0959975, + 0.0957034, + 0.0947239, + 0.0946948, + 0.095263, + 0.0946305, + 0.0938513, + 0.0939775, + 0.0952217, + 0.095284, + 0.0947283, + 0.0945498, + 0.0951662, + 0.0953754, + 0.0948868, + 0.0951009, + 0.094571, + 0.0950025, + 0.095923, + 0.0961769, + 0.0953993, + 0.0947787, + 0.0945366, + 0.0946202, + 0.0931885, + 0.0945095, + 0.0960528, + 0.0961917, + 0.0957957, + 0.0953336, + 0.0958956, + 0.0964051, + 0.0961293, + 0.0961173, + 0.0952352, + 0.0950282, + 0.0955361 + ], + "train_epoch_time": 4.8396055698394775, + "train_loss": 2.531406187771725, + "train_score": 0.24703528509506023, + "val_loss": 2.5778532852124676, + "val_score": 0.24431777441816407 + }, + { + "epoch": 3, + "grad_norm": 1.6726247072219849, + "learning_rate": 0.1, + "model_norm": 87.47252655029297, + "step_logs": { + "grad_norm": { + "162": 1.6153781414031982, + "163": 1.673122525215149, + "164": 1.7028475999832153, + "165": 1.6289361715316772, + "166": 1.5314459800720215, + "167": 1.3829418420791626, + "168": 1.3458621501922607, + "169": 1.4614267349243164, + "170": 1.490400791168213, + "171": 1.575042963027954, + "172": 1.655622959136963, + "173": 1.4333730936050415, + "174": 1.4942058324813843, + "175": 1.755441427230835, + "176": 1.8549357652664185, + "177": 1.8647880554199219, + "178": 1.6314679384231567, + "179": 1.4605293273925781, + "180": 1.4982885122299194, + "181": 1.5453455448150635, + "182": 1.5823681354522705, + "183": 1.4793906211853027, + "184": 1.3944069147109985, + "185": 1.5388894081115723, + "186": 1.4895504713058472, + "187": 1.3919439315795898, + "188": 1.5177021026611328, + "189": 1.5192327499389648, + "190": 1.4143918752670288, + "191": 1.3188138008117676, + "192": 1.3198351860046387, + "193": 1.3574910163879395, + "194": 1.2956126928329468, + "195": 1.2524044513702393, + "196": 1.2873085737228394, + "197": 1.3792039155960083, + "198": 1.3747633695602417, + "199": 1.368151307106018, + "200": 1.3862868547439575, + "201": 1.536097526550293, + "202": 1.4076716899871826, + "203": 1.2324305772781372, + "204": 1.3698409795761108, + "205": 1.6835252046585083, + "206": 1.6933979988098145, + "207": 1.6048285961151123, + "208": 1.4945200681686401, + "209": 1.3977588415145874, + "210": 1.3661211729049683, + "211": 1.3046761751174927, + "212": 1.3673046827316284, + "213": 1.5285789966583252, + "214": 1.8321200609207153, + "215": 1.6726247072219849 + }, + "loss": { + "162": 2.5297117233276367, + "163": 2.5398120880126953, + "164": 2.5312044620513916, + "165": 2.5157523155212402, + "166": 2.53338885307312, + "167": 2.531099796295166, + "168": 2.4975533485412598, + "169": 2.490152597427368, + "170": 2.528290033340454, + "171": 2.500814914703369, + "172": 2.5340723991394043, + "173": 2.482034683227539, + "174": 2.527773857116699, + "175": 2.4900028705596924, + "176": 2.5368003845214844, + "177": 2.526069164276123, + "178": 2.542509078979492, + "179": 2.510859966278076, + "180": 2.500474214553833, + "181": 2.5281267166137695, + "182": 2.512002944946289, + "183": 2.5041253566741943, + "184": 2.505188465118408, + "185": 2.5058794021606445, + "186": 2.519810199737549, + "187": 2.5065155029296875, + "188": 2.5223541259765625, + "189": 2.510152816772461, + "190": 2.5078835487365723, + "191": 2.4967923164367676, + "192": 2.4992456436157227, + "193": 2.4970943927764893, + "194": 2.484459638595581, + "195": 2.4877758026123047, + "196": 2.4717559814453125, + "197": 2.4962782859802246, + "198": 2.4987924098968506, + "199": 2.4934144020080566, + "200": 2.4905261993408203, + "201": 2.486286163330078, + "202": 2.51426362991333, + "203": 2.4798288345336914, + "204": 2.4783501625061035, + "205": 2.5024008750915527, + "206": 2.535660982131958, + "207": 2.4954142570495605, + "208": 2.501339912414551, + "209": 2.503080368041992, + "210": 2.5021119117736816, + "211": 2.4640250205993652, + "212": 2.4925060272216797, + "213": 2.490812301635742, + "214": 2.531944513320923, + "215": 2.521343469619751 + }, + "lr": { + "162": 0.1, + "163": 0.1, + "164": 0.1, + "165": 0.1, + "166": 0.1, + "167": 0.1, + "168": 0.1, + "169": 0.1, + "170": 0.1, + "171": 0.1, + "172": 0.1, + "173": 0.1, + "174": 0.1, + "175": 0.1, + "176": 0.1, + "177": 0.1, + "178": 0.1, + "179": 0.1, + "180": 0.1, + "181": 0.1, + "182": 0.1, + "183": 0.1, + "184": 0.1, + "185": 0.1, + "186": 0.1, + "187": 0.1, + "188": 0.1, + "189": 0.1, + "190": 0.1, + "191": 0.1, + "192": 0.1, + "193": 0.1, + "194": 0.1, + "195": 0.1, + "196": 0.1, + "197": 0.1, + "198": 0.1, + "199": 0.1, + "200": 0.1, + "201": 0.1, + "202": 0.1, + "203": 0.1, + "204": 0.1, + "205": 0.1, + "206": 0.1, + "207": 0.1, + "208": 0.1, + "209": 0.1, + "210": 0.1, + "211": 0.1, + "212": 0.1, + "213": 0.1, + "214": 0.1, + "215": 0.1 + } + }, + "step_size_list": [ + 0.0950954, + 0.0947769, + 0.0945824, + 0.0949905, + 0.095576, + 0.0963595, + 0.0965007, + 0.0958879, + 0.095792, + 0.0952745, + 0.094869, + 0.0960256, + 0.0957705, + 0.0941727, + 0.093649, + 0.0935602, + 0.095026, + 0.0959252, + 0.095704, + 0.09549, + 0.0952527, + 0.095813, + 0.0962643, + 0.095488, + 0.095783, + 0.0962789, + 0.0956334, + 0.0956046, + 0.0961645, + 0.0966342, + 0.0966324, + 0.0964415, + 0.0967322, + 0.0969439, + 0.0967565, + 0.0963298, + 0.096356, + 0.0963822, + 0.0962851, + 0.0954698, + 0.0962088, + 0.0970285, + 0.0963524, + 0.0946404, + 0.0946481, + 0.0950928, + 0.095726, + 0.0962439, + 0.0964047, + 0.0966613, + 0.0963853, + 0.0955198, + 0.0937834, + 0.0947436 + ], + "train_epoch_time": 4.84104323387146, + "train_loss": 2.494138606480581, + "train_score": 0.26137800383430987, + "val_loss": 2.54036904010926, + "val_score": 0.25223342436189355 + }, + { + "epoch": 4, + "grad_norm": 1.2146599292755127, + "learning_rate": 0.1, + "model_norm": 87.48461151123047, + "step_logs": { + "grad_norm": { + "216": 1.3636150360107422, + "217": 1.4804573059082031, + "218": 1.5302199125289917, + "219": 1.4577096700668335, + "220": 1.3446636199951172, + "221": 1.3021408319473267, + "222": 1.3390588760375977, + "223": 1.4846988916397095, + "224": 1.5785391330718994, + "225": 1.5727964639663696, + "226": 1.4491405487060547, + "227": 1.340945839881897, + "228": 1.2831707000732422, + "229": 1.3626240491867065, + "230": 1.3536001443862915, + "231": 1.2594223022460938, + "232": 1.3504176139831543, + "233": 1.4755704402923584, + "234": 1.4534399509429932, + "235": 1.4111806154251099, + "236": 1.4107003211975098, + "237": 1.3766318559646606, + "238": 1.367026925086975, + "239": 1.4091522693634033, + "240": 1.416739821434021, + "241": 1.3254085779190063, + "242": 1.3123525381088257, + "243": 1.2553538084030151, + "244": 1.2974796295166016, + "245": 1.4017691612243652, + "246": 1.4274452924728394, + "247": 1.4385408163070679, + "248": 1.4232410192489624, + "249": 1.346845269203186, + "250": 1.4480693340301514, + "251": 1.5677980184555054, + "252": 1.4672484397888184, + "253": 1.4176898002624512, + "254": 1.4086984395980835, + "255": 1.716362476348877, + "256": 1.7754286527633667, + "257": 1.4389187097549438, + "258": 1.3786344528198242, + "259": 1.4783419370651245, + "260": 1.510939121246338, + "261": 1.4312331676483154, + "262": 1.4567210674285889, + "263": 1.563134789466858, + "264": 1.646837830543518, + "265": 1.7298420667648315, + "266": 1.6760711669921875, + "267": 1.4475209712982178, + "268": 1.4026678800582886, + "269": 1.2146599292755127 + }, + "loss": { + "216": 2.4945056438446045, + "217": 2.476877212524414, + "218": 2.4778897762298584, + "219": 2.5171587467193604, + "220": 2.506223678588867, + "221": 2.4749577045440674, + "222": 2.499462604522705, + "223": 2.4800853729248047, + "224": 2.521491527557373, + "225": 2.4830408096313477, + "226": 2.501441478729248, + "227": 2.4814603328704834, + "228": 2.4833438396453857, + "229": 2.4706075191497803, + "230": 2.4815661907196045, + "231": 2.476168632507324, + "232": 2.458076000213623, + "233": 2.4829487800598145, + "234": 2.4967284202575684, + "235": 2.47981595993042, + "236": 2.500000476837158, + "237": 2.490534782409668, + "238": 2.477534770965576, + "239": 2.4788246154785156, + "240": 2.491225242614746, + "241": 2.4874119758605957, + "242": 2.4652035236358643, + "243": 2.4688680171966553, + "244": 2.4687812328338623, + "245": 2.467210292816162, + "246": 2.4625253677368164, + "247": 2.4746756553649902, + "248": 2.4987213611602783, + "249": 2.468862295150757, + "250": 2.4748666286468506, + "251": 2.4936366081237793, + "252": 2.50899600982666, + "253": 2.471942901611328, + "254": 2.4647915363311768, + "255": 2.472774028778076, + "256": 2.5127928256988525, + "257": 2.471195697784424, + "258": 2.482217311859131, + "259": 2.4569308757781982, + "260": 2.4877946376800537, + "261": 2.4683563709259033, + "262": 2.462571144104004, + "263": 2.486539840698242, + "264": 2.5109944343566895, + "265": 2.4866538047790527, + "266": 2.493426561355591, + "267": 2.471728563308716, + "268": 2.4728283882141113, + "269": 2.4548778533935547 + }, + "lr": { + "216": 0.1, + "217": 0.1, + "218": 0.1, + "219": 0.1, + "220": 0.1, + "221": 0.1, + "222": 0.1, + "223": 0.1, + "224": 0.1, + "225": 0.1, + "226": 0.1, + "227": 0.1, + "228": 0.1, + "229": 0.1, + "230": 0.1, + "231": 0.1, + "232": 0.1, + "233": 0.1, + "234": 0.1, + "235": 0.1, + "236": 0.1, + "237": 0.1, + "238": 0.1, + "239": 0.1, + "240": 0.1, + "241": 0.1, + "242": 0.1, + "243": 0.1, + "244": 0.1, + "245": 0.1, + "246": 0.1, + "247": 0.1, + "248": 0.1, + "249": 0.1, + "250": 0.1, + "251": 0.1, + "252": 0.1, + "253": 0.1, + "254": 0.1, + "255": 0.1, + "256": 0.1, + "257": 0.1, + "258": 0.1, + "259": 0.1, + "260": 0.1, + "261": 0.1, + "262": 0.1, + "263": 0.1, + "264": 0.1, + "265": 0.1, + "266": 0.1, + "267": 0.1, + "268": 0.1, + "269": 0.1 + } + }, + "step_size_list": [ + 0.0964068, + 0.095763, + 0.0954882, + 0.0959501, + 0.0965183, + 0.096688, + 0.0965373, + 0.095745, + 0.0952916, + 0.0952552, + 0.0959715, + 0.0965035, + 0.0967912, + 0.0963784, + 0.0964397, + 0.0968966, + 0.0964232, + 0.0957996, + 0.0959412, + 0.0961397, + 0.0961722, + 0.0963348, + 0.0963657, + 0.0961489, + 0.0961276, + 0.0965893, + 0.0966248, + 0.0969071, + 0.0967029, + 0.0961704, + 0.0960272, + 0.0959866, + 0.0961046, + 0.0964564, + 0.0959358, + 0.095303, + 0.0958863, + 0.0960935, + 0.0961302, + 0.0943782, + 0.094098, + 0.0959792, + 0.0963127, + 0.0957418, + 0.095613, + 0.0960159, + 0.0958694, + 0.0953169, + 0.0948763, + 0.0943246, + 0.0946672, + 0.0959338, + 0.096174, + 0.0970826 + ], + "train_epoch_time": 4.841325998306274, + "train_loss": 2.4533854401778625, + "train_score": 0.27739979369910583, + "val_loss": 2.50115905016079, + "val_score": 0.2703923287402612 + }, + { + "epoch": 5, + "grad_norm": 1.604719638824463, + "learning_rate": 0.1, + "model_norm": 87.49969482421875, + "step_logs": { + "grad_norm": { + "270": 1.1493749618530273, + "271": 1.1330543756484985, + "272": 1.237888216972351, + "273": 1.3979982137680054, + "274": 1.4486361742019653, + "275": 1.462369441986084, + "276": 1.534291386604309, + "277": 1.5890707969665527, + "278": 1.4712353944778442, + "279": 1.287907600402832, + "280": 1.2565165758132935, + "281": 1.4038207530975342, + "282": 1.5695432424545288, + "283": 1.639522910118103, + "284": 1.6006356477737427, + "285": 1.3548380136489868, + "286": 1.2830750942230225, + "287": 1.3571367263793945, + "288": 1.2246315479278564, + "289": 1.120367407798767, + "290": 1.1854363679885864, + "291": 1.3918384313583374, + "292": 1.5270098447799683, + "293": 1.581470251083374, + "294": 1.4569016695022583, + "295": 1.372684121131897, + "296": 1.5808931589126587, + "297": 1.7226909399032593, + "298": 1.5298813581466675, + "299": 1.496250033378601, + "300": 1.6258769035339355, + "301": 1.7273105382919312, + "302": 1.6980730295181274, + "303": 1.6322118043899536, + "304": 1.6293002367019653, + "305": 1.4697439670562744, + "306": 1.426142692565918, + "307": 1.7037168741226196, + "308": 1.7753783464431763, + "309": 1.8286687135696411, + "310": 1.5889177322387695, + "311": 1.6861062049865723, + "312": 1.6601293087005615, + "313": 1.6899635791778564, + "314": 1.6301934719085693, + "315": 1.5262844562530518, + "316": 1.5647791624069214, + "317": 1.733574628829956, + "318": 1.7964720726013184, + "319": 1.6857889890670776, + "320": 1.6071444749832153, + "321": 1.5039522647857666, + "322": 1.45350182056427, + "323": 1.604719638824463 + }, + "loss": { + "270": 2.459867000579834, + "271": 2.4469497203826904, + "272": 2.4497859477996826, + "273": 2.4574506282806396, + "274": 2.4623823165893555, + "275": 2.4494693279266357, + "276": 2.4682140350341797, + "277": 2.4670796394348145, + "278": 2.46246075630188, + "279": 2.457047939300537, + "280": 2.436194658279419, + "281": 2.4511539936065674, + "282": 2.471869945526123, + "283": 2.490079879760742, + "284": 2.461785078048706, + "285": 2.449409246444702, + "286": 2.4527413845062256, + "287": 2.4596033096313477, + "288": 2.445344924926758, + "289": 2.424729824066162, + "290": 2.430593967437744, + "291": 2.4237821102142334, + "292": 2.4400696754455566, + "293": 2.438300371170044, + "294": 2.4570446014404297, + "295": 2.4188010692596436, + "296": 2.452712059020996, + "297": 2.454869508743286, + "298": 2.459745407104492, + "299": 2.4303040504455566, + "300": 2.4442968368530273, + "301": 2.4529972076416016, + "302": 2.472748279571533, + "303": 2.4257872104644775, + "304": 2.448838710784912, + "305": 2.4362494945526123, + "306": 2.427426338195801, + "307": 2.4044203758239746, + "308": 2.4801931381225586, + "309": 2.46386456489563, + "310": 2.4611735343933105, + "311": 2.4443514347076416, + "312": 2.441993236541748, + "313": 2.433992624282837, + "314": 2.438014030456543, + "315": 2.411799907684326, + "316": 2.3855862617492676, + "317": 2.4409897327423096, + "318": 2.4333548545837402, + "319": 2.424011707305908, + "320": 2.4327902793884277, + "321": 2.4080169200897217, + "322": 2.3980765342712402, + "323": 2.4144649505615234 + }, + "lr": { + "270": 0.1, + "271": 0.1, + "272": 0.1, + "273": 0.1, + "274": 0.1, + "275": 0.1, + "276": 0.1, + "277": 0.1, + "278": 0.1, + "279": 0.1, + "280": 0.1, + "281": 0.1, + "282": 0.1, + "283": 0.1, + "284": 0.1, + "285": 0.1, + "286": 0.1, + "287": 0.1, + "288": 0.1, + "289": 0.1, + "290": 0.1, + "291": 0.1, + "292": 0.1, + "293": 0.1, + "294": 0.1, + "295": 0.1, + "296": 0.1, + "297": 0.1, + "298": 0.1, + "299": 0.1, + "300": 0.1, + "301": 0.1, + "302": 0.1, + "303": 0.1, + "304": 0.1, + "305": 0.1, + "306": 0.1, + "307": 0.1, + "308": 0.1, + "309": 0.1, + "310": 0.1, + "311": 0.1, + "312": 0.1, + "313": 0.1, + "314": 0.1, + "315": 0.1, + "316": 0.1, + "317": 0.1, + "318": 0.1, + "319": 0.1, + "320": 0.1, + "321": 0.1, + "322": 0.1, + "323": 0.1 + } + }, + "step_size_list": [ + 0.097385, + 0.0974438, + 0.0969673, + 0.0961756, + 0.095913, + 0.0958173, + 0.0954483, + 0.0951315, + 0.09579, + 0.0967348, + 0.0968613, + 0.0961354, + 0.0952535, + 0.0948789, + 0.0950538, + 0.0963883, + 0.096753, + 0.096391, + 0.0970248, + 0.0974769, + 0.0971904, + 0.0961573, + 0.0954398, + 0.0951215, + 0.0958595, + 0.096251, + 0.0951522, + 0.0943001, + 0.0954584, + 0.0955969, + 0.09487, + 0.0942671, + 0.0944908, + 0.0947946, + 0.0948585, + 0.0957549, + 0.0959791, + 0.0943075, + 0.0940254, + 0.0936451, + 0.0951213, + 0.0945043, + 0.0946584, + 0.0944583, + 0.0948315, + 0.095393, + 0.0951186, + 0.0942011, + 0.093781, + 0.0944626, + 0.0949591, + 0.0955141, + 0.0957809, + 0.0949373 + ], + "train_epoch_time": 4.8408050537109375, + "train_loss": 2.421827687697226, + "train_score": 0.2904624732016149, + "val_loss": 2.4764338429294415, + "val_score": 0.279689294051905 + }, + { + "epoch": 6, + "grad_norm": 1.850106954574585, + "learning_rate": 0.1, + "model_norm": 87.51580810546875, + "step_logs": { + "grad_norm": { + "324": 1.70496666431427, + "325": 1.5352458953857422, + "326": 1.5272716283798218, + "327": 1.486746907234192, + "328": 1.3978716135025024, + "329": 1.3832483291625977, + "330": 1.4204920530319214, + "331": 1.5262407064437866, + "332": 1.687807321548462, + "333": 1.6875107288360596, + "334": 1.487512469291687, + "335": 1.3805464506149292, + "336": 1.3373854160308838, + "337": 1.392936110496521, + "338": 1.544201135635376, + "339": 1.7049697637557983, + "340": 1.7199921607971191, + "341": 1.566656470298767, + "342": 1.4393539428710938, + "343": 1.4776968955993652, + "344": 1.5886560678482056, + "345": 1.5943487882614136, + "346": 1.5563284158706665, + "347": 1.4807718992233276, + "348": 1.4561889171600342, + "349": 1.3084001541137695, + "350": 1.102138638496399, + "351": 1.1093521118164062, + "352": 1.2013018131256104, + "353": 1.3223005533218384, + "354": 1.4350271224975586, + "355": 1.4879282712936401, + "356": 1.5674432516098022, + "357": 1.6384798288345337, + "358": 1.6519635915756226, + "359": 1.6509078741073608, + "360": 1.581932783126831, + "361": 1.5987820625305176, + "362": 1.4949012994766235, + "363": 1.448889136314392, + "364": 1.5567195415496826, + "365": 1.5549774169921875, + "366": 1.3681429624557495, + "367": 1.3798903226852417, + "368": 1.302626609802246, + "369": 1.2896926403045654, + "370": 1.2533292770385742, + "371": 1.1450278759002686, + "372": 1.140235424041748, + "373": 1.2935467958450317, + "374": 1.5056853294372559, + "375": 1.6462469100952148, + "376": 1.8939093351364136, + "377": 1.850106954574585 + }, + "loss": { + "324": 2.4293105602264404, + "325": 2.4545586109161377, + "326": 2.39117431640625, + "327": 2.425450325012207, + "328": 2.395688533782959, + "329": 2.403061866760254, + "330": 2.3921380043029785, + "331": 2.4047253131866455, + "332": 2.391915798187256, + "333": 2.4117398262023926, + "334": 2.3873023986816406, + "335": 2.418776273727417, + "336": 2.3815364837646484, + "337": 2.3877670764923096, + "338": 2.431896686553955, + "339": 2.41985821723938, + "340": 2.4127912521362305, + "341": 2.4208478927612305, + "342": 2.369502544403076, + "343": 2.383650779724121, + "344": 2.3845064640045166, + "345": 2.4073538780212402, + "346": 2.3645572662353516, + "347": 2.3936514854431152, + "348": 2.38271164894104, + "349": 2.3873305320739746, + "350": 2.3465943336486816, + "351": 2.379585027694702, + "352": 2.3605422973632812, + "353": 2.3752424716949463, + "354": 2.375164031982422, + "355": 2.3937907218933105, + "356": 2.3961539268493652, + "357": 2.405888557434082, + "358": 2.401010751724243, + "359": 2.384521007537842, + "360": 2.3716607093811035, + "361": 2.3785009384155273, + "362": 2.375952959060669, + "363": 2.3757760524749756, + "364": 2.3742928504943848, + "365": 2.3953442573547363, + "366": 2.347576141357422, + "367": 2.382486343383789, + "368": 2.3730404376983643, + "369": 2.368624210357666, + "370": 2.371485710144043, + "371": 2.3564565181732178, + "372": 2.3519186973571777, + "373": 2.3728907108306885, + "374": 2.36344838142395, + "375": 2.369312286376953, + "376": 2.397822380065918, + "377": 2.4184350967407227 + }, + "lr": { + "324": 0.1, + "325": 0.1, + "326": 0.1, + "327": 0.1, + "328": 0.1, + "329": 0.1, + "330": 0.1, + "331": 0.1, + "332": 0.1, + "333": 0.1, + "334": 0.1, + "335": 0.1, + "336": 0.1, + "337": 0.1, + "338": 0.1, + "339": 0.1, + "340": 0.1, + "341": 0.1, + "342": 0.1, + "343": 0.1, + "344": 0.1, + "345": 0.1, + "346": 0.1, + "347": 0.1, + "348": 0.1, + "349": 0.1, + "350": 0.1, + "351": 0.1, + "352": 0.1, + "353": 0.1, + "354": 0.1, + "355": 0.1, + "356": 0.1, + "357": 0.1, + "358": 0.1, + "359": 0.1, + "360": 0.1, + "361": 0.1, + "362": 0.1, + "363": 0.1, + "364": 0.1, + "365": 0.1, + "366": 0.1, + "367": 0.1, + "368": 0.1, + "369": 0.1, + "370": 0.1, + "371": 0.1, + "372": 0.1, + "373": 0.1, + "374": 0.1, + "375": 0.1, + "376": 0.1, + "377": 0.1 + } + }, + "step_size_list": [ + 0.0943548, + 0.0954187, + 0.0953494, + 0.0956419, + 0.0960815, + 0.0961713, + 0.0959531, + 0.0953804, + 0.0943798, + 0.0944253, + 0.095571, + 0.0962095, + 0.0963808, + 0.0960957, + 0.0953265, + 0.0943339, + 0.0942235, + 0.0951753, + 0.0958114, + 0.0956203, + 0.0949739, + 0.0949852, + 0.0951277, + 0.0956204, + 0.0957398, + 0.0965387, + 0.0974771, + 0.0974793, + 0.0970339, + 0.09645, + 0.0958451, + 0.0955801, + 0.0951233, + 0.0947156, + 0.0946226, + 0.094594, + 0.0949885, + 0.0949007, + 0.0955084, + 0.0957689, + 0.0951444, + 0.0951953, + 0.0961662, + 0.0961575, + 0.0965482, + 0.096608, + 0.0967943, + 0.0972934, + 0.0973104, + 0.0965943, + 0.0954234, + 0.0945902, + 0.093041, + 0.093391 + ], + "train_epoch_time": 4.840641260147095, + "train_loss": 2.380497599674263, + "train_score": 0.29963795729996995, + "val_loss": 2.4356343497091264, + "val_score": 0.2938567378422423 + }, + { + "epoch": 7, + "grad_norm": 1.6480035781860352, + "learning_rate": 0.1, + "model_norm": 87.53150939941406, + "step_logs": { + "grad_norm": { + "378": 1.5266987085342407, + "379": 1.3259601593017578, + "380": 1.3328115940093994, + "381": 1.485754370689392, + "382": 1.7592358589172363, + "383": 1.7902156114578247, + "384": 1.8240782022476196, + "385": 1.6953977346420288, + "386": 1.4446179866790771, + "387": 1.3356860876083374, + "388": 1.3338754177093506, + "389": 1.4443869590759277, + "390": 1.5102128982543945, + "391": 1.5769864320755005, + "392": 1.609325647354126, + "393": 1.524599313735962, + "394": 1.1873465776443481, + "395": 0.9014818072319031, + "396": 0.9501115679740906, + "397": 1.0860326290130615, + "398": 1.4255868196487427, + "399": 1.672377586364746, + "400": 1.8315880298614502, + "401": 1.6081513166427612, + "402": 1.409303069114685, + "403": 1.4617589712142944, + "404": 1.4337639808654785, + "405": 1.4396882057189941, + "406": 1.564761996269226, + "407": 1.8757449388504028, + "408": 1.6897828578948975, + "409": 1.6864510774612427, + "410": 1.6438335180282593, + "411": 1.4547008275985718, + "412": 1.3749847412109375, + "413": 1.5135363340377808, + "414": 1.5014058351516724, + "415": 1.7297778129577637, + "416": 1.6596649885177612, + "417": 1.5333251953125, + "418": 1.5313479900360107, + "419": 1.69602370262146, + "420": 1.6985406875610352, + "421": 1.5260009765625, + "422": 1.3236879110336304, + "423": 1.3171796798706055, + "424": 1.4673880338668823, + "425": 1.4992319345474243, + "426": 1.3739311695098877, + "427": 1.3212213516235352, + "428": 1.5277010202407837, + "429": 1.633017659187317, + "430": 1.7155961990356445, + "431": 1.6480035781860352 + }, + "loss": { + "378": 2.3986642360687256, + "379": 2.366818904876709, + "380": 2.35683536529541, + "381": 2.360246181488037, + "382": 2.3710055351257324, + "383": 2.4133899211883545, + "384": 2.3755006790161133, + "385": 2.3968124389648438, + "386": 2.363870143890381, + "387": 2.344353199005127, + "388": 2.320923089981079, + "389": 2.361589193344116, + "390": 2.3946704864501953, + "391": 2.3677263259887695, + "392": 2.3542699813842773, + "393": 2.3507258892059326, + "394": 2.3582303524017334, + "395": 2.3268086910247803, + "396": 2.3252711296081543, + "397": 2.3379464149475098, + "398": 2.3369557857513428, + "399": 2.3873050212860107, + "400": 2.3689322471618652, + "401": 2.396152973175049, + "402": 2.3575778007507324, + "403": 2.369710683822632, + "404": 2.3685595989227295, + "405": 2.3393683433532715, + "406": 2.346273422241211, + "407": 2.3763089179992676, + "408": 2.387648105621338, + "409": 2.3750905990600586, + "410": 2.3492965698242188, + "411": 2.3642311096191406, + "412": 2.353775978088379, + "413": 2.347517728805542, + "414": 2.3600897789001465, + "415": 2.359027862548828, + "416": 2.3703038692474365, + "417": 2.368494987487793, + "418": 2.337583541870117, + "419": 2.3678970336914062, + "420": 2.3678038120269775, + "421": 2.342033863067627, + "422": 2.3472423553466797, + "423": 2.3356282711029053, + "424": 2.3358168601989746, + "425": 2.347956657409668, + "426": 2.3190560340881348, + "427": 2.3381059169769287, + "428": 2.327958583831787, + "429": 2.365657329559326, + "430": 2.3531432151794434, + "431": 2.365602493286133 + }, + "lr": { + "378": 0.1, + "379": 0.1, + "380": 0.1, + "381": 0.1, + "382": 0.1, + "383": 0.1, + "384": 0.1, + "385": 0.1, + "386": 0.1, + "387": 0.1, + "388": 0.1, + "389": 0.1, + "390": 0.1, + "391": 0.1, + "392": 0.1, + "393": 0.1, + "394": 0.1, + "395": 0.1, + "396": 0.1, + "397": 0.1, + "398": 0.1, + "399": 0.1, + "400": 0.1, + "401": 0.1, + "402": 0.1, + "403": 0.1, + "404": 0.1, + "405": 0.1, + "406": 0.1, + "407": 0.1, + "408": 0.1, + "409": 0.1, + "410": 0.1, + "411": 0.1, + "412": 0.1, + "413": 0.1, + "414": 0.1, + "415": 0.1, + "416": 0.1, + "417": 0.1, + "418": 0.1, + "419": 0.1, + "420": 0.1, + "421": 0.1, + "422": 0.1, + "423": 0.1, + "424": 0.1, + "425": 0.1, + "426": 0.1, + "427": 0.1, + "428": 0.1, + "429": 0.1, + "430": 0.1, + "431": 0.1 + } + }, + "step_size_list": [ + 0.0953666, + 0.0964188, + 0.0963683, + 0.0955326, + 0.0938733, + 0.0937737, + 0.0934551, + 0.094343, + 0.0957724, + 0.0963345, + 0.0963085, + 0.0957698, + 0.0954544, + 0.0950104, + 0.0947863, + 0.0952889, + 0.0970977, + 0.0982837, + 0.0980959, + 0.0975396, + 0.095833, + 0.0944664, + 0.0933876, + 0.0948799, + 0.095958, + 0.0956861, + 0.095841, + 0.0957579, + 0.095041, + 0.0931072, + 0.0943579, + 0.0943509, + 0.0945617, + 0.0957163, + 0.096139, + 0.0953478, + 0.095442, + 0.0940363, + 0.0945087, + 0.0952714, + 0.0952237, + 0.0942739, + 0.0942576, + 0.095264, + 0.0964019, + 0.0964189, + 0.0955939, + 0.0954322, + 0.0960892, + 0.0964013, + 0.0952266, + 0.0946644, + 0.0941142, + 0.0945712 + ], + "train_epoch_time": 4.840502023696899, + "train_loss": 2.3419843219445116, + "train_score": 0.3116133877673484, + "val_loss": 2.4106282238024113, + "val_score": 0.29856127986404296 + }, + { + "epoch": 8, + "grad_norm": 1.6200577020645142, + "learning_rate": 0.1, + "model_norm": 87.547607421875, + "step_logs": { + "grad_norm": { + "432": 1.634557843208313, + "433": 1.5858564376831055, + "434": 1.498273491859436, + "435": 1.468005657196045, + "436": 1.3487775325775146, + "437": 1.3570528030395508, + "438": 1.4803147315979004, + "439": 1.5396385192871094, + "440": 1.586411714553833, + "441": 1.6063412427902222, + "442": 1.609969139099121, + "443": 1.8239301443099976, + "444": 1.7798283100128174, + "445": 1.507086992263794, + "446": 1.2801793813705444, + "447": 1.3306251764297485, + "448": 1.4774353504180908, + "449": 1.4359245300292969, + "450": 1.3928784132003784, + "451": 1.375860333442688, + "452": 1.405258297920227, + "453": 1.4549062252044678, + "454": 1.5258451700210571, + "455": 1.6319407224655151, + "456": 1.6003764867782593, + "457": 1.464892864227295, + "458": 1.3905911445617676, + "459": 1.3548095226287842, + "460": 1.6180012226104736, + "461": 1.834257960319519, + "462": 1.824002981185913, + "463": 1.7079906463623047, + "464": 1.428998351097107, + "465": 1.329432725906372, + "466": 1.1733323335647583, + "467": 1.2698811292648315, + "468": 1.3429077863693237, + "469": 1.3825491666793823, + "470": 1.3863537311553955, + "471": 1.4176111221313477, + "472": 1.4104974269866943, + "473": 1.4849673509597778, + "474": 1.586929202079773, + "475": 1.4831091165542603, + "476": 1.286625862121582, + "477": 1.276072382926941, + "478": 1.2926793098449707, + "479": 1.3325908184051514, + "480": 1.3457083702087402, + "481": 1.3737918138504028, + "482": 1.4687963724136353, + "483": 1.6570770740509033, + "484": 1.761545181274414, + "485": 1.6200577020645142 + }, + "loss": { + "432": 2.3617753982543945, + "433": 2.34804105758667, + "434": 2.3420891761779785, + "435": 2.359283924102783, + "436": 2.334181785583496, + "437": 2.342970848083496, + "438": 2.295233726501465, + "439": 2.3428447246551514, + "440": 2.3305728435516357, + "441": 2.3393940925598145, + "442": 2.3540444374084473, + "443": 2.349869728088379, + "444": 2.3877902030944824, + "445": 2.346569538116455, + "446": 2.3178114891052246, + "447": 2.326875925064087, + "448": 2.3456501960754395, + "449": 2.344538688659668, + "450": 2.3177645206451416, + "451": 2.318577289581299, + "452": 2.3197152614593506, + "453": 2.3167388439178467, + "454": 2.3275227546691895, + "455": 2.3517160415649414, + "456": 2.3250317573547363, + "457": 2.3436105251312256, + "458": 2.307124614715576, + "459": 2.3200998306274414, + "460": 2.305172920227051, + "461": 2.372474193572998, + "462": 2.34440541267395, + "463": 2.350816249847412, + "464": 2.313886880874634, + "465": 2.3120365142822266, + "466": 2.285431385040283, + "467": 2.3201370239257812, + "468": 2.321953296661377, + "469": 2.3189384937286377, + "470": 2.337470054626465, + "471": 2.3227221965789795, + "472": 2.3120028972625732, + "473": 2.322312116622925, + "474": 2.3122975826263428, + "475": 2.3390138149261475, + "476": 2.2942404747009277, + "477": 2.2792513370513916, + "478": 2.3139185905456543, + "479": 2.31300950050354, + "480": 2.3121891021728516, + "481": 2.2937021255493164, + "482": 2.319324493408203, + "483": 2.3062589168548584, + "484": 2.3141398429870605, + "485": 2.3350830078125 + }, + "lr": { + "432": 0.1, + "433": 0.1, + "434": 0.1, + "435": 0.1, + "436": 0.1, + "437": 0.1, + "438": 0.1, + "439": 0.1, + "440": 0.1, + "441": 0.1, + "442": 0.1, + "443": 0.1, + "444": 0.1, + "445": 0.1, + "446": 0.1, + "447": 0.1, + "448": 0.1, + "449": 0.1, + "450": 0.1, + "451": 0.1, + "452": 0.1, + "453": 0.1, + "454": 0.1, + "455": 0.1, + "456": 0.1, + "457": 0.1, + "458": 0.1, + "459": 0.1, + "460": 0.1, + "461": 0.1, + "462": 0.1, + "463": 0.1, + "464": 0.1, + "465": 0.1, + "466": 0.1, + "467": 0.1, + "468": 0.1, + "469": 0.1, + "470": 0.1, + "471": 0.1, + "472": 0.1, + "473": 0.1, + "474": 0.1, + "475": 0.1, + "476": 0.1, + "477": 0.1, + "478": 0.1, + "479": 0.1, + "480": 0.1, + "481": 0.1, + "482": 0.1, + "483": 0.1, + "484": 0.1, + "485": 0.1 + } + }, + "step_size_list": [ + 0.0946465, + 0.0949168, + 0.0954268, + 0.0956323, + 0.0962493, + 0.0962186, + 0.0954438, + 0.0951846, + 0.0948773, + 0.0947733, + 0.0947819, + 0.0933894, + 0.0937793, + 0.0953838, + 0.0965854, + 0.0963349, + 0.095554, + 0.095788, + 0.0959828, + 0.0960779, + 0.0959173, + 0.0956312, + 0.0952368, + 0.0946411, + 0.0947796, + 0.0956222, + 0.0959778, + 0.0961949, + 0.0946267, + 0.0933788, + 0.0933745, + 0.0941578, + 0.0957739, + 0.0963186, + 0.0970761, + 0.0966415, + 0.0962618, + 0.0960418, + 0.0960511, + 0.0958534, + 0.0958749, + 0.0954675, + 0.0948357, + 0.0955092, + 0.0965179, + 0.0965511, + 0.096515, + 0.0963032, + 0.0962315, + 0.0960485, + 0.0955558, + 0.0943813, + 0.0937167, + 0.0946791 + ], + "train_epoch_time": 4.84143590927124, + "train_loss": 2.3093996868927134, + "train_score": 0.3223569763784258, + "val_loss": 2.369595475092821, + "val_score": 0.3103069389318079 + }, + { + "epoch": 9, + "grad_norm": 1.0912246704101562, + "learning_rate": 0.1, + "model_norm": 87.5638198852539, + "step_logs": { + "grad_norm": { + "486": 1.5025008916854858, + "487": 1.5095657110214233, + "488": 1.592063069343567, + "489": 1.7205984592437744, + "490": 1.773987054824829, + "491": 1.6198124885559082, + "492": 1.5632734298706055, + "493": 1.4860070943832397, + "494": 1.2874897718429565, + "495": 1.2284979820251465, + "496": 1.2067540884017944, + "497": 1.3678255081176758, + "498": 1.597486972808838, + "499": 1.5526199340820312, + "500": 1.5520646572113037, + "501": 1.4706977605819702, + "502": 1.467108964920044, + "503": 1.6719712018966675, + "504": 1.5829285383224487, + "505": 1.4955989122390747, + "506": 1.5018434524536133, + "507": 1.4837757349014282, + "508": 1.3472092151641846, + "509": 1.361122488975525, + "510": 1.3829684257507324, + "511": 1.4686342477798462, + "512": 1.5566428899765015, + "513": 1.5263723134994507, + "514": 1.7206555604934692, + "515": 1.8564311265945435, + "516": 1.7699187994003296, + "517": 1.5642356872558594, + "518": 1.6314109563827515, + "519": 1.4676313400268555, + "520": 1.3388612270355225, + "521": 1.3338922262191772, + "522": 1.3107889890670776, + "523": 1.3294310569763184, + "524": 1.4511823654174805, + "525": 1.467866063117981, + "526": 1.6884526014328003, + "527": 1.5701409578323364, + "528": 1.762635350227356, + "529": 1.6352455615997314, + "530": 1.7891312837600708, + "531": 1.690053105354309, + "532": 1.4078140258789062, + "533": 1.2745277881622314, + "534": 1.269148349761963, + "535": 1.3134315013885498, + "536": 1.3920936584472656, + "537": 1.3074783086776733, + "538": 1.129028081893921, + "539": 1.0912246704101562 + }, + "loss": { + "486": 2.3220713138580322, + "487": 2.330090045928955, + "488": 2.311925172805786, + "489": 2.3156092166900635, + "490": 2.306424379348755, + "491": 2.332205295562744, + "492": 2.317397117614746, + "493": 2.331104278564453, + "494": 2.2727203369140625, + "495": 2.284027099609375, + "496": 2.294236183166504, + "497": 2.2855887413024902, + "498": 2.3141837120056152, + "499": 2.3078932762145996, + "500": 2.3057961463928223, + "501": 2.317169666290283, + "502": 2.289058208465576, + "503": 2.305058240890503, + "504": 2.2939486503601074, + "505": 2.3050730228424072, + "506": 2.30854868888855, + "507": 2.2708868980407715, + "508": 2.3110127449035645, + "509": 2.302642345428467, + "510": 2.2753477096557617, + "511": 2.3006060123443604, + "512": 2.311389923095703, + "513": 2.308623790740967, + "514": 2.305128574371338, + "515": 2.311640739440918, + "516": 2.3273863792419434, + "517": 2.3004872798919678, + "518": 2.325859308242798, + "519": 2.3006064891815186, + "520": 2.276432991027832, + "521": 2.3090970516204834, + "522": 2.2938272953033447, + "523": 2.3008387088775635, + "524": 2.3139688968658447, + "525": 2.293001174926758, + "526": 2.3016910552978516, + "527": 2.290680408477783, + "528": 2.2834925651550293, + "529": 2.2939915657043457, + "530": 2.3023834228515625, + "531": 2.298981189727783, + "532": 2.291498899459839, + "533": 2.288029670715332, + "534": 2.285649061203003, + "535": 2.2905266284942627, + "536": 2.293748140335083, + "537": 2.273296356201172, + "538": 2.2550418376922607, + "539": 2.2850148677825928 + }, + "lr": { + "486": 0.1, + "487": 0.1, + "488": 0.1, + "489": 0.1, + "490": 0.1, + "491": 0.1, + "492": 0.1, + "493": 0.1, + "494": 0.1, + "495": 0.1, + "496": 0.1, + "497": 0.1, + "498": 0.1, + "499": 0.1, + "500": 0.1, + "501": 0.1, + "502": 0.1, + "503": 0.1, + "504": 0.1, + "505": 0.1, + "506": 0.1, + "507": 0.1, + "508": 0.1, + "509": 0.1, + "510": 0.1, + "511": 0.1, + "512": 0.1, + "513": 0.1, + "514": 0.1, + "515": 0.1, + "516": 0.1, + "517": 0.1, + "518": 0.1, + "519": 0.1, + "520": 0.1, + "521": 0.1, + "522": 0.1, + "523": 0.1, + "524": 0.1, + "525": 0.1, + "526": 0.1, + "527": 0.1, + "528": 0.1, + "529": 0.1, + "530": 0.1, + "531": 0.1, + "532": 0.1, + "533": 0.1, + "534": 0.1, + "535": 0.1, + "536": 0.1, + "537": 0.1, + "538": 0.1, + "539": 0.1 + } + }, + "step_size_list": [ + 0.0953644, + 0.095338, + 0.0948032, + 0.0939917, + 0.0936134, + 0.0946744, + 0.0949913, + 0.0954778, + 0.0964815, + 0.0968018, + 0.0969239, + 0.096068, + 0.0947744, + 0.0950366, + 0.0950357, + 0.0955409, + 0.0955096, + 0.0942829, + 0.0948214, + 0.0953726, + 0.0953424, + 0.0953767, + 0.0962216, + 0.0961327, + 0.0959666, + 0.0955223, + 0.0950193, + 0.0951965, + 0.0939656, + 0.0930628, + 0.0936945, + 0.0949505, + 0.0945881, + 0.0955281, + 0.096212, + 0.0962902, + 0.09639, + 0.0963013, + 0.0956476, + 0.0955126, + 0.0941682, + 0.0948935, + 0.0936304, + 0.0944927, + 0.0935003, + 0.0941513, + 0.0958547, + 0.0965719, + 0.0965963, + 0.0963709, + 0.0959469, + 0.0963763, + 0.0972513, + 0.0974606 + ], + "train_epoch_time": 4.840567588806152, + "train_loss": 2.2588146516890233, + "train_score": 0.32843772405531346, + "val_loss": 2.3237856173487947, + "val_score": 0.30992573229371473 + }, + { + "epoch": 10, + "grad_norm": 1.4195648431777954, + "learning_rate": 0.1, + "model_norm": 87.58116912841797, + "step_logs": { + "grad_norm": { + "540": 1.100197672843933, + "541": 1.1823339462280273, + "542": 1.2725900411605835, + "543": 1.331581950187683, + "544": 1.5299253463745117, + "545": 1.8025225400924683, + "546": 1.7377359867095947, + "547": 1.8973256349563599, + "548": 1.8250856399536133, + "549": 1.4262754917144775, + "550": 1.443953037261963, + "551": 1.2793861627578735, + "552": 1.1450295448303223, + "553": 1.0606615543365479, + "554": 1.1801726818084717, + "555": 1.3393418788909912, + "556": 1.5357460975646973, + "557": 1.671959638595581, + "558": 1.5620275735855103, + "559": 1.5698570013046265, + "560": 1.47999107837677, + "561": 1.5179015398025513, + "562": 1.4943252801895142, + "563": 1.5487949848175049, + "564": 1.7194210290908813, + "565": 1.6637580394744873, + "566": 1.537824273109436, + "567": 1.4476512670516968, + "568": 1.4622572660446167, + "569": 1.6627964973449707, + "570": 1.7058537006378174, + "571": 1.620262622833252, + "572": 1.5235130786895752, + "573": 1.474424123764038, + "574": 1.5082924365997314, + "575": 1.4345836639404297, + "576": 1.3322948217391968, + "577": 1.2427113056182861, + "578": 1.255328893661499, + "579": 1.3651137351989746, + "580": 1.4349088668823242, + "581": 1.398942470550537, + "582": 1.4133671522140503, + "583": 1.484761118888855, + "584": 1.5328387022018433, + "585": 1.5369879007339478, + "586": 1.5777214765548706, + "587": 1.6712647676467896, + "588": 1.747390627861023, + "589": 1.7286624908447266, + "590": 1.9535447359085083, + "591": 1.67344069480896, + "592": 1.2704472541809082, + "593": 1.4195648431777954 + }, + "loss": { + "540": 2.2657480239868164, + "541": 2.260096549987793, + "542": 2.2562482357025146, + "543": 2.2681894302368164, + "544": 2.2761921882629395, + "545": 2.306246280670166, + "546": 2.3245182037353516, + "547": 2.2958405017852783, + "548": 2.304215431213379, + "549": 2.2735280990600586, + "550": 2.2743730545043945, + "551": 2.2598328590393066, + "552": 2.2704267501831055, + "553": 2.247743606567383, + "554": 2.2535901069641113, + "555": 2.2711734771728516, + "556": 2.2813498973846436, + "557": 2.2679498195648193, + "558": 2.259808301925659, + "559": 2.277531147003174, + "560": 2.272543430328369, + "561": 2.2580676078796387, + "562": 2.2870192527770996, + "563": 2.2986292839050293, + "564": 2.279205799102783, + "565": 2.2739334106445312, + "566": 2.2868971824645996, + "567": 2.2592694759368896, + "568": 2.254410743713379, + "569": 2.283161163330078, + "570": 2.304507255554199, + "571": 2.2747268676757812, + "572": 2.2719836235046387, + "573": 2.240689754486084, + "574": 2.250385284423828, + "575": 2.2764248847961426, + "576": 2.22999906539917, + "577": 2.2403783798217773, + "578": 2.2409114837646484, + "579": 2.246426582336426, + "580": 2.2490358352661133, + "581": 2.24831485748291, + "582": 2.22908353805542, + "583": 2.2384352684020996, + "584": 2.262144088745117, + "585": 2.2450196743011475, + "586": 2.2659473419189453, + "587": 2.293720245361328, + "588": 2.271733522415161, + "589": 2.2749931812286377, + "590": 2.272538185119629, + "591": 2.2866933345794678, + "592": 2.227475643157959, + "593": 2.2312941551208496 + }, + "lr": { + "540": 0.1, + "541": 0.1, + "542": 0.1, + "543": 0.1, + "544": 0.1, + "545": 0.1, + "546": 0.1, + "547": 0.1, + "548": 0.1, + "549": 0.1, + "550": 0.1, + "551": 0.1, + "552": 0.1, + "553": 0.1, + "554": 0.1, + "555": 0.1, + "556": 0.1, + "557": 0.1, + "558": 0.1, + "559": 0.1, + "560": 0.1, + "561": 0.1, + "562": 0.1, + "563": 0.1, + "564": 0.1, + "565": 0.1, + "566": 0.1, + "567": 0.1, + "568": 0.1, + "569": 0.1, + "570": 0.1, + "571": 0.1, + "572": 0.1, + "573": 0.1, + "574": 0.1, + "575": 0.1, + "576": 0.1, + "577": 0.1, + "578": 0.1, + "579": 0.1, + "580": 0.1, + "581": 0.1, + "582": 0.1, + "583": 0.1, + "584": 0.1, + "585": 0.1, + "586": 0.1, + "587": 0.1, + "588": 0.1, + "589": 0.1, + "590": 0.1, + "591": 0.1, + "592": 0.1, + "593": 0.1 + } + }, + "step_size_list": [ + 0.0973983, + 0.0970002, + 0.0965355, + 0.0962384, + 0.0951098, + 0.0934194, + 0.0939008, + 0.09273, + 0.0932593, + 0.0957178, + 0.0956172, + 0.096505, + 0.0971937, + 0.0975586, + 0.0970024, + 0.0962009, + 0.0950849, + 0.0941948, + 0.094878, + 0.0948673, + 0.0954024, + 0.0951459, + 0.0953453, + 0.0950409, + 0.0939094, + 0.0942626, + 0.0950837, + 0.0955676, + 0.0954725, + 0.0942907, + 0.0940614, + 0.0945443, + 0.0951402, + 0.0953734, + 0.0951886, + 0.0956752, + 0.0961725, + 0.0966682, + 0.0966033, + 0.0960174, + 0.0956229, + 0.0958293, + 0.0957114, + 0.0953069, + 0.0950631, + 0.0950017, + 0.0947933, + 0.0942608, + 0.0937028, + 0.0938371, + 0.0922538, + 0.09423, + 0.0965037, + 0.0956794 + ], + "train_epoch_time": 4.840311050415039, + "train_loss": 2.2552605436043214, + "train_score": 0.337925932508288, + "val_loss": 2.31916275254347, + "val_score": 0.31855446384229563 + }, + { + "epoch": 11, + "grad_norm": 1.5625512599945068, + "learning_rate": 0.1, + "model_norm": 87.59830474853516, + "step_logs": { + "grad_norm": { + "594": 1.6534924507141113, + "595": 1.6314411163330078, + "596": 1.4768848419189453, + "597": 1.3829001188278198, + "598": 1.5773581266403198, + "599": 1.7535510063171387, + "600": 1.5041229724884033, + "601": 1.2253459692001343, + "602": 1.2250187397003174, + "603": 1.1967824697494507, + "604": 1.0975279808044434, + "605": 1.055688738822937, + "606": 1.156018853187561, + "607": 1.3279361724853516, + "608": 1.4560978412628174, + "609": 1.496138095855713, + "610": 1.680279016494751, + "611": 1.7630523443222046, + "612": 1.6128991842269897, + "613": 1.5570846796035767, + "614": 1.6538764238357544, + "615": 1.7344026565551758, + "616": 1.7097742557525635, + "617": 1.745121717453003, + "618": 1.4628164768218994, + "619": 1.534752607345581, + "620": 1.5265697240829468, + "621": 1.495509147644043, + "622": 1.4786078929901123, + "623": 1.4330041408538818, + "624": 1.4551668167114258, + "625": 1.5479933023452759, + "626": 1.3500335216522217, + "627": 1.3044919967651367, + "628": 1.5800038576126099, + "629": 1.800116777420044, + "630": 1.5918378829956055, + "631": 1.3650813102722168, + "632": 1.2443476915359497, + "633": 1.3246335983276367, + "634": 1.3975545167922974, + "635": 1.4696154594421387, + "636": 1.3224786520004272, + "637": 1.3797218799591064, + "638": 1.4980833530426025, + "639": 1.4243566989898682, + "640": 1.2857396602630615, + "641": 1.2422525882720947, + "642": 1.2802238464355469, + "643": 1.255600929260254, + "644": 1.2912657260894775, + "645": 1.3614187240600586, + "646": 1.4797443151474, + "647": 1.5625512599945068 + }, + "loss": { + "594": 2.256520986557007, + "595": 2.2483062744140625, + "596": 2.2642526626586914, + "597": 2.229402542114258, + "598": 2.2512664794921875, + "599": 2.227321147918701, + "600": 2.256075859069824, + "601": 2.2356483936309814, + "602": 2.2189698219299316, + "603": 2.227083683013916, + "604": 2.2117767333984375, + "605": 2.224368095397949, + "606": 2.2249794006347656, + "607": 2.2397990226745605, + "608": 2.237287998199463, + "609": 2.2222719192504883, + "610": 2.223987102508545, + "611": 2.274860382080078, + "612": 2.2311863899230957, + "613": 2.2601726055145264, + "614": 2.2343757152557373, + "615": 2.2645065784454346, + "616": 2.2727839946746826, + "617": 2.2672462463378906, + "618": 2.244750499725342, + "619": 2.236546039581299, + "620": 2.237711191177368, + "621": 2.2292706966400146, + "622": 2.2331385612487793, + "623": 2.216628074645996, + "624": 2.249546527862549, + "625": 2.2301745414733887, + "626": 2.2357091903686523, + "627": 2.1932413578033447, + "628": 2.230203151702881, + "629": 2.2156214714050293, + "630": 2.2423171997070312, + "631": 2.205599069595337, + "632": 2.1958603858947754, + "633": 2.2244582176208496, + "634": 2.226994752883911, + "635": 2.2127346992492676, + "636": 2.2161855697631836, + "637": 2.225216865539551, + "638": 2.2458338737487793, + "639": 2.208857536315918, + "640": 2.2217001914978027, + "641": 2.1997947692871094, + "642": 2.206165313720703, + "643": 2.2320661544799805, + "644": 2.2041714191436768, + "645": 2.232840061187744, + "646": 2.2346062660217285, + "647": 2.2509753704071045 + }, + "lr": { + "594": 0.1, + "595": 0.1, + "596": 0.1, + "597": 0.1, + "598": 0.1, + "599": 0.1, + "600": 0.1, + "601": 0.1, + "602": 0.1, + "603": 0.1, + "604": 0.1, + "605": 0.1, + "606": 0.1, + "607": 0.1, + "608": 0.1, + "609": 0.1, + "610": 0.1, + "611": 0.1, + "612": 0.1, + "613": 0.1, + "614": 0.1, + "615": 0.1, + "616": 0.1, + "617": 0.1, + "618": 0.1, + "619": 0.1, + "620": 0.1, + "621": 0.1, + "622": 0.1, + "623": 0.1, + "624": 0.1, + "625": 0.1, + "626": 0.1, + "627": 0.1, + "628": 0.1, + "629": 0.1, + "630": 0.1, + "631": 0.1, + "632": 0.1, + "633": 0.1, + "634": 0.1, + "635": 0.1, + "636": 0.1, + "637": 0.1, + "638": 0.1, + "639": 0.1, + "640": 0.1, + "641": 0.1, + "642": 0.1, + "643": 0.1, + "644": 0.1, + "645": 0.1, + "646": 0.1, + "647": 0.1 + } + }, + "step_size_list": [ + 0.094288, + 0.0944117, + 0.0954048, + 0.0958873, + 0.0947635, + 0.0935429, + 0.0952254, + 0.0967511, + 0.0967292, + 0.0968846, + 0.0973491, + 0.0975561, + 0.0970844, + 0.0962126, + 0.095476, + 0.0952051, + 0.0940314, + 0.0936049, + 0.0944914, + 0.0949095, + 0.0942321, + 0.0937717, + 0.0939574, + 0.0937065, + 0.0954505, + 0.0949976, + 0.0950506, + 0.0952233, + 0.0953333, + 0.095573, + 0.095505, + 0.0949015, + 0.0960836, + 0.0962655, + 0.0946998, + 0.0931856, + 0.0946519, + 0.0959469, + 0.0965943, + 0.0962056, + 0.095799, + 0.0953468, + 0.0962039, + 0.0958981, + 0.0952413, + 0.0956092, + 0.096413, + 0.0966113, + 0.0964185, + 0.0965889, + 0.0963555, + 0.0960149, + 0.0953294, + 0.0948556 + ], + "train_epoch_time": 4.840200424194336, + "train_loss": 2.2180573551008314, + "train_score": 0.3428084647210804, + "val_loss": 2.2931347347701867, + "val_score": 0.3267975034519397 + }, + { + "epoch": 12, + "grad_norm": 0.8762531280517578, + "learning_rate": 0.1, + "model_norm": 87.6137924194336, + "step_logs": { + "grad_norm": { + "648": 1.5413535833358765, + "649": 1.4510596990585327, + "650": 1.323213815689087, + "651": 1.405971884727478, + "652": 1.4707849025726318, + "653": 1.6178920269012451, + "654": 1.691859245300293, + "655": 1.710860013961792, + "656": 1.5524107217788696, + "657": 1.3920801877975464, + "658": 1.4672938585281372, + "659": 1.3626227378845215, + "660": 1.3100682497024536, + "661": 1.188665509223938, + "662": 1.116929292678833, + "663": 1.1059703826904297, + "664": 1.1361353397369385, + "665": 1.1766940355300903, + "666": 1.1772736310958862, + "667": 1.2098441123962402, + "668": 1.2716014385223389, + "669": 1.1813251972198486, + "670": 1.1426494121551514, + "671": 1.1829032897949219, + "672": 1.2042458057403564, + "673": 1.176770806312561, + "674": 1.0168719291687012, + "675": 0.957220733165741, + "676": 0.9578326940536499, + "677": 0.9323024153709412, + "678": 0.9654185175895691, + "679": 0.9408234357833862, + "680": 0.9756593704223633, + "681": 0.9996417760848999, + "682": 1.0176331996917725, + "683": 1.1016645431518555, + "684": 1.1992191076278687, + "685": 1.2792115211486816, + "686": 1.2094345092773438, + "687": 1.0129423141479492, + "688": 0.9981645941734314, + "689": 0.9855990409851074, + "690": 0.9641879200935364, + "691": 0.9150952100753784, + "692": 0.9417808651924133, + "693": 0.8396671414375305, + "694": 0.7987462878227234, + "695": 0.8581586480140686, + "696": 0.8286956548690796, + "697": 0.7874364256858826, + "698": 0.7451260685920715, + "699": 0.7946550846099854, + "700": 0.8350131511688232, + "701": 0.8762531280517578 + }, + "loss": { + "648": 2.2028181552886963, + "649": 2.228684186935425, + "650": 2.2092976570129395, + "651": 2.204345941543579, + "652": 2.230433940887451, + "653": 2.217052698135376, + "654": 2.21187162399292, + "655": 2.2144064903259277, + "656": 2.2300021648406982, + "657": 2.206143379211426, + "658": 2.2129125595092773, + "659": 2.190964698791504, + "660": 2.1846811771392822, + "661": 2.2048850059509277, + "662": 2.2016615867614746, + "663": 2.19024395942688, + "664": 2.187074661254883, + "665": 2.196682929992676, + "666": 2.1928462982177734, + "667": 2.1666507720947266, + "668": 2.173252820968628, + "669": 2.200779914855957, + "670": 2.180562973022461, + "671": 2.171079635620117, + "672": 2.16996431350708, + "673": 2.2069549560546875, + "674": 2.1732025146484375, + "675": 2.162417411804199, + "676": 2.1946511268615723, + "677": 2.1678366661071777, + "678": 2.1641478538513184, + "679": 2.183716297149658, + "680": 2.1696090698242188, + "681": 2.1490530967712402, + "682": 2.175893783569336, + "683": 2.182178497314453, + "684": 2.1715240478515625, + "685": 2.178544044494629, + "686": 2.1701955795288086, + "687": 2.1578786373138428, + "688": 2.1630373001098633, + "689": 2.1605517864227295, + "690": 2.1448769569396973, + "691": 2.164898157119751, + "692": 2.1333160400390625, + "693": 2.130385398864746, + "694": 2.1412806510925293, + "695": 2.1399881839752197, + "696": 2.1563222408294678, + "697": 2.1380600929260254, + "698": 2.1288375854492188, + "699": 2.135341167449951, + "700": 2.149369716644287, + "701": 2.149366855621338 + }, + "lr": { + "648": 0.1, + "649": 0.09938271604938272, + "650": 0.09876543209876543, + "651": 0.09814814814814815, + "652": 0.09753086419753088, + "653": 0.09691358024691359, + "654": 0.09629629629629631, + "655": 0.09567901234567902, + "656": 0.09506172839506173, + "657": 0.09444444444444444, + "658": 0.09382716049382717, + "659": 0.09320987654320989, + "660": 0.0925925925925926, + "661": 0.09197530864197531, + "662": 0.09135802469135804, + "663": 0.09074074074074075, + "664": 0.09012345679012346, + "665": 0.08950617283950618, + "666": 0.08888888888888889, + "667": 0.08827160493827162, + "668": 0.08765432098765433, + "669": 0.08703703703703704, + "670": 0.08641975308641976, + "671": 0.08580246913580247, + "672": 0.0851851851851852, + "673": 0.08456790123456791, + "674": 0.08395061728395062, + "675": 0.08333333333333334, + "676": 0.08271604938271605, + "677": 0.08209876543209876, + "678": 0.08148148148148149, + "679": 0.08086419753086421, + "680": 0.08024691358024692, + "681": 0.07962962962962963, + "682": 0.07901234567901234, + "683": 0.07839506172839507, + "684": 0.07777777777777778, + "685": 0.0771604938271605, + "686": 0.07654320987654323, + "687": 0.07592592592592594, + "688": 0.07530864197530865, + "689": 0.07469135802469136, + "690": 0.07407407407407407, + "691": 0.07345679012345678, + "692": 0.0728395061728395, + "693": 0.07222222222222223, + "694": 0.07160493827160495, + "695": 0.07098765432098766, + "696": 0.07037037037037037, + "697": 0.06975308641975309, + "698": 0.0691358024691358, + "699": 0.06851851851851852, + "700": 0.06790123456790124, + "701": 0.06728395061728396 + } + }, + "step_size_list": [ + 0.0948833, + 0.0949263, + 0.0950457, + 0.094011, + 0.0931264, + 0.0916691, + 0.0906481, + 0.0899886, + 0.0904173, + 0.0906829, + 0.0897316, + 0.0896684, + 0.0893432, + 0.0893424, + 0.089053, + 0.0884984, + 0.0877887, + 0.0870506, + 0.0864602, + 0.0857158, + 0.0848863, + 0.0846997, + 0.0842402, + 0.0834939, + 0.0828275, + 0.0823822, + 0.0823068, + 0.0818876, + 0.0813103, + 0.0807694, + 0.0800765, + 0.0795603, + 0.0788587, + 0.0781822, + 0.0775542, + 0.0767225, + 0.0758249, + 0.0749874, + 0.0746184, + 0.0745797, + 0.0740247, + 0.0734579, + 0.0729037, + 0.0724278, + 0.071753, + 0.0713693, + 0.0708492, + 0.070131, + 0.0695906, + 0.0690546, + 0.0685181, + 0.0678313, + 0.0671616, + 0.0664849 + ], + "train_epoch_time": 4.840845823287964, + "train_loss": 2.1405552824395286, + "train_score": 0.3687858680484969, + "val_loss": 2.2164132720977925, + "val_score": 0.3456560345591141 + }, + { + "epoch": 13, + "grad_norm": 0.7531309723854065, + "learning_rate": 0.06666666666666668, + "model_norm": 87.62348175048828, + "step_logs": { + "grad_norm": { + "702": 0.8920872211456299, + "703": 0.9194406867027283, + "704": 0.988314688205719, + "705": 1.1157653331756592, + "706": 1.0968096256256104, + "707": 1.1118769645690918, + "708": 1.1954256296157837, + "709": 1.3032866716384888, + "710": 1.1688765287399292, + "711": 0.9599566459655762, + "712": 1.010603666305542, + "713": 1.0050030946731567, + "714": 0.9598475098609924, + "715": 1.0469393730163574, + "716": 1.0152608156204224, + "717": 0.9659439921379089, + "718": 0.8634149432182312, + "719": 0.8878005743026733, + "720": 0.896177351474762, + "721": 0.8501298427581787, + "722": 0.8294501304626465, + "723": 0.7876549363136292, + "724": 0.7377925515174866, + "725": 0.7456555366516113, + "726": 0.7465358972549438, + "727": 0.7019779086112976, + "728": 0.7784258127212524, + "729": 0.8708042502403259, + "730": 0.8745377659797668, + "731": 0.8901929259300232, + "732": 0.8764516115188599, + "733": 0.8969185948371887, + "734": 0.8551320433616638, + "735": 0.7365669012069702, + "736": 0.7328318357467651, + "737": 0.8150904774665833, + "738": 0.8248913288116455, + "739": 0.6946381330490112, + "740": 0.708453357219696, + "741": 0.7869342565536499, + "742": 0.7736213207244873, + "743": 0.8139951825141907, + "744": 0.796597957611084, + "745": 0.7518201470375061, + "746": 0.7371087670326233, + "747": 0.6931148767471313, + "748": 0.7689252495765686, + "749": 0.7809926271438599, + "750": 0.7097293734550476, + "751": 0.7736760377883911, + "752": 0.7193594574928284, + "753": 0.706220269203186, + "754": 0.6613119840621948, + "755": 0.7531309723854065 + }, + "loss": { + "702": 2.1390342712402344, + "703": 2.134037494659424, + "704": 2.140979290008545, + "705": 2.1441988945007324, + "706": 2.1380772590637207, + "707": 2.1422739028930664, + "708": 2.142864942550659, + "709": 2.1670923233032227, + "710": 2.1580002307891846, + "711": 2.1448566913604736, + "712": 2.1190733909606934, + "713": 2.14148211479187, + "714": 2.1272506713867188, + "715": 2.1676583290100098, + "716": 2.123312473297119, + "717": 2.1017513275146484, + "718": 2.1215970516204834, + "719": 2.1481404304504395, + "720": 2.110671043395996, + "721": 2.116016387939453, + "722": 2.13571834564209, + "723": 2.1065866947174072, + "724": 2.1273536682128906, + "725": 2.1519713401794434, + "726": 2.113983392715454, + "727": 2.1189074516296387, + "728": 2.1163251399993896, + "729": 2.1233696937561035, + "730": 2.1128978729248047, + "731": 2.1425318717956543, + "732": 2.1122403144836426, + "733": 2.127007007598877, + "734": 2.115316390991211, + "735": 2.142245292663574, + "736": 2.103114604949951, + "737": 2.1076889038085938, + "738": 2.12778377532959, + "739": 2.1301040649414062, + "740": 2.1092724800109863, + "741": 2.151620626449585, + "742": 2.104706287384033, + "743": 2.105891227722168, + "744": 2.1168055534362793, + "745": 2.098604679107666, + "746": 2.1319808959960938, + "747": 2.116122245788574, + "748": 2.103595733642578, + "749": 2.0936508178710938, + "750": 2.124605655670166, + "751": 2.097956657409668, + "752": 2.1295769214630127, + "753": 2.121328353881836, + "754": 2.1142678260803223, + "755": 2.1180806159973145 + }, + "lr": { + "702": 0.06666666666666668, + "703": 0.06604938271604939, + "704": 0.0654320987654321, + "705": 0.06481481481481481, + "706": 0.06419753086419754, + "707": 0.06358024691358025, + "708": 0.06296296296296297, + "709": 0.06234567901234568, + "710": 0.061728395061728406, + "711": 0.061111111111111116, + "712": 0.060493827160493834, + "713": 0.059876543209876544, + "714": 0.05925925925925926, + "715": 0.05864197530864197, + "716": 0.058024691358024696, + "717": 0.05740740740740741, + "718": 0.05679012345679013, + "719": 0.05617283950617285, + "720": 0.05555555555555556, + "721": 0.05493827160493828, + "722": 0.05432098765432099, + "723": 0.0537037037037037, + "724": 0.05308641975308642, + "725": 0.05246913580246914, + "726": 0.051851851851851864, + "727": 0.051234567901234575, + "728": 0.05061728395061729, + "729": 0.05, + "730": 0.04938271604938271, + "731": 0.04876543209876543, + "732": 0.048148148148148155, + "733": 0.047530864197530866, + "734": 0.04691358024691358, + "735": 0.046296296296296294, + "736": 0.04567901234567902, + "737": 0.04506172839506173, + "738": 0.044444444444444446, + "739": 0.04382716049382716, + "740": 0.04320987654320988, + "741": 0.0425925925925926, + "742": 0.04197530864197531, + "743": 0.04135802469135802, + "744": 0.040740740740740744, + "745": 0.04012345679012346, + "746": 0.03950617283950617, + "747": 0.03888888888888889, + "748": 0.038271604938271614, + "749": 0.037654320987654324, + "750": 0.037037037037037035, + "751": 0.03641975308641975, + "752": 0.03580246913580248, + "753": 0.03518518518518519, + "754": 0.0345679012345679, + "755": 0.033950617283950615 + } + }, + "step_size_list": [ + 0.06585, + 0.0651965, + 0.0644698, + 0.0636178, + 0.0630587, + 0.0624348, + 0.0616683, + 0.0608587, + 0.0605453, + 0.0603192, + 0.0596246, + 0.0590428, + 0.0585084, + 0.0577852, + 0.0572188, + 0.0566851, + 0.0562291, + 0.0555999, + 0.0549745, + 0.0544276, + 0.0538498, + 0.0532823, + 0.0527283, + 0.0521159, + 0.0514998, + 0.0509311, + 0.0502531, + 0.0495575, + 0.0489453, + 0.0483296, + 0.0477303, + 0.0471074, + 0.0465362, + 0.0460265, + 0.0454141, + 0.044744, + 0.0441308, + 0.0436107, + 0.0429889, + 0.0423331, + 0.0417263, + 0.0410907, + 0.0404935, + 0.0399078, + 0.0393083, + 0.038718, + 0.0380669, + 0.0374489, + 0.0368751, + 0.0362315, + 0.0356474, + 0.0350403, + 0.0344448, + 0.033797 + ], + "train_epoch_time": 4.841166734695435, + "train_loss": 2.105520798315106, + "train_score": 0.37542593249973644, + "val_loss": 2.187971991868572, + "val_score": 0.35329362842437184 + }, + { + "epoch": 14, + "grad_norm": 0.6560465693473816, + "learning_rate": 0.03333333333333334, + "model_norm": 87.62673950195312, + "step_logs": { + "grad_norm": { + "756": 0.6808722019195557, + "757": 0.7502784729003906, + "758": 0.7747045159339905, + "759": 0.7402014136314392, + "760": 0.7361641526222229, + "761": 0.7099589705467224, + "762": 0.6706918478012085, + "763": 0.6561278104782104, + "764": 0.7447983026504517, + "765": 0.6444520950317383, + "766": 0.6793810725212097, + "767": 0.6898874044418335, + "768": 0.7020453810691833, + "769": 0.7227084636688232, + "770": 0.7597751617431641, + "771": 0.656038224697113, + "772": 0.6833887100219727, + "773": 0.7233046889305115, + "774": 0.7019231915473938, + "775": 0.7187379598617554, + "776": 0.6745233535766602, + "777": 0.6481652855873108, + "778": 0.6588069796562195, + "779": 0.7660980820655823, + "780": 0.6767035126686096, + "781": 0.7113878130912781, + "782": 0.6418550610542297, + "783": 0.6614627242088318, + "784": 0.6701388359069824, + "785": 0.7062239050865173, + "786": 0.7151431441307068, + "787": 0.6770057678222656, + "788": 0.6858407258987427, + "789": 0.6519733667373657, + "790": 0.6951921582221985, + "791": 0.6831929087638855, + "792": 0.5835621356964111, + "793": 0.6608464121818542, + "794": 0.6295688152313232, + "795": 0.602670431137085, + "796": 0.6260209679603577, + "797": 0.6543610692024231, + "798": 0.6884217262268066, + "799": 0.6450269818305969, + "800": 0.6900261640548706, + "801": 0.6984003186225891, + "802": 0.6354591846466064, + "803": 0.6883190274238586, + "804": 0.6483860611915588, + "805": 0.6386538147926331, + "806": 0.640887975692749, + "807": 0.679754912853241, + "808": 0.6633153557777405, + "809": 0.6560465693473816 + }, + "loss": { + "756": 2.1119837760925293, + "757": 2.1168980598449707, + "758": 2.1193339824676514, + "759": 2.119785785675049, + "760": 2.1012823581695557, + "761": 2.110630989074707, + "762": 2.10760235786438, + "763": 2.113950729370117, + "764": 2.067664623260498, + "765": 2.088411808013916, + "766": 2.1109251976013184, + "767": 2.107616424560547, + "768": 2.121735095977783, + "769": 2.0850253105163574, + "770": 2.132988929748535, + "771": 2.0806896686553955, + "772": 2.1015868186950684, + "773": 2.0957279205322266, + "774": 2.0994162559509277, + "775": 2.083505153656006, + "776": 2.1091468334198, + "777": 2.096006393432617, + "778": 2.097630739212036, + "779": 2.112154483795166, + "780": 2.0990288257598877, + "781": 2.064929962158203, + "782": 2.115248203277588, + "783": 2.0665059089660645, + "784": 2.1107029914855957, + "785": 2.1103851795196533, + "786": 2.0840463638305664, + "787": 2.112826347351074, + "788": 2.12038254737854, + "789": 2.060861825942993, + "790": 2.079716205596924, + "791": 2.112527370452881, + "792": 2.109745502471924, + "793": 2.083125352859497, + "794": 2.101740837097168, + "795": 2.0904805660247803, + "796": 2.1020026206970215, + "797": 2.0670418739318848, + "798": 2.090491533279419, + "799": 2.1115198135375977, + "800": 2.0831446647644043, + "801": 2.072781562805176, + "802": 2.107959747314453, + "803": 2.097409725189209, + "804": 2.1161949634552, + "805": 2.1058473587036133, + "806": 2.1048529148101807, + "807": 2.0991337299346924, + "808": 2.091625213623047, + "809": 2.08298397064209 + }, + "lr": { + "756": 0.03333333333333334, + "757": 0.03271604938271605, + "758": 0.03209876543209877, + "759": 0.03148148148148148, + "760": 0.030864197530864203, + "761": 0.030246913580246917, + "762": 0.02962962962962963, + "763": 0.02901234567901234, + "764": 0.028395061728395066, + "765": 0.02777777777777778, + "766": 0.027160493827160494, + "767": 0.026543209876543208, + "768": 0.025925925925925932, + "769": 0.025308641975308646, + "770": 0.024691358024691357, + "771": 0.02407407407407407, + "772": 0.023456790123456795, + "773": 0.02283950617283951, + "774": 0.022222222222222223, + "775": 0.021604938271604937, + "776": 0.02098765432098766, + "777": 0.020370370370370372, + "778": 0.019753086419753086, + "779": 0.0191358024691358, + "780": 0.018518518518518524, + "781": 0.01790123456790124, + "782": 0.01728395061728395, + "783": 0.016666666666666663, + "784": 0.016049382716049387, + "785": 0.015432098765432101, + "786": 0.014814814814814815, + "787": 0.014197530864197528, + "788": 0.013580246913580252, + "789": 0.012962962962962966, + "790": 0.012345679012345678, + "791": 0.011728395061728392, + "792": 0.011111111111111117, + "793": 0.01049382716049383, + "794": 0.009876543209876543, + "795": 0.009259259259259257, + "796": 0.008641975308641981, + "797": 0.008024691358024694, + "798": 0.007407407407407408, + "799": 0.006790123456790121, + "800": 0.006172839506172845, + "801": 0.005555555555555558, + "802": 0.0049382716049382715, + "803": 0.004320987654320985, + "804": 0.003703703703703709, + "805": 0.0030864197530864226, + "806": 0.0024691358024691358, + "807": 0.001851851851851849, + "808": 0.0012345679012345735, + "809": 0.0006172839506172868 + } + }, + "step_size_list": [ + 0.0332118, + 0.0325744, + 0.0319535, + 0.0313539, + 0.0307418, + 0.0301381, + 0.0295362, + 0.0289269, + 0.0282873, + 0.0277013, + 0.0270801, + 0.0264639, + 0.0258481, + 0.0252287, + 0.0246091, + 0.0240143, + 0.0233958, + 0.0227746, + 0.0221644, + 0.0215472, + 0.0209403, + 0.0203289, + 0.0197128, + 0.0190851, + 0.0184812, + 0.0178621, + 0.0172549, + 0.0166373, + 0.016022, + 0.015404, + 0.0147879, + 0.0141757, + 0.0135598, + 0.0129457, + 0.012328, + 0.0117132, + 0.0111012, + 0.0104823, + 0.00986736, + 0.00925182, + 0.00863502, + 0.00801803, + 0.00740119, + 0.00678558, + 0.00616849, + 0.00555193, + 0.00493594, + 0.00431888, + 0.00370234, + 0.0030855, + 0.00246854, + 0.00185147, + 0.00123441, + 0.000617245 + ], + "train_epoch_time": 4.840723752975464, + "train_loss": 2.093103907200664, + "train_score": 0.3785397239275266, + "val_loss": 2.1785143122470476, + "val_score": 0.35478706283218964 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:25:22.001760", + "final_model_norm": 87.62673950195312, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:23:40.473868", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 2.8237144947052, + "learning_rate": 2.15e-11, + "model_norm": 87.43746185302734, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.703997611999512, + "3": 7.326879501342773, + "4": 21.96558952331543, + "5": 7.369707107543945, + "6": 5.422325611114502, + "7": 4.3268046379089355, + "8": 3.9849159717559814, + "9": 8.04224967956543, + "10": 6.365480899810791, + "11": 4.813935279846191, + "12": 34.062808990478516, + "13": 4.699474811553955, + "14": 6.995782375335693, + "15": 8.23671817779541, + "16": 3.417386293411255, + "17": 9.862605094909668, + "18": 4.489058017730713, + "19": 13.414119720458984, + "20": 5.0882062911987305, + "21": 37.26375961303711, + "22": 7.999494552612305, + "23": 9.745227813720703, + "24": 4.009833812713623, + "25": 36.2741584777832, + "26": 3.2594213485717773, + "27": 4.331626892089844, + "28": 2.405197858810425, + "29": 3.363208055496216, + "30": 4.6236419677734375, + "31": 3.833583354949951, + "32": 4.102850437164307, + "33": 5.206770896911621, + "34": 27.12824058532715, + "35": 3.4224886894226074, + "36": 6.442839622497559, + "37": 5.323864936828613, + "38": 8.582418441772461, + "39": 4.82737922668457, + "40": 4.733067512512207, + "41": 27.60544776916504, + "42": 3.88569712638855, + "43": 5.079615116119385, + "44": 7.171720027923584, + "45": 6.145301818847656, + "46": 2.9072279930114746, + "47": 6.455597877502441, + "48": 3.6128594875335693, + "49": 7.043355464935303, + "50": 2.3525805473327637, + "51": 2.960170269012451, + "52": 6.720886707305908, + "53": 2.8237144947052 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.860989570617676, + "3": 3.6825003623962402, + "4": 4.148896217346191, + "5": 4.060670375823975, + "6": 3.558018684387207, + "7": 3.6036229133605957, + "8": 3.4577369689941406, + "9": 3.5245635509490967, + "10": 3.764491558074951, + "11": 3.3649075031280518, + "12": 3.5065202713012695, + "13": 3.3472676277160645, + "14": 3.5612375736236572, + "15": 3.181992769241333, + "16": 3.2212250232696533, + "17": 3.297783851623535, + "18": 3.2949283123016357, + "19": 3.5093657970428467, + "20": 3.236755609512329, + "21": 4.153029441833496, + "22": 3.4655709266662598, + "23": 3.8188352584838867, + "24": 3.2795932292938232, + "25": 4.070119857788086, + "26": 3.0209813117980957, + "27": 3.2271833419799805, + "28": 2.8818459510803223, + "29": 2.9359586238861084, + "30": 3.0909037590026855, + "31": 3.120154619216919, + "32": 2.940892219543457, + "33": 3.044102430343628, + "34": 4.243189811706543, + "35": 3.1709108352661133, + "36": 3.156486988067627, + "37": 3.0236644744873047, + "38": 2.9539918899536133, + "39": 3.2043702602386475, + "40": 3.2703495025634766, + "41": 4.97122859954834, + "42": 3.2410924434661865, + "43": 3.352753162384033, + "44": 3.334832191467285, + "45": 3.126955032348633, + "46": 2.9199960231781006, + "47": 3.1369423866271973, + "48": 3.067199945449829, + "49": 3.5366017818450928, + "50": 2.9732704162597656, + "51": 2.9412715435028076, + "52": 3.3356733322143555, + "53": 3.2908201217651367 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 2.15e-11, + 0.00343811, + 0.00819006, + 0.0117913, + 0.00859949, + 0.0187973, + 0.0233147, + 0.0279172, + 0.0318817, + 0.0285591, + 0.0349191, + 0.040675, + 0.00541052, + 0.0471964, + 0.0425847, + 0.0382198, + 0.0611709, + 0.0351768, + 0.0625865, + 0.0264014, + 0.0639908, + 0.00561004, + 0.0504965, + 0.0443546, + 0.0823638, + 0.00584982, + 0.0934328, + 0.0868034, + 0.107419, + 0.100547, + 0.0892048, + 0.101451, + 0.0987226, + 0.0869553, + 0.0106883, + 0.117764, + 0.0767146, + 0.0911385, + 0.0537997, + 0.104175, + 0.108237, + 0.0121478, + 0.127124, + 0.108035, + 0.076941, + 0.0892336, + 0.153778, + 0.0862767, + 0.143415, + 0.085036, + 0.179151, + 0.162846, + 0.0875507, + 0.170572 + ], + "train_epoch_time": 4.843285083770752, + "train_loss": 2.936614261774969, + "train_score": 0.18691714488536418, + "val_loss": 2.96571738985458, + "val_score": 0.18380005022962112 + }, + { + "epoch": 1, + "grad_norm": 1.3842122554779053, + "learning_rate": 0.215, + "model_norm": 87.45478820800781, + "step_logs": { + "grad_norm": { + "54": 2.8213584423065186, + "55": 2.51896071434021, + "56": 2.485569477081299, + "57": 3.002544403076172, + "58": 2.7876532077789307, + "59": 2.2174365520477295, + "60": 1.964108943939209, + "61": 3.3254904747009277, + "62": 2.0521535873413086, + "63": 1.5978906154632568, + "64": 1.8865280151367188, + "65": 1.7981300354003906, + "66": 1.7488449811935425, + "67": 2.379397392272949, + "68": 2.1411471366882324, + "69": 1.9745234251022339, + "70": 2.0030853748321533, + "71": 3.452584981918335, + "72": 1.9897698163986206, + "73": 1.8727123737335205, + "74": 1.5652499198913574, + "75": 2.163674831390381, + "76": 1.918753981590271, + "77": 2.1521198749542236, + "78": 1.8960561752319336, + "79": 2.127223253250122, + "80": 1.8088271617889404, + "81": 1.5839332342147827, + "82": 1.7778187990188599, + "83": 2.045609712600708, + "84": 1.6922314167022705, + "85": 1.2461128234863281, + "86": 1.4176477193832397, + "87": 2.2051236629486084, + "88": 1.6134084463119507, + "89": 1.1456162929534912, + "90": 1.2071748971939087, + "91": 1.4080860614776611, + "92": 1.487453579902649, + "93": 1.8650773763656616, + "94": 1.7297559976577759, + "95": 1.4833413362503052, + "96": 1.4754977226257324, + "97": 1.8308939933776855, + "98": 1.6010785102844238, + "99": 1.493167757987976, + "100": 1.4946473836898804, + "101": 1.7533742189407349, + "102": 1.4729269742965698, + "103": 1.3775416612625122, + "104": 1.589133858680725, + "105": 1.4126205444335938, + "106": 1.304403305053711, + "107": 1.3842122554779053 + }, + "loss": { + "54": 2.9311022758483887, + "55": 2.976961851119995, + "56": 2.9425265789031982, + "57": 2.9114437103271484, + "58": 3.0787158012390137, + "59": 2.944338083267212, + "60": 2.780221939086914, + "61": 2.8786559104919434, + "62": 3.070720911026001, + "63": 2.746140718460083, + "64": 2.731119155883789, + "65": 2.7641401290893555, + "66": 2.752035617828369, + "67": 2.7536611557006836, + "68": 2.956040382385254, + "69": 2.7385635375976562, + "70": 2.791156053543091, + "71": 2.824984073638916, + "72": 2.9984707832336426, + "73": 2.7185893058776855, + "74": 2.694605588912964, + "75": 2.708252429962158, + "76": 2.836428642272949, + "77": 2.730621099472046, + "78": 2.8064048290252686, + "79": 2.716933250427246, + "80": 2.816277265548706, + "81": 2.6442980766296387, + "82": 2.71547794342041, + "83": 2.7009973526000977, + "84": 2.7831273078918457, + "85": 2.622030258178711, + "86": 2.6283698081970215, + "87": 2.7053375244140625, + "88": 2.77567195892334, + "89": 2.625410556793213, + "90": 2.6169495582580566, + "91": 2.613706350326538, + "92": 2.6670570373535156, + "93": 2.637639284133911, + "94": 2.7454357147216797, + "95": 2.6323599815368652, + "96": 2.655250072479248, + "97": 2.6189985275268555, + "98": 2.702467918395996, + "99": 2.60925555229187, + "100": 2.6488986015319824, + "101": 2.6088175773620605, + "102": 2.7031571865081787, + "103": 2.5998706817626953, + "104": 2.6384899616241455, + "105": 2.614164352416992, + "106": 2.5784783363342285, + "107": 2.626443386077881 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.166416, + 0.174921, + 0.175409, + 0.161306, + 0.169113, + 0.182277, + 0.187093, + 0.152161, + 0.187375, + 0.195464, + 0.188582, + 0.190985, + 0.192055, + 0.176082, + 0.184277, + 0.186463, + 0.186222, + 0.147908, + 0.188276, + 0.188815, + 0.195857, + 0.181308, + 0.188674, + 0.181843, + 0.188976, + 0.182351, + 0.19113, + 0.195101, + 0.19109, + 0.184305, + 0.193587, + 0.202132, + 0.19867, + 0.180185, + 0.19531, + 0.204035, + 0.202857, + 0.198789, + 0.197396, + 0.188304, + 0.192453, + 0.197274, + 0.197585, + 0.188995, + 0.195105, + 0.196912, + 0.197128, + 0.190826, + 0.197924, + 0.199358, + 0.194942, + 0.198695, + 0.200759, + 0.199365 + ], + "train_epoch_time": 4.8404860496521, + "train_loss": 2.6236208142645903, + "train_score": 0.2179799587646208, + "val_loss": 2.649655413271776, + "val_score": 0.21359249436212052 + }, + { + "epoch": 2, + "grad_norm": 1.34837806224823, + "learning_rate": 0.215, + "model_norm": 87.4744644165039, + "step_logs": { + "grad_norm": { + "108": 1.5592724084854126, + "109": 2.103461742401123, + "110": 1.5801812410354614, + "111": 1.093946099281311, + "112": 1.136074423789978, + "113": 1.3073256015777588, + "114": 1.2383995056152344, + "115": 1.588004469871521, + "116": 1.532800316810608, + "117": 1.181828260421753, + "118": 1.3396342992782593, + "119": 1.5502949953079224, + "120": 1.3899825811386108, + "121": 1.2337206602096558, + "122": 1.3734662532806396, + "123": 1.4737595319747925, + "124": 1.4598513841629028, + "125": 1.4499573707580566, + "126": 1.4866669178009033, + "127": 1.3141696453094482, + "128": 1.296535849571228, + "129": 1.2824946641921997, + "130": 1.5136902332305908, + "131": 1.574975848197937, + "132": 1.4911938905715942, + "133": 1.5007022619247437, + "134": 1.4506276845932007, + "135": 1.5785022974014282, + "136": 1.3659920692443848, + "137": 1.0307453870773315, + "138": 1.0795910358428955, + "139": 1.2393927574157715, + "140": 1.3061350584030151, + "141": 1.3156977891921997, + "142": 1.4069629907608032, + "143": 1.5293782949447632, + "144": 1.4213391542434692, + "145": 1.1267948150634766, + "146": 1.1314029693603516, + "147": 1.36870276927948, + "148": 1.3218140602111816, + "149": 1.237614393234253, + "150": 1.2959716320037842, + "151": 1.545208215713501, + "152": 1.490453839302063, + "153": 1.357922911643982, + "154": 1.3239798545837402, + "155": 1.262686848640442, + "156": 1.19283127784729, + "157": 1.1629854440689087, + "158": 1.2180469036102295, + "159": 1.1661994457244873, + "160": 1.271503210067749, + "161": 1.34837806224823 + }, + "loss": { + "108": 2.6187286376953125, + "109": 2.640376329421997, + "110": 2.7371749877929688, + "111": 2.5788116455078125, + "112": 2.5667243003845215, + "113": 2.5690135955810547, + "114": 2.593691349029541, + "115": 2.5491881370544434, + "116": 2.6932168006896973, + "117": 2.562211513519287, + "118": 2.5980257987976074, + "119": 2.603912115097046, + "120": 2.6333229541778564, + "121": 2.5534958839416504, + "122": 2.6163206100463867, + "123": 2.5902304649353027, + "124": 2.63192081451416, + "125": 2.581106185913086, + "126": 2.5988032817840576, + "127": 2.581291675567627, + "128": 2.590378522872925, + "129": 2.5595955848693848, + "130": 2.581890821456909, + "131": 2.6275458335876465, + "132": 2.6293866634368896, + "133": 2.591433048248291, + "134": 2.6372129917144775, + "135": 2.601038932800293, + "136": 2.647150993347168, + "137": 2.539316177368164, + "138": 2.5527286529541016, + "139": 2.5431435108184814, + "140": 2.5849978923797607, + "141": 2.550452947616577, + "142": 2.581634998321533, + "143": 2.590792655944824, + "144": 2.6380398273468018, + "145": 2.548248291015625, + "146": 2.5381650924682617, + "147": 2.5606985092163086, + "148": 2.59602689743042, + "149": 2.5511741638183594, + "150": 2.580410957336426, + "151": 2.5843911170959473, + "152": 2.638113498687744, + "153": 2.5573649406433105, + "154": 2.595184326171875, + "155": 2.539991855621338, + "156": 2.563715934753418, + "157": 2.5298314094543457, + "158": 2.5551815032958984, + "159": 2.5379412174224854, + "160": 2.5451836585998535, + "161": 2.5429296493530273 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 0.195489, + 0.182182, + 0.195799, + 0.204784, + 0.203974, + 0.20065, + 0.20215, + 0.194334, + 0.196566, + 0.203098, + 0.200138, + 0.195593, + 0.199282, + 0.202053, + 0.199534, + 0.197222, + 0.197784, + 0.19769, + 0.19699, + 0.200574, + 0.200979, + 0.201108, + 0.196276, + 0.195191, + 0.197083, + 0.19663, + 0.198015, + 0.194927, + 0.199856, + 0.205746, + 0.204941, + 0.201891, + 0.200757, + 0.20038, + 0.198627, + 0.19598, + 0.198647, + 0.20407, + 0.203943, + 0.199324, + 0.200494, + 0.201965, + 0.20094, + 0.195576, + 0.197153, + 0.199534, + 0.200445, + 0.201409, + 0.202895, + 0.203315, + 0.202368, + 0.203289, + 0.201257, + 0.199655 + ], + "train_epoch_time": 4.839672327041626, + "train_loss": 2.562408241411535, + "train_score": 0.24528896164688865, + "val_loss": 2.6070068703726976, + "val_score": 0.24454201331916003 + }, + { + "epoch": 3, + "grad_norm": 1.2930474281311035, + "learning_rate": 0.215, + "model_norm": 87.4908447265625, + "step_logs": { + "grad_norm": { + "162": 1.361615538597107, + "163": 1.392086148262024, + "164": 1.619390606880188, + "165": 1.4804311990737915, + "166": 1.2816448211669922, + "167": 1.4333467483520508, + "168": 1.5159316062927246, + "169": 1.311583399772644, + "170": 1.1395535469055176, + "171": 1.2622100114822388, + "172": 1.2925114631652832, + "173": 1.2221670150756836, + "174": 1.094551920890808, + "175": 1.0472376346588135, + "176": 1.0356019735336304, + "177": 1.0175933837890625, + "178": 1.0302155017852783, + "179": 1.2447725534439087, + "180": 1.3009377717971802, + "181": 1.3946105241775513, + "182": 1.45509672164917, + "183": 1.340830683708191, + "184": 1.3039110898971558, + "185": 1.366570234298706, + "186": 1.3199069499969482, + "187": 1.1985795497894287, + "188": 1.3332396745681763, + "189": 1.301342487335205, + "190": 1.2098512649536133, + "191": 1.1354190111160278, + "192": 1.1484336853027344, + "193": 1.3417631387710571, + "194": 1.3261157274246216, + "195": 1.3717952966690063, + "196": 1.304904818534851, + "197": 1.1362359523773193, + "198": 1.2044957876205444, + "199": 1.3503167629241943, + "200": 1.2129201889038086, + "201": 1.0190742015838623, + "202": 1.0551466941833496, + "203": 1.1930627822875977, + "204": 1.1593202352523804, + "205": 1.0762124061584473, + "206": 1.098315715789795, + "207": 1.3510856628417969, + "208": 1.2960174083709717, + "209": 1.1468430757522583, + "210": 1.2015275955200195, + "211": 1.298621416091919, + "212": 1.2985262870788574, + "213": 1.2118626832962036, + "214": 1.205458641052246, + "215": 1.2930474281311035 + }, + "loss": { + "162": 2.5700790882110596, + "163": 2.5708465576171875, + "164": 2.5671658515930176, + "165": 2.6177597045898438, + "166": 2.5604801177978516, + "167": 2.57741641998291, + "168": 2.568589687347412, + "169": 2.5828664302825928, + "170": 2.5363388061523438, + "171": 2.5402991771698, + "172": 2.5580549240112305, + "173": 2.5598230361938477, + "174": 2.514066219329834, + "175": 2.500812292098999, + "176": 2.520977258682251, + "177": 2.502847909927368, + "178": 2.489912271499634, + "179": 2.5389232635498047, + "180": 2.561051845550537, + "181": 2.5480899810791016, + "182": 2.5768723487854004, + "183": 2.558711528778076, + "184": 2.581841230392456, + "185": 2.5424580574035645, + "186": 2.585146903991699, + "187": 2.529067039489746, + "188": 2.5568032264709473, + "189": 2.563478946685791, + "190": 2.553945302963257, + "191": 2.5235276222229004, + "192": 2.5243968963623047, + "193": 2.530046224594116, + "194": 2.5652194023132324, + "195": 2.529226541519165, + "196": 2.5821762084960938, + "197": 2.530539035797119, + "198": 2.5176146030426025, + "199": 2.532806396484375, + "200": 2.5715689659118652, + "201": 2.503427267074585, + "202": 2.520176649093628, + "203": 2.515115261077881, + "204": 2.5395665168762207, + "205": 2.5176780223846436, + "206": 2.5222558975219727, + "207": 2.5116360187530518, + "208": 2.559041976928711, + "209": 2.504093647003174, + "210": 2.521381378173828, + "211": 2.5288586616516113, + "212": 2.530637264251709, + "213": 2.531001567840576, + "214": 2.5445199012756348, + "215": 2.527702808380127 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 0.199527, + 0.198884, + 0.193726, + 0.197247, + 0.201129, + 0.198031, + 0.196136, + 0.200635, + 0.203784, + 0.20142, + 0.200896, + 0.20231, + 0.204523, + 0.205321, + 0.205598, + 0.205845, + 0.20558, + 0.201763, + 0.200739, + 0.198696, + 0.197551, + 0.199901, + 0.200786, + 0.199266, + 0.200476, + 0.202627, + 0.200049, + 0.200744, + 0.202522, + 0.203807, + 0.203567, + 0.199722, + 0.200243, + 0.199077, + 0.200768, + 0.203822, + 0.202458, + 0.199557, + 0.202544, + 0.205821, + 0.205253, + 0.20267, + 0.203427, + 0.204868, + 0.204487, + 0.199419, + 0.20083, + 0.203509, + 0.202534, + 0.200618, + 0.200629, + 0.202376, + 0.202564, + 0.200727 + ], + "train_epoch_time": 4.840532064437866, + "train_loss": 2.545092095466734, + "train_score": 0.2581678174404024, + "val_loss": 2.6027835313269247, + "val_score": 0.25135440628304134 + }, + { + "epoch": 4, + "grad_norm": 1.3145029544830322, + "learning_rate": 0.215, + "model_norm": 87.51119232177734, + "step_logs": { + "grad_norm": { + "216": 1.2182166576385498, + "217": 1.0294996500015259, + "218": 1.1008670330047607, + "219": 1.19731605052948, + "220": 1.2013248205184937, + "221": 1.154158115386963, + "222": 1.2316288948059082, + "223": 1.135221004486084, + "224": 1.057712435722351, + "225": 1.0639936923980713, + "226": 1.0720773935317993, + "227": 1.3009594678878784, + "228": 1.5026884078979492, + "229": 1.3627347946166992, + "230": 1.1201125383377075, + "231": 1.1280312538146973, + "232": 1.2300243377685547, + "233": 1.3506304025650024, + "234": 1.4924287796020508, + "235": 1.6115299463272095, + "236": 1.462262511253357, + "237": 1.0871405601501465, + "238": 1.1100214719772339, + "239": 1.2975413799285889, + "240": 1.3630799055099487, + "241": 1.2814949750900269, + "242": 1.246268391609192, + "243": 1.2167056798934937, + "244": 1.1162651777267456, + "245": 1.1490232944488525, + "246": 1.1480011940002441, + "247": 1.1182979345321655, + "248": 1.065187692642212, + "249": 1.080504059791565, + "250": 1.117148995399475, + "251": 1.1543786525726318, + "252": 1.3190550804138184, + "253": 1.281561017036438, + "254": 1.2185546159744263, + "255": 1.1256446838378906, + "256": 1.0067986249923706, + "257": 1.0031418800354004, + "258": 1.0006635189056396, + "259": 1.0217490196228027, + "260": 1.0725486278533936, + "261": 1.2708911895751953, + "262": 1.2438842058181763, + "263": 1.1742539405822754, + "264": 1.2642422914505005, + "265": 1.2814592123031616, + "266": 1.2299879789352417, + "267": 1.1052236557006836, + "268": 1.0351731777191162, + "269": 1.3145029544830322 + }, + "loss": { + "216": 2.563236713409424, + "217": 2.5135276317596436, + "218": 2.5074362754821777, + "219": 2.4941115379333496, + "220": 2.531029224395752, + "221": 2.5122103691101074, + "222": 2.513491153717041, + "223": 2.5352628231048584, + "224": 2.4982762336730957, + "225": 2.4997172355651855, + "226": 2.5034897327423096, + "227": 2.5220346450805664, + "228": 2.5605366230010986, + "229": 2.5451760292053223, + "230": 2.509514093399048, + "231": 2.5111324787139893, + "232": 2.5351409912109375, + "233": 2.506951332092285, + "234": 2.586801528930664, + "235": 2.5398645401000977, + "236": 2.5718321800231934, + "237": 2.520191192626953, + "238": 2.502455234527588, + "239": 2.500775098800659, + "240": 2.5360636711120605, + "241": 2.5270955562591553, + "242": 2.528837203979492, + "243": 2.504053831100464, + "244": 2.5205416679382324, + "245": 2.496408462524414, + "246": 2.5048253536224365, + "247": 2.4868712425231934, + "248": 2.5096635818481445, + "249": 2.478222370147705, + "250": 2.517609119415283, + "251": 2.4875168800354004, + "252": 2.5157854557037354, + "253": 2.521237373352051, + "254": 2.51275634765625, + "255": 2.5100231170654297, + "256": 2.4955272674560547, + "257": 2.489665985107422, + "258": 2.464855194091797, + "259": 2.4700636863708496, + "260": 2.4838998317718506, + "261": 2.502572774887085, + "262": 2.5432636737823486, + "263": 2.4720115661621094, + "264": 2.5000240802764893, + "265": 2.4900641441345215, + "266": 2.4900217056274414, + "267": 2.456181526184082, + "268": 2.463703155517578, + "269": 2.468128204345703 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 0.202403, + 0.205677, + 0.204381, + 0.202488, + 0.202583, + 0.203406, + 0.201901, + 0.20386, + 0.205125, + 0.205019, + 0.204888, + 0.200533, + 0.196383, + 0.199363, + 0.204034, + 0.203893, + 0.202038, + 0.199402, + 0.196785, + 0.193708, + 0.197361, + 0.204681, + 0.204192, + 0.20049, + 0.199303, + 0.200961, + 0.201684, + 0.202153, + 0.204151, + 0.203434, + 0.20349, + 0.203973, + 0.205035, + 0.204637, + 0.204122, + 0.203293, + 0.200122, + 0.200929, + 0.202158, + 0.203933, + 0.206005, + 0.206047, + 0.206004, + 0.205656, + 0.204804, + 0.201051, + 0.201802, + 0.202837, + 0.201174, + 0.200767, + 0.201818, + 0.204089, + 0.205396, + 0.199952 + ], + "train_epoch_time": 4.839815616607666, + "train_loss": 2.5160131506461494, + "train_score": 0.2526228478683461, + "val_loss": 2.559002738595693, + "val_score": 0.24101248576843642 + }, + { + "epoch": 5, + "grad_norm": 1.2344266176223755, + "learning_rate": 0.215, + "model_norm": 87.53753662109375, + "step_logs": { + "grad_norm": { + "270": 1.2850974798202515, + "271": 1.2239179611206055, + "272": 1.24366295337677, + "273": 1.286130666732788, + "274": 1.2779815196990967, + "275": 1.2388229370117188, + "276": 1.2091007232666016, + "277": 1.1750178337097168, + "278": 1.080449104309082, + "279": 1.0785633325576782, + "280": 1.1624637842178345, + "281": 1.357628345489502, + "282": 1.378400444984436, + "283": 1.4578369855880737, + "284": 1.3952049016952515, + "285": 1.2184643745422363, + "286": 1.1723947525024414, + "287": 1.4340606927871704, + "288": 1.367334246635437, + "289": 1.3118513822555542, + "290": 1.3222535848617554, + "291": 1.277795672416687, + "292": 1.1154917478561401, + "293": 1.4842160940170288, + "294": 1.3879642486572266, + "295": 1.2027218341827393, + "296": 1.1791794300079346, + "297": 1.2830088138580322, + "298": 1.4632295370101929, + "299": 1.297553539276123, + "300": 1.2972139120101929, + "301": 1.2117164134979248, + "302": 1.1019034385681152, + "303": 1.1867973804473877, + "304": 1.2630165815353394, + "305": 1.3319343328475952, + "306": 1.463961124420166, + "307": 1.420211672782898, + "308": 1.1998400688171387, + "309": 1.186724305152893, + "310": 1.2939151525497437, + "311": 1.3201467990875244, + "312": 1.2927229404449463, + "313": 1.0932945013046265, + "314": 0.9508772492408752, + "315": 1.0865265130996704, + "316": 1.1584097146987915, + "317": 1.266713261604309, + "318": 1.3500679731369019, + "319": 1.4456214904785156, + "320": 1.3497941493988037, + "321": 1.1739401817321777, + "322": 1.196366548538208, + "323": 1.2344266176223755 + }, + "loss": { + "270": 2.4963231086730957, + "271": 2.457305431365967, + "272": 2.504878044128418, + "273": 2.470299243927002, + "274": 2.4994258880615234, + "275": 2.4559359550476074, + "276": 2.4859466552734375, + "277": 2.4565062522888184, + "278": 2.4518582820892334, + "279": 2.449338436126709, + "280": 2.458657741546631, + "281": 2.4458084106445312, + "282": 2.5018844604492188, + "283": 2.4499363899230957, + "284": 2.503763198852539, + "285": 2.4574086666107178, + "286": 2.454789161682129, + "287": 2.4405882358551025, + "288": 2.5001232624053955, + "289": 2.434464931488037, + "290": 2.4658236503601074, + "291": 2.454690456390381, + "292": 2.4201343059539795, + "293": 2.466909885406494, + "294": 2.4778316020965576, + "295": 2.4609146118164062, + "296": 2.412415027618408, + "297": 2.442077398300171, + "298": 2.4399757385253906, + "299": 2.464625120162964, + "300": 2.43546462059021, + "301": 2.495152473449707, + "302": 2.419623374938965, + "303": 2.4138479232788086, + "304": 2.4415388107299805, + "305": 2.4420111179351807, + "306": 2.4415626525878906, + "307": 2.4777493476867676, + "308": 2.436062812805176, + "309": 2.4265921115875244, + "310": 2.425304651260376, + "311": 2.4613921642303467, + "312": 2.412060260772705, + "313": 2.4266796112060547, + "314": 2.389078140258789, + "315": 2.381617546081543, + "316": 2.4306089878082275, + "317": 2.392611503601074, + "318": 2.4400267601013184, + "319": 2.4236679077148438, + "320": 2.433302164077759, + "321": 2.413728713989258, + "322": 2.413306713104248, + "323": 2.3973727226257324 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 0.200725, + 0.201777, + 0.201617, + 0.200563, + 0.200889, + 0.201466, + 0.202216, + 0.20275, + 0.204532, + 0.204556, + 0.203006, + 0.198888, + 0.198773, + 0.19666, + 0.198417, + 0.201888, + 0.202793, + 0.197142, + 0.199002, + 0.199815, + 0.199773, + 0.200652, + 0.203739, + 0.196169, + 0.198417, + 0.202222, + 0.202456, + 0.200473, + 0.196467, + 0.200291, + 0.200135, + 0.202209, + 0.203996, + 0.20231, + 0.20089, + 0.199426, + 0.196461, + 0.197699, + 0.202157, + 0.202374, + 0.200147, + 0.199793, + 0.200097, + 0.204188, + 0.206595, + 0.204123, + 0.202955, + 0.200542, + 0.199018, + 0.196762, + 0.198984, + 0.202567, + 0.202114, + 0.201249 + ], + "train_epoch_time": 4.840233087539673, + "train_loss": 2.4109062772228183, + "train_score": 0.28348166238799843, + "val_loss": 2.4727224501621845, + "val_score": 0.27163910068545905 + }, + { + "epoch": 6, + "grad_norm": 1.0698927640914917, + "learning_rate": 0.215, + "model_norm": 87.56552124023438, + "step_logs": { + "grad_norm": { + "324": 1.309128999710083, + "325": 1.2631114721298218, + "326": 1.2900135517120361, + "327": 1.2306216955184937, + "328": 1.1494269371032715, + "329": 1.2550078630447388, + "330": 1.5193229913711548, + "331": 1.3595046997070312, + "332": 1.072856068611145, + "333": 1.0630807876586914, + "334": 1.389190435409546, + "335": 1.4626983404159546, + "336": 1.2771999835968018, + "337": 1.2225881814956665, + "338": 1.3771287202835083, + "339": 1.3456461429595947, + "340": 1.0806690454483032, + "341": 1.149245262145996, + "342": 1.0304591655731201, + "343": 1.0370887517929077, + "344": 1.1288073062896729, + "345": 1.2489933967590332, + "346": 1.1935603618621826, + "347": 1.1135035753250122, + "348": 1.317034125328064, + "349": 1.181177020072937, + "350": 1.167004108428955, + "351": 1.1846635341644287, + "352": 1.1871981620788574, + "353": 1.3269562721252441, + "354": 1.3967430591583252, + "355": 1.3563923835754395, + "356": 1.1799196004867554, + "357": 1.1373260021209717, + "358": 1.121716856956482, + "359": 1.0838626623153687, + "360": 1.1106144189834595, + "361": 1.2185466289520264, + "362": 1.4820932149887085, + "363": 1.3594046831130981, + "364": 1.3651905059814453, + "365": 1.270385980606079, + "366": 1.1773992776870728, + "367": 1.304679274559021, + "368": 1.2258681058883667, + "369": 1.167794942855835, + "370": 1.2519911527633667, + "371": 1.1871514320373535, + "372": 1.2129698991775513, + "373": 1.272553563117981, + "374": 1.3957493305206299, + "375": 1.2517162561416626, + "376": 1.0713804960250854, + "377": 1.0698927640914917 + }, + "loss": { + "324": 2.386399269104004, + "325": 2.445016860961914, + "326": 2.396030902862549, + "327": 2.4223196506500244, + "328": 2.4171853065490723, + "329": 2.3736605644226074, + "330": 2.4139795303344727, + "331": 2.4522480964660645, + "332": 2.3881003856658936, + "333": 2.3730225563049316, + "334": 2.3800950050354004, + "335": 2.431530475616455, + "336": 2.421450138092041, + "337": 2.4071640968322754, + "338": 2.3899314403533936, + "339": 2.4413974285125732, + "340": 2.3730709552764893, + "341": 2.3817226886749268, + "342": 2.366791248321533, + "343": 2.3388569355010986, + "344": 2.3523006439208984, + "345": 2.3732428550720215, + "346": 2.3952674865722656, + "347": 2.381603956222534, + "348": 2.39683198928833, + "349": 2.4112133979797363, + "350": 2.375216484069824, + "351": 2.3985180854797363, + "352": 2.3709568977355957, + "353": 2.396017551422119, + "354": 2.400608539581299, + "355": 2.3932509422302246, + "356": 2.3683276176452637, + "357": 2.367392063140869, + "358": 2.38385009765625, + "359": 2.3542637825012207, + "360": 2.32932710647583, + "361": 2.3663556575775146, + "362": 2.3727309703826904, + "363": 2.4313533306121826, + "364": 2.352915048599243, + "365": 2.4125397205352783, + "366": 2.368180274963379, + "367": 2.3804922103881836, + "368": 2.3812994956970215, + "369": 2.351163625717163, + "370": 2.355973482131958, + "371": 2.3940610885620117, + "372": 2.355034351348877, + "373": 2.3667378425598145, + "374": 2.373485565185547, + "375": 2.3836302757263184, + "376": 2.3319592475891113, + "377": 2.3355607986450195 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 0.199591, + 0.200907, + 0.200063, + 0.20146, + 0.203068, + 0.200685, + 0.194959, + 0.198886, + 0.204409, + 0.204529, + 0.197762, + 0.196421, + 0.200481, + 0.201546, + 0.198101, + 0.199124, + 0.204197, + 0.202904, + 0.205108, + 0.204872, + 0.203169, + 0.20081, + 0.20208, + 0.203605, + 0.199481, + 0.20241, + 0.202517, + 0.202277, + 0.202086, + 0.199258, + 0.197726, + 0.198589, + 0.202221, + 0.203072, + 0.203456, + 0.204054, + 0.20342, + 0.201414, + 0.19554, + 0.19876, + 0.198129, + 0.200576, + 0.202272, + 0.199653, + 0.201341, + 0.202381, + 0.200649, + 0.202204, + 0.201469, + 0.200269, + 0.197568, + 0.20081, + 0.204195, + 0.204239 + ], + "train_epoch_time": 4.840440273284912, + "train_loss": 2.3239737161775915, + "train_score": 0.3152371771195356, + "val_loss": 2.37487924112655, + "val_score": 0.30604639064293915 + }, + { + "epoch": 7, + "grad_norm": 1.1673446893692017, + "learning_rate": 0.215, + "model_norm": 87.59405517578125, + "step_logs": { + "grad_norm": { + "378": 1.0856293439865112, + "379": 1.1353424787521362, + "380": 1.2636758089065552, + "381": 1.34560227394104, + "382": 1.5504242181777954, + "383": 1.4265116453170776, + "384": 1.2201482057571411, + "385": 1.1396905183792114, + "386": 1.0633373260498047, + "387": 1.0913819074630737, + "388": 1.0919487476348877, + "389": 1.1365587711334229, + "390": 1.4007723331451416, + "391": 1.4686057567596436, + "392": 1.5081781148910522, + "393": 1.3079806566238403, + "394": 1.214347004890442, + "395": 1.1966195106506348, + "396": 1.063156247138977, + "397": 1.1483653783798218, + "398": 1.2561752796173096, + "399": 1.2642120122909546, + "400": 1.1772046089172363, + "401": 1.1589879989624023, + "402": 1.135513424873352, + "403": 1.152271032333374, + "404": 1.1834492683410645, + "405": 1.256049633026123, + "406": 1.1937779188156128, + "407": 1.1257883310317993, + "408": 1.1758086681365967, + "409": 1.1744511127471924, + "410": 1.2455041408538818, + "411": 1.415810227394104, + "412": 1.293781042098999, + "413": 1.3222863674163818, + "414": 1.2363009452819824, + "415": 1.3613141775131226, + "416": 1.3382188081741333, + "417": 1.1950047016143799, + "418": 1.121564269065857, + "419": 1.1536364555358887, + "420": 1.1189732551574707, + "421": 1.1400444507598877, + "422": 1.1774303913116455, + "423": 1.3642103672027588, + "424": 1.2602027654647827, + "425": 1.2266814708709717, + "426": 1.2392609119415283, + "427": 1.1484966278076172, + "428": 1.1638505458831787, + "429": 1.224527359008789, + "430": 1.1911342144012451, + "431": 1.1673446893692017 + }, + "loss": { + "378": 2.328446865081787, + "379": 2.3256967067718506, + "380": 2.3429102897644043, + "381": 2.358628511428833, + "382": 2.345167875289917, + "383": 2.405836582183838, + "384": 2.3169164657592773, + "385": 2.3362081050872803, + "386": 2.3163704872131348, + "387": 2.324340581893921, + "388": 2.3124208450317383, + "389": 2.331120729446411, + "390": 2.339414358139038, + "391": 2.39945650100708, + "392": 2.326516628265381, + "393": 2.3788561820983887, + "394": 2.302523136138916, + "395": 2.321241855621338, + "396": 2.3199801445007324, + "397": 2.310746669769287, + "398": 2.338313102722168, + "399": 2.3144783973693848, + "400": 2.3162448406219482, + "401": 2.3223297595977783, + "402": 2.3196043968200684, + "403": 2.3204143047332764, + "404": 2.31657075881958, + "405": 2.3323097229003906, + "406": 2.3416872024536133, + "407": 2.3012123107910156, + "408": 2.300736904144287, + "409": 2.329308032989502, + "410": 2.3181538581848145, + "411": 2.356184959411621, + "412": 2.3543615341186523, + "413": 2.3153626918792725, + "414": 2.3357255458831787, + "415": 2.320425271987915, + "416": 2.3382961750030518, + "417": 2.308790683746338, + "418": 2.291630744934082, + "419": 2.3231401443481445, + "420": 2.3026514053344727, + "421": 2.285475254058838, + "422": 2.3193094730377197, + "423": 2.309246063232422, + "424": 2.347034454345703, + "425": 2.297950029373169, + "426": 2.3215527534484863, + "427": 2.2853357791900635, + "428": 2.2994134426116943, + "429": 2.30588698387146, + "430": 2.3058009147644043, + "431": 2.2755813598632812 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 0.203905, + 0.20291, + 0.200322, + 0.19861, + 0.193661, + 0.19708, + 0.201108, + 0.202875, + 0.204281, + 0.203774, + 0.203708, + 0.202913, + 0.197218, + 0.196055, + 0.194552, + 0.199571, + 0.201151, + 0.201629, + 0.2043, + 0.202572, + 0.200458, + 0.200143, + 0.202007, + 0.202414, + 0.202877, + 0.202541, + 0.201879, + 0.200426, + 0.201798, + 0.202982, + 0.201954, + 0.202133, + 0.200571, + 0.196985, + 0.199735, + 0.198857, + 0.20087, + 0.198001, + 0.198645, + 0.201596, + 0.20302, + 0.202527, + 0.203126, + 0.202614, + 0.202019, + 0.197858, + 0.200421, + 0.200861, + 0.200726, + 0.202439, + 0.202196, + 0.200952, + 0.201661, + 0.201997 + ], + "train_epoch_time": 4.840257883071899, + "train_loss": 2.3028510326292455, + "train_score": 0.30849735465761235, + "val_loss": 2.353059915669613, + "val_score": 0.2998887769595627 + }, + { + "epoch": 8, + "grad_norm": 1.1082972288131714, + "learning_rate": 0.215, + "model_norm": 87.62403106689453, + "step_logs": { + "grad_norm": { + "432": 1.1565858125686646, + "433": 1.108580470085144, + "434": 1.0306254625320435, + "435": 0.9733507037162781, + "436": 0.987404465675354, + "437": 1.2669869661331177, + "438": 1.2660613059997559, + "439": 1.2554049491882324, + "440": 1.1647225618362427, + "441": 1.0433356761932373, + "442": 1.162516474723816, + "443": 1.2768657207489014, + "444": 1.3018276691436768, + "445": 1.2005199193954468, + "446": 1.149218201637268, + "447": 0.9853675961494446, + "448": 1.0335965156555176, + "449": 1.2494053840637207, + "450": 1.3953478336334229, + "451": 1.3595314025878906, + "452": 1.358251929283142, + "453": 1.4063332080841064, + "454": 1.296646237373352, + "455": 1.2485666275024414, + "456": 1.2314960956573486, + "457": 1.2004880905151367, + "458": 1.1638100147247314, + "459": 1.0473333597183228, + "460": 1.0785902738571167, + "461": 1.17708158493042, + "462": 1.2843329906463623, + "463": 1.3547714948654175, + "464": 1.364400029182434, + "465": 1.191654086112976, + "466": 1.0111395120620728, + "467": 1.068028211593628, + "468": 1.358184814453125, + "469": 1.7749218940734863, + "470": 1.2150791883468628, + "471": 0.9120576977729797, + "472": 0.8075768947601318, + "473": 0.8076027035713196, + "474": 0.9236871600151062, + "475": 1.017232060432434, + "476": 1.153769850730896, + "477": 1.2207270860671997, + "478": 1.1661633253097534, + "479": 1.1541210412979126, + "480": 1.129042148590088, + "481": 1.1720428466796875, + "482": 1.1354085206985474, + "483": 1.135421633720398, + "484": 1.0487339496612549, + "485": 1.1082972288131714 + }, + "loss": { + "432": 2.3171162605285645, + "433": 2.2820565700531006, + "434": 2.2835030555725098, + "435": 2.2673423290252686, + "436": 2.2827863693237305, + "437": 2.2763357162475586, + "438": 2.3395254611968994, + "439": 2.2768778800964355, + "440": 2.3267083168029785, + "441": 2.275376796722412, + "442": 2.2747557163238525, + "443": 2.285367012023926, + "444": 2.318378210067749, + "445": 2.3075637817382812, + "446": 2.2737903594970703, + "447": 2.2553725242614746, + "448": 2.244853973388672, + "449": 2.2789177894592285, + "450": 2.3110175132751465, + "451": 2.3002848625183105, + "452": 2.2897796630859375, + "453": 2.296938419342041, + "454": 2.325416326522827, + "455": 2.2813148498535156, + "456": 2.2858972549438477, + "457": 2.292266368865967, + "458": 2.2870535850524902, + "459": 2.267334461212158, + "460": 2.252924919128418, + "461": 2.2743101119995117, + "462": 2.2688026428222656, + "463": 2.2813546657562256, + "464": 2.2884106636047363, + "465": 2.306873083114624, + "466": 2.2575159072875977, + "467": 2.2513270378112793, + "468": 2.2696642875671387, + "469": 2.3163137435913086, + "470": 2.3101627826690674, + "471": 2.2393722534179688, + "472": 2.231386661529541, + "473": 2.2225685119628906, + "474": 2.213020086288452, + "475": 2.2348809242248535, + "476": 2.224827289581299, + "477": 2.266059160232544, + "478": 2.2679929733276367, + "479": 2.260349750518799, + "480": 2.2663614749908447, + "481": 2.2581393718719482, + "482": 2.235718011856079, + "483": 2.2321369647979736, + "484": 2.250542163848877, + "485": 2.262209177017212 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 0.202437, + 0.203234, + 0.204761, + 0.205758, + 0.205562, + 0.19985, + 0.200251, + 0.20011, + 0.202319, + 0.204484, + 0.202093, + 0.199686, + 0.199336, + 0.201473, + 0.202364, + 0.20549, + 0.204536, + 0.200254, + 0.197145, + 0.197905, + 0.197863, + 0.196785, + 0.199495, + 0.200287, + 0.200687, + 0.201389, + 0.202131, + 0.204371, + 0.203693, + 0.201785, + 0.199414, + 0.197886, + 0.19771, + 0.201656, + 0.205019, + 0.203894, + 0.197725, + 0.187575, + 0.201178, + 0.206744, + 0.208451, + 0.208425, + 0.206444, + 0.204806, + 0.202007, + 0.200805, + 0.20198, + 0.202191, + 0.202741, + 0.201803, + 0.202451, + 0.202432, + 0.204269, + 0.203143 + ], + "train_epoch_time": 4.8408043384552, + "train_loss": 2.2564530308311603, + "train_score": 0.33053151899293987, + "val_loss": 2.3155812620432314, + "val_score": 0.3140651910778576 + }, + { + "epoch": 9, + "grad_norm": 1.1109791994094849, + "learning_rate": 0.215, + "model_norm": 87.653564453125, + "step_logs": { + "grad_norm": { + "486": 1.274440884590149, + "487": 1.5721691846847534, + "488": 1.6626176834106445, + "489": 1.3811311721801758, + "490": 1.2371716499328613, + "491": 1.2855167388916016, + "492": 1.1732789278030396, + "493": 1.2300169467926025, + "494": 1.3019015789031982, + "495": 1.2852145433425903, + "496": 1.131698727607727, + "497": 1.0360064506530762, + "498": 1.1023600101470947, + "499": 1.217097282409668, + "500": 1.267598271369934, + "501": 1.1315768957138062, + "502": 1.0207968950271606, + "503": 0.9872194528579712, + "504": 0.8996853828430176, + "505": 0.9872053861618042, + "506": 1.1195306777954102, + "507": 1.2206776142120361, + "508": 1.2348135709762573, + "509": 1.0950978994369507, + "510": 1.0299195051193237, + "511": 0.9195706248283386, + "512": 0.9519891738891602, + "513": 1.1784167289733887, + "514": 1.181997299194336, + "515": 1.0261567831039429, + "516": 1.0205830335617065, + "517": 1.1702600717544556, + "518": 1.313822865486145, + "519": 1.4962973594665527, + "520": 1.5798176527023315, + "521": 1.3525158166885376, + "522": 1.2542475461959839, + "523": 1.3560701608657837, + "524": 1.3302637338638306, + "525": 1.183883547782898, + "526": 1.1254867315292358, + "527": 1.0960978269577026, + "528": 1.0458887815475464, + "529": 0.9787492752075195, + "530": 1.0804812908172607, + "531": 1.3128334283828735, + "532": 1.3985402584075928, + "533": 1.4587278366088867, + "534": 1.537503957748413, + "535": 1.4038366079330444, + "536": 1.2967747449874878, + "537": 1.1811518669128418, + "538": 1.1067805290222168, + "539": 1.1109791994094849 + }, + "loss": { + "486": 2.2600908279418945, + "487": 2.292318344116211, + "488": 2.319120168685913, + "489": 2.2957048416137695, + "490": 2.2406320571899414, + "491": 2.269606351852417, + "492": 2.263916015625, + "493": 2.237008571624756, + "494": 2.28615403175354, + "495": 2.264836311340332, + "496": 2.2612414360046387, + "497": 2.2186059951782227, + "498": 2.25152850151062, + "499": 2.1991467475891113, + "500": 2.2622876167297363, + "501": 2.2540721893310547, + "502": 2.2395267486572266, + "503": 2.232537031173706, + "504": 2.2400033473968506, + "505": 2.196216583251953, + "506": 2.228848934173584, + "507": 2.2368764877319336, + "508": 2.2414612770080566, + "509": 2.239764928817749, + "510": 2.2245795726776123, + "511": 2.192011833190918, + "512": 2.211658000946045, + "513": 2.2534379959106445, + "514": 2.246367931365967, + "515": 2.220266103744507, + "516": 2.1989667415618896, + "517": 2.2220592498779297, + "518": 2.246497631072998, + "519": 2.263000726699829, + "520": 2.2741599082946777, + "521": 2.2593367099761963, + "522": 2.2507152557373047, + "523": 2.260756492614746, + "524": 2.2669968605041504, + "525": 2.241084098815918, + "526": 2.2304186820983887, + "527": 2.1926121711730957, + "528": 2.201482057571411, + "529": 2.1710610389709473, + "530": 2.2309653759002686, + "531": 2.208761215209961, + "532": 2.2589619159698486, + "533": 2.2592267990112305, + "534": 2.260641098022461, + "535": 2.2182810306549072, + "536": 2.244582176208496, + "537": 2.210331439971924, + "538": 2.199983835220337, + "539": 2.19126033782959 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 0.199582, + 0.192667, + 0.19058, + 0.19737, + 0.200292, + 0.199393, + 0.201809, + 0.200428, + 0.199129, + 0.199369, + 0.202661, + 0.204371, + 0.20321, + 0.200483, + 0.199749, + 0.202626, + 0.204758, + 0.205363, + 0.206961, + 0.205211, + 0.202744, + 0.200633, + 0.200349, + 0.203298, + 0.204517, + 0.206439, + 0.205929, + 0.201642, + 0.201526, + 0.20457, + 0.204583, + 0.20164, + 0.198596, + 0.194332, + 0.192311, + 0.197785, + 0.199974, + 0.197712, + 0.198355, + 0.201456, + 0.202629, + 0.20304, + 0.204098, + 0.205264, + 0.20355, + 0.198361, + 0.196692, + 0.195233, + 0.193274, + 0.196257, + 0.198975, + 0.201339, + 0.202858, + 0.202725 + ], + "train_epoch_time": 4.840954065322876, + "train_loss": 2.2157062061889956, + "train_score": 0.3290026452654232, + "val_loss": 2.2771258649815103, + "val_score": 0.3184557977623288 + }, + { + "epoch": 10, + "grad_norm": 1.2484924793243408, + "learning_rate": 0.215, + "model_norm": 87.6837387084961, + "step_logs": { + "grad_norm": { + "540": 1.0730115175247192, + "541": 1.0840245485305786, + "542": 1.153657078742981, + "543": 1.2076923847198486, + "544": 1.3433432579040527, + "545": 1.5389057397842407, + "546": 1.326480746269226, + "547": 1.233006238937378, + "548": 1.295018196105957, + "549": 1.4531220197677612, + "550": 1.2425044775009155, + "551": 1.1023786067962646, + "552": 1.0534733533859253, + "553": 1.0195304155349731, + "554": 1.0069540739059448, + "555": 1.0201994180679321, + "556": 1.0369800329208374, + "557": 1.1429098844528198, + "558": 1.2822461128234863, + "559": 1.3738548755645752, + "560": 1.3105756044387817, + "561": 1.264311671257019, + "562": 1.191485047340393, + "563": 1.1162223815917969, + "564": 1.241092562675476, + "565": 1.3615766763687134, + "566": 1.4249286651611328, + "567": 1.2953282594680786, + "568": 1.240553617477417, + "569": 1.2388930320739746, + "570": 1.287211298942566, + "571": 1.2208629846572876, + "572": 1.1403542757034302, + "573": 1.0927703380584717, + "574": 1.0186707973480225, + "575": 1.097156047821045, + "576": 1.2424495220184326, + "577": 1.2379029989242554, + "578": 1.2404221296310425, + "579": 1.3435301780700684, + "580": 1.2760157585144043, + "581": 1.2772034406661987, + "582": 1.449062466621399, + "583": 1.4790936708450317, + "584": 1.2563692331314087, + "585": 1.3491219282150269, + "586": 1.3592236042022705, + "587": 1.6502348184585571, + "588": 1.714043378829956, + "589": 1.5939899682998657, + "590": 1.428162693977356, + "591": 1.483228087425232, + "592": 1.2308692932128906, + "593": 1.2484924793243408 + }, + "loss": { + "540": 2.224256753921509, + "541": 2.1956377029418945, + "542": 2.2281646728515625, + "543": 2.2300729751586914, + "544": 2.21303653717041, + "545": 2.217317581176758, + "546": 2.2570157051086426, + "547": 2.2219507694244385, + "548": 2.2281265258789062, + "549": 2.2299113273620605, + "550": 2.247556209564209, + "551": 2.2148499488830566, + "552": 2.188930034637451, + "553": 2.1814873218536377, + "554": 2.1891088485717773, + "555": 2.2002689838409424, + "556": 2.1906023025512695, + "557": 2.180880069732666, + "558": 2.1901936531066895, + "559": 2.2383034229278564, + "560": 2.192107677459717, + "561": 2.226154327392578, + "562": 2.2302803993225098, + "563": 2.1916139125823975, + "564": 2.2360498905181885, + "565": 2.1873154640197754, + "566": 2.2202963829040527, + "567": 2.2145023345947266, + "568": 2.2246246337890625, + "569": 2.217089891433716, + "570": 2.2217774391174316, + "571": 2.2137060165405273, + "572": 2.2099876403808594, + "573": 2.2021801471710205, + "574": 2.1595754623413086, + "575": 2.167999029159546, + "576": 2.1851797103881836, + "577": 2.1996870040893555, + "578": 2.2081122398376465, + "579": 2.218878984451294, + "580": 2.21826171875, + "581": 2.1950340270996094, + "582": 2.242854118347168, + "583": 2.1975955963134766, + "584": 2.2296929359436035, + "585": 2.1791646480560303, + "586": 2.203380584716797, + "587": 2.185145139694214, + "588": 2.2737479209899902, + "589": 2.2485265731811523, + "590": 2.2436585426330566, + "591": 2.2177751064300537, + "592": 2.20528507232666, + "593": 2.1836795806884766 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 0.203667, + 0.203303, + 0.202027, + 0.200877, + 0.197672, + 0.192857, + 0.198375, + 0.200269, + 0.198906, + 0.195136, + 0.200216, + 0.203025, + 0.203887, + 0.204524, + 0.204802, + 0.204596, + 0.204223, + 0.201994, + 0.198945, + 0.19713, + 0.198297, + 0.199593, + 0.20123, + 0.202617, + 0.200177, + 0.197046, + 0.195756, + 0.198807, + 0.200118, + 0.200108, + 0.199043, + 0.200489, + 0.202209, + 0.203157, + 0.20444, + 0.20289, + 0.199825, + 0.200021, + 0.200017, + 0.19771, + 0.199276, + 0.199095, + 0.19534, + 0.194216, + 0.199795, + 0.197286, + 0.197223, + 0.189599, + 0.188778, + 0.191712, + 0.19586, + 0.194282, + 0.200214, + 0.199678 + ], + "train_epoch_time": 4.840428113937378, + "train_loss": 2.2075779918960725, + "train_score": 0.34894749815529014, + "val_loss": 2.2799201676820915, + "val_score": 0.3267840485074627 + }, + { + "epoch": 11, + "grad_norm": 1.0301048755645752, + "learning_rate": 0.215, + "model_norm": 87.71573638916016, + "step_logs": { + "grad_norm": { + "594": 1.1950290203094482, + "595": 1.1960158348083496, + "596": 1.2385780811309814, + "597": 1.150543212890625, + "598": 1.0552761554718018, + "599": 1.118019700050354, + "600": 1.1891049146652222, + "601": 1.1616593599319458, + "602": 1.1191298961639404, + "603": 1.1218911409378052, + "604": 1.1111873388290405, + "605": 1.0850526094436646, + "606": 1.0855337381362915, + "607": 1.144742727279663, + "608": 1.219439148902893, + "609": 1.2101095914840698, + "610": 1.1669107675552368, + "611": 1.288949966430664, + "612": 1.4554662704467773, + "613": 1.5926470756530762, + "614": 1.5956976413726807, + "615": 1.4946579933166504, + "616": 1.100767731666565, + "617": 1.048630952835083, + "618": 1.1599191427230835, + "619": 1.1526316404342651, + "620": 1.1674768924713135, + "621": 1.1618609428405762, + "622": 1.0434201955795288, + "623": 1.0317045450210571, + "624": 1.1041157245635986, + "625": 1.245929479598999, + "626": 1.3019353151321411, + "627": 1.2685775756835938, + "628": 1.1965506076812744, + "629": 1.2156870365142822, + "630": 1.2426406145095825, + "631": 1.120285153388977, + "632": 1.0547648668289185, + "633": 1.0630483627319336, + "634": 1.0560157299041748, + "635": 1.1546316146850586, + "636": 1.1806838512420654, + "637": 1.0786075592041016, + "638": 0.9861094355583191, + "639": 1.0212830305099487, + "640": 1.0212528705596924, + "641": 1.0391095876693726, + "642": 1.1018105745315552, + "643": 1.253267765045166, + "644": 1.4574978351593018, + "645": 1.5392931699752808, + "646": 1.2028312683105469, + "647": 1.0301048755645752 + }, + "loss": { + "594": 2.202592611312866, + "595": 2.21405291557312, + "596": 2.1930947303771973, + "597": 2.1749229431152344, + "598": 2.1603639125823975, + "599": 2.1472935676574707, + "600": 2.159951686859131, + "601": 2.1965529918670654, + "602": 2.1713905334472656, + "603": 2.187828540802002, + "604": 2.150160789489746, + "605": 2.175309658050537, + "606": 2.1545028686523438, + "607": 2.1715760231018066, + "608": 2.1555254459381104, + "609": 2.2049925327301025, + "610": 2.1803390979766846, + "611": 2.1594817638397217, + "612": 2.2130517959594727, + "613": 2.2050609588623047, + "614": 2.2264647483825684, + "615": 2.2138705253601074, + "616": 2.1906564235687256, + "617": 2.174870014190674, + "618": 2.2114453315734863, + "619": 2.192110776901245, + "620": 2.173429012298584, + "621": 2.189335346221924, + "622": 2.1438052654266357, + "623": 2.148369312286377, + "624": 2.156013011932373, + "625": 2.1522488594055176, + "626": 2.18947172164917, + "627": 2.207270622253418, + "628": 2.1691670417785645, + "629": 2.1718530654907227, + "630": 2.148488998413086, + "631": 2.157902717590332, + "632": 2.1440021991729736, + "633": 2.1581244468688965, + "634": 2.121485948562622, + "635": 2.1491992473602295, + "636": 2.16536021232605, + "637": 2.173429250717163, + "638": 2.156740188598633, + "639": 2.1143622398376465, + "640": 2.1299097537994385, + "641": 2.1474719047546387, + "642": 2.1455202102661133, + "643": 2.1713802814483643, + "644": 2.16792631149292, + "645": 2.193194627761841, + "646": 2.1946706771850586, + "647": 2.1413674354553223 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 0.200991, + 0.201037, + 0.199963, + 0.201797, + 0.203712, + 0.202338, + 0.200865, + 0.20168, + 0.202447, + 0.202478, + 0.202499, + 0.203179, + 0.203061, + 0.201902, + 0.200156, + 0.200673, + 0.201474, + 0.198577, + 0.19494, + 0.191339, + 0.191462, + 0.19396, + 0.202934, + 0.203917, + 0.201802, + 0.201849, + 0.201421, + 0.201635, + 0.20387, + 0.204128, + 0.20268, + 0.199529, + 0.198482, + 0.199374, + 0.200756, + 0.200345, + 0.19958, + 0.202349, + 0.203641, + 0.203542, + 0.203501, + 0.201559, + 0.201084, + 0.203301, + 0.205061, + 0.204173, + 0.204248, + 0.203975, + 0.202672, + 0.199488, + 0.194511, + 0.192629, + 0.200772, + 0.204126 + ], + "train_epoch_time": 4.839927434921265, + "train_loss": 2.1419006591889915, + "train_score": 0.36307388820299286, + "val_loss": 2.2275397684489273, + "val_score": 0.3394446045878834 + }, + { + "epoch": 12, + "grad_norm": 0.8815606832504272, + "learning_rate": 0.215, + "model_norm": 87.74436950683594, + "step_logs": { + "grad_norm": { + "648": 1.1736186742782593, + "649": 1.1927375793457031, + "650": 1.192915678024292, + "651": 1.415801763534546, + "652": 1.352135419845581, + "653": 1.1469707489013672, + "654": 1.0822917222976685, + "655": 1.0758342742919922, + "656": 1.109924077987671, + "657": 1.0968343019485474, + "658": 1.1693978309631348, + "659": 1.1531552076339722, + "660": 1.1513526439666748, + "661": 1.1378060579299927, + "662": 1.0363950729370117, + "663": 1.019266963005066, + "664": 1.0714060068130493, + "665": 1.1098425388336182, + "666": 1.0873985290527344, + "667": 1.0098145008087158, + "668": 0.9995170831680298, + "669": 1.0532211065292358, + "670": 1.0094290971755981, + "671": 1.0100833177566528, + "672": 1.0575320720672607, + "673": 1.041836142539978, + "674": 0.9846584796905518, + "675": 0.9802210330963135, + "676": 1.006662130355835, + "677": 1.0164037942886353, + "678": 1.035056710243225, + "679": 0.9977219104766846, + "680": 0.9377416372299194, + "681": 0.9368072152137756, + "682": 1.042328119277954, + "683": 1.0932453870773315, + "684": 1.1079999208450317, + "685": 1.1131280660629272, + "686": 0.9918408989906311, + "687": 0.8160966038703918, + "688": 0.792847216129303, + "689": 0.8231911659240723, + "690": 0.8266491889953613, + "691": 0.8108084797859192, + "692": 0.8159105777740479, + "693": 0.812720775604248, + "694": 0.8996869325637817, + "695": 0.9295957088470459, + "696": 0.8622173070907593, + "697": 0.7823448777198792, + "698": 0.8126835823059082, + "699": 0.9474884271621704, + "700": 0.9538929462432861, + "701": 0.8815606832504272 + }, + "loss": { + "648": 2.140043020248413, + "649": 2.166339874267578, + "650": 2.1256985664367676, + "651": 2.16790509223938, + "652": 2.194870948791504, + "653": 2.138918399810791, + "654": 2.144619941711426, + "655": 2.1440577507019043, + "656": 2.148167610168457, + "657": 2.1287899017333984, + "658": 2.128760814666748, + "659": 2.150738477706909, + "660": 2.136873245239258, + "661": 2.110874891281128, + "662": 2.1498284339904785, + "663": 2.1460490226745605, + "664": 2.1357290744781494, + "665": 2.144239902496338, + "666": 2.107335090637207, + "667": 2.0888638496398926, + "668": 2.1202988624572754, + "669": 2.118359088897705, + "670": 2.1162853240966797, + "671": 2.123889446258545, + "672": 2.127480983734131, + "673": 2.094712257385254, + "674": 2.116825819015503, + "675": 2.101317882537842, + "676": 2.076444387435913, + "677": 2.095736503601074, + "678": 2.091125726699829, + "679": 2.1160941123962402, + "680": 2.101346969604492, + "681": 2.1070613861083984, + "682": 2.0930233001708984, + "683": 2.1106696128845215, + "684": 2.1121487617492676, + "685": 2.101912498474121, + "686": 2.1231493949890137, + "687": 2.1286134719848633, + "688": 2.078831434249878, + "689": 2.073833465576172, + "690": 2.080270290374756, + "691": 2.0773372650146484, + "692": 2.052178382873535, + "693": 2.049058675765991, + "694": 2.0780670642852783, + "695": 2.0962839126586914, + "696": 2.0633857250213623, + "697": 2.0995097160339355, + "698": 2.065528392791748, + "699": 2.1045262813568115, + "700": 2.093109369277954, + "701": 2.065265417098999 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 0.201087, + 0.199665, + 0.198254, + 0.192262, + 0.192849, + 0.195817, + 0.195958, + 0.194889, + 0.193068, + 0.192037, + 0.189453, + 0.18871, + 0.187497, + 0.186441, + 0.187233, + 0.186295, + 0.184175, + 0.182359, + 0.181386, + 0.181382, + 0.180445, + 0.178389, + 0.177847, + 0.176648, + 0.174737, + 0.173641, + 0.173329, + 0.172116, + 0.170443, + 0.169153, + 0.167661, + 0.167028, + 0.16652, + 0.16531, + 0.162703, + 0.160872, + 0.159472, + 0.158161, + 0.158524, + 0.159176, + 0.158045, + 0.156481, + 0.1552, + 0.154082, + 0.152726, + 0.151487, + 0.149469, + 0.147969, + 0.147282, + 0.146761, + 0.145192, + 0.142827, + 0.141498, + 0.140828 + ], + "train_epoch_time": 4.840087652206421, + "train_loss": 2.0689332987348865, + "train_score": 0.3863309272136031, + "val_loss": 2.1656318819481797, + "val_score": 0.35967548073501454 + }, + { + "epoch": 13, + "grad_norm": 0.5793240070343018, + "learning_rate": 0.14333333333333334, + "model_norm": 87.76300048828125, + "step_logs": { + "grad_norm": { + "702": 0.8874034285545349, + "703": 0.905583381652832, + "704": 0.8495563268661499, + "705": 0.7218671441078186, + "706": 0.6585786938667297, + "707": 0.6682102680206299, + "708": 0.7008153796195984, + "709": 0.7035977840423584, + "710": 0.6829692125320435, + "711": 0.638738214969635, + "712": 0.6198923587799072, + "713": 0.6988686323165894, + "714": 0.7239283323287964, + "715": 0.7918804287910461, + "716": 0.7934989929199219, + "717": 0.7834325432777405, + "718": 0.807168185710907, + "719": 0.7808082699775696, + "720": 0.6788971424102783, + "721": 0.6064842939376831, + "722": 0.6180140376091003, + "723": 0.6385231614112854, + "724": 0.6860033869743347, + "725": 0.7703426480293274, + "726": 0.8105504512786865, + "727": 0.8015021681785583, + "728": 0.8084497451782227, + "729": 0.8108120560646057, + "730": 0.6805449724197388, + "731": 0.6301560401916504, + "732": 0.6809912323951721, + "733": 0.647938072681427, + "734": 0.575066089630127, + "735": 0.5573794841766357, + "736": 0.6263303160667419, + "737": 0.6469056010246277, + "738": 0.6373700499534607, + "739": 0.6472172737121582, + "740": 0.5829676389694214, + "741": 0.6078576445579529, + "742": 0.5903823375701904, + "743": 0.6300957798957825, + "744": 0.6502521634101868, + "745": 0.6128755211830139, + "746": 0.5980471968650818, + "747": 0.5541452169418335, + "748": 0.5953189730644226, + "749": 0.5821541547775269, + "750": 0.598314106464386, + "751": 0.5398235321044922, + "752": 0.58330237865448, + "753": 0.5274991989135742, + "754": 0.5851951241493225, + "755": 0.5793240070343018 + }, + "loss": { + "702": 2.05574369430542, + "703": 2.0688095092773438, + "704": 2.0423386096954346, + "705": 2.0397229194641113, + "706": 2.0588929653167725, + "707": 2.0642752647399902, + "708": 2.0728094577789307, + "709": 2.0478193759918213, + "710": 2.0581283569335938, + "711": 2.05910062789917, + "712": 2.06070876121521, + "713": 2.0621001720428467, + "714": 2.043877601623535, + "715": 2.0738160610198975, + "716": 2.04972505569458, + "717": 2.0713717937469482, + "718": 2.0618996620178223, + "719": 2.0546112060546875, + "720": 2.0492255687713623, + "721": 2.0458288192749023, + "722": 2.041354179382324, + "723": 2.023507595062256, + "724": 2.042503595352173, + "725": 2.0306506156921387, + "726": 2.048412322998047, + "727": 2.0250191688537598, + "728": 2.0446135997772217, + "729": 2.0537240505218506, + "730": 2.0677785873413086, + "731": 2.021470546722412, + "732": 2.054034471511841, + "733": 2.035071611404419, + "734": 2.0356285572052, + "735": 2.0282442569732666, + "736": 2.0338058471679688, + "737": 2.0151429176330566, + "738": 2.027527332305908, + "739": 2.0506112575531006, + "740": 2.0555806159973145, + "741": 2.0270466804504395, + "742": 2.0533108711242676, + "743": 2.055283546447754, + "744": 2.03786039352417, + "745": 2.0498759746551514, + "746": 2.025909662246704, + "747": 2.0287299156188965, + "748": 2.026238441467285, + "749": 1.9891866445541382, + "750": 2.0195558071136475, + "751": 2.0114665031433105, + "752": 2.0062484741210938, + "753": 2.015991687774658, + "754": 2.008403778076172, + "755": 2.008389711380005 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 0.139504, + 0.138119, + 0.137267, + 0.136915, + 0.136047, + 0.134706, + 0.133234, + 0.131906, + 0.13075, + 0.129701, + 0.128503, + 0.126801, + 0.12536, + 0.123722, + 0.122408, + 0.121209, + 0.119788, + 0.118646, + 0.117861, + 0.116876, + 0.115528, + 0.114135, + 0.112655, + 0.110979, + 0.109523, + 0.108263, + 0.106967, + 0.105682, + 0.104925, + 0.103777, + 0.102323, + 0.101125, + 0.100045, + 0.098784, + 0.0972884, + 0.0959178, + 0.0946495, + 0.0933302, + 0.0921932, + 0.0908161, + 0.0895609, + 0.0881626, + 0.0868038, + 0.085589, + 0.0843062, + 0.0830854, + 0.0816961, + 0.0804023, + 0.0790716, + 0.0778608, + 0.0764761, + 0.0752553, + 0.073853, + 0.0725513 + ], + "train_epoch_time": 4.840941429138184, + "train_loss": 2.019192037691858, + "train_score": 0.40129461072951855, + "val_loss": 2.1238986813515663, + "val_score": 0.3741613454030384 + }, + { + "epoch": 14, + "grad_norm": 0.48611950874328613, + "learning_rate": 0.07166666666666667, + "model_norm": 87.7691650390625, + "step_logs": { + "grad_norm": { + "756": 0.6146167516708374, + "757": 0.5796166658401489, + "758": 0.5638355016708374, + "759": 0.6140334010124207, + "760": 0.5936705470085144, + "761": 0.5168383717536926, + "762": 0.5016950964927673, + "763": 0.5049790143966675, + "764": 0.5676010251045227, + "765": 0.5727619528770447, + "766": 0.5642577409744263, + "767": 0.5229026675224304, + "768": 0.5591195225715637, + "769": 0.5427458882331848, + "770": 0.5678499341011047, + "771": 0.5436962842941284, + "772": 0.5067341327667236, + "773": 0.5637845993041992, + "774": 0.5042881369590759, + "775": 0.531964123249054, + "776": 0.5424506664276123, + "777": 0.5415850877761841, + "778": 0.5109631419181824, + "779": 0.5317161083221436, + "780": 0.5138946771621704, + "781": 0.5129923820495605, + "782": 0.5475530028343201, + "783": 0.5945153832435608, + "784": 0.5471187233924866, + "785": 0.5442525744438171, + "786": 0.5080510973930359, + "787": 0.5497950911521912, + "788": 0.5333343744277954, + "789": 0.5105999112129211, + "790": 0.5084962248802185, + "791": 0.5214089751243591, + "792": 0.48692893981933594, + "793": 0.47998690605163574, + "794": 0.5257403254508972, + "795": 0.49471163749694824, + "796": 0.5057258009910583, + "797": 0.505268394947052, + "798": 0.4844566881656647, + "799": 0.5054225325584412, + "800": 0.47709977626800537, + "801": 0.481972336769104, + "802": 0.5098870992660522, + "803": 0.48065146803855896, + "804": 0.49018529057502747, + "805": 0.5300552248954773, + "806": 0.4903004765510559, + "807": 0.5177924036979675, + "808": 0.5555498003959656, + "809": 0.48611950874328613 + }, + "loss": { + "756": 2.026135206222534, + "757": 2.0169997215270996, + "758": 2.0174005031585693, + "759": 2.0081300735473633, + "760": 1.9965903759002686, + "761": 2.020857334136963, + "762": 2.0221657752990723, + "763": 2.0069189071655273, + "764": 2.016416072845459, + "765": 2.008983612060547, + "766": 2.039663314819336, + "767": 2.0164713859558105, + "768": 2.0417206287384033, + "769": 1.9965007305145264, + "770": 2.0104260444641113, + "771": 1.990578293800354, + "772": 2.0226285457611084, + "773": 2.006784200668335, + "774": 2.0326991081237793, + "775": 2.0209670066833496, + "776": 2.0039942264556885, + "777": 2.0059654712677, + "778": 2.0240859985351562, + "779": 2.0161609649658203, + "780": 2.0291223526000977, + "781": 2.000490665435791, + "782": 1.9832587242126465, + "783": 2.021136999130249, + "784": 2.014061689376831, + "785": 2.0211124420166016, + "786": 1.9889352321624756, + "787": 2.0232739448547363, + "788": 2.015451431274414, + "789": 2.0262560844421387, + "790": 1.9943532943725586, + "791": 2.020418643951416, + "792": 2.0105700492858887, + "793": 2.015556573867798, + "794": 2.0287084579467773, + "795": 2.0081353187561035, + "796": 1.9942378997802734, + "797": 2.015990972518921, + "798": 2.0090925693511963, + "799": 1.9954313039779663, + "800": 1.9980233907699585, + "801": 2.00476336479187, + "802": 1.9976757764816284, + "803": 1.9871928691864014, + "804": 1.9835243225097656, + "805": 2.0080370903015137, + "806": 2.0102531909942627, + "807": 1.9849202632904053, + "808": 2.0381040573120117, + "809": 2.0150413513183594 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 0.0711911, + 0.0699299, + 0.0686391, + 0.0672578, + 0.0659716, + 0.0647526, + 0.0634521, + 0.0621303, + 0.0607531, + 0.0594324, + 0.0581301, + 0.056848, + 0.0555039, + 0.054196, + 0.0528614, + 0.0515611, + 0.0502712, + 0.0489147, + 0.0476354, + 0.0463, + 0.0449745, + 0.0436565, + 0.0423531, + 0.0410236, + 0.0397119, + 0.0383905, + 0.0370564, + 0.0357214, + 0.0344179, + 0.0330985, + 0.0317862, + 0.0304552, + 0.0291375, + 0.0278205, + 0.0264976, + 0.0251733, + 0.0238553, + 0.0225327, + 0.0212039, + 0.0198833, + 0.0185581, + 0.0172343, + 0.0159111, + 0.0145851, + 0.0132616, + 0.0119362, + 0.01061, + 0.00928511, + 0.00795912, + 0.00663272, + 0.00530696, + 0.00398041, + 0.00265379, + 0.00132706 + ], + "train_epoch_time": 4.8407135009765625, + "train_loss": 2.0041962477876947, + "train_score": 0.40498004843035934, + "val_loss": 2.112407008761243, + "val_score": 0.3762467705081169 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:27:03.667579", + "final_model_norm": 87.7691650390625, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:25:22.141603", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 8.009259223937988, + "learning_rate": 2.15e-11, + "model_norm": 87.43960571289062, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.895608901977539, + "3": 8.547172546386719, + "4": 20.509349822998047, + "5": 8.080146789550781, + "6": 5.149311542510986, + "7": 4.696757793426514, + "8": 4.1927337646484375, + "9": 6.789407253265381, + "10": 4.837518215179443, + "11": 10.126346588134766, + "12": 1.9325710535049438, + "13": 4.5322651863098145, + "14": 7.024761199951172, + "15": 25.329877853393555, + "16": 7.749931812286377, + "17": 25.037580490112305, + "18": 4.084115505218506, + "19": 5.071617603302002, + "20": 5.837102890014648, + "21": 6.439196586608887, + "22": 4.204246997833252, + "23": 5.8123674392700195, + "24": 3.6390187740325928, + "25": 24.066499710083008, + "26": 3.986567974090576, + "27": 13.763236045837402, + "28": 5.870517253875732, + "29": 27.57763671875, + "30": 3.9071195125579834, + "31": 3.903594732284546, + "32": 5.579819202423096, + "33": 3.242835760116577, + "34": 8.32712459564209, + "35": 3.9553658962249756, + "36": 2.4306905269622803, + "37": 6.125897407531738, + "38": 3.652567148208618, + "39": 4.9440388679504395, + "40": 4.961220741271973, + "41": 5.906675815582275, + "42": 4.929117679595947, + "43": 4.015328884124756, + "44": 3.1502163410186768, + "45": 3.341864585876465, + "46": 3.961202383041382, + "47": 3.1543006896972656, + "48": 5.882992744445801, + "49": 4.062252998352051, + "50": 10.771195411682129, + "51": 5.780614852905273, + "52": 2.9459245204925537, + "53": 8.009259223937988 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.855247974395752, + "3": 3.6929891109466553, + "4": 4.129785537719727, + "5": 4.1043596267700195, + "6": 3.5702595710754395, + "7": 3.5731899738311768, + "8": 3.539045810699463, + "9": 3.4656357765197754, + "10": 3.7069530487060547, + "11": 3.3295164108276367, + "12": 3.3303349018096924, + "13": 3.270383358001709, + "14": 3.419666290283203, + "15": 5.847019672393799, + "16": 3.521577835083008, + "17": 4.828075408935547, + "18": 3.4318320751190186, + "19": 3.0727391242980957, + "20": 3.274855613708496, + "21": 3.561856269836426, + "22": 3.0707850456237793, + "23": 3.165107011795044, + "24": 2.922550678253174, + "25": 5.4841156005859375, + "26": 3.095470428466797, + "27": 3.479790210723877, + "28": 3.0608644485473633, + "29": 4.4806013107299805, + "30": 3.3425827026367188, + "31": 3.0492196083068848, + "32": 3.2768490314483643, + "33": 3.048379421234131, + "34": 3.403010845184326, + "35": 3.2578158378601074, + "36": 2.8687338829040527, + "37": 3.1075387001037598, + "38": 2.9389617443084717, + "39": 3.2360987663269043, + "40": 3.3155179023742676, + "41": 3.0283236503601074, + "42": 3.1334774494171143, + "43": 3.3440723419189453, + "44": 2.9469733238220215, + "45": 2.946594715118408, + "46": 3.0986151695251465, + "47": 2.9633421897888184, + "48": 3.1910109519958496, + "49": 3.0943214893341064, + "50": 3.6288628578186035, + "51": 3.363668441772461, + "52": 3.04264235496521, + "53": 3.458778142929077 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 2.15e-11, + 0.00345153, + 0.00816687, + 0.0114403, + 0.00916873, + 0.0183603, + 0.0235443, + 0.0275411, + 0.0316924, + 0.0307785, + 0.0378612, + 0.0273667, + 0.050149, + 0.047552, + 0.0419701, + 0.0142107, + 0.0433604, + 0.0127226, + 0.0651462, + 0.0608817, + 0.0594179, + 0.0591903, + 0.0743557, + 0.0647328, + 0.0836436, + 0.0161007, + 0.0868686, + 0.0279085, + 0.0717604, + 0.0107657, + 0.0996469, + 0.0999944, + 0.0832078, + 0.113998, + 0.0587264, + 0.11055, + 0.133516, + 0.0811468, + 0.119194, + 0.102672, + 0.104978, + 0.0874694, + 0.106225, + 0.127894, + 0.14349, + 0.141582, + 0.131795, + 0.150902, + 0.0973905, + 0.134906, + 0.0484574, + 0.103969, + 0.164547, + 0.0718163 + ], + "train_epoch_time": 4.842504978179932, + "train_loss": 2.7792877371034117, + "train_score": 0.22943754480080078, + "val_loss": 2.8057689620213995, + "val_score": 0.2243514995418925 + }, + { + "epoch": 1, + "grad_norm": 1.44816255569458, + "learning_rate": 0.215, + "model_norm": 87.4570083618164, + "step_logs": { + "grad_norm": { + "54": 1.894862413406372, + "55": 4.422108173370361, + "56": 2.442340850830078, + "57": 1.8581420183181763, + "58": 2.252784252166748, + "59": 2.876319646835327, + "60": 2.35105562210083, + "61": 2.063795328140259, + "62": 2.091115713119507, + "63": 1.8670570850372314, + "64": 1.9457156658172607, + "65": 3.7123773097991943, + "66": 2.3150274753570557, + "67": 1.7374805212020874, + "68": 1.8530274629592896, + "69": 2.2747626304626465, + "70": 2.5020837783813477, + "71": 1.7543612718582153, + "72": 1.455753207206726, + "73": 1.6155232191085815, + "74": 1.8848108053207397, + "75": 1.824452519416809, + "76": 2.532111644744873, + "77": 1.7775555849075317, + "78": 1.2957557439804077, + "79": 1.6579508781433105, + "80": 2.500919818878174, + "81": 1.7985024452209473, + "82": 1.5269691944122314, + "83": 1.5915164947509766, + "84": 2.01928448677063, + "85": 1.7772475481033325, + "86": 1.4115211963653564, + "87": 1.5029006004333496, + "88": 2.4845919609069824, + "89": 1.6661263704299927, + "90": 1.1188994646072388, + "91": 0.9967756867408752, + "92": 1.0742534399032593, + "93": 1.3105775117874146, + "94": 1.556591272354126, + "95": 1.9750018119812012, + "96": 1.867174744606018, + "97": 1.6376919746398926, + "98": 1.5593616962432861, + "99": 1.676844835281372, + "100": 1.7076938152313232, + "101": 1.5882302522659302, + "102": 1.5186654329299927, + "103": 1.5854926109313965, + "104": 1.5250271558761597, + "105": 1.397335171699524, + "106": 1.3582278490066528, + "107": 1.44816255569458 + }, + "loss": { + "54": 2.7846832275390625, + "55": 3.0264081954956055, + "56": 3.1928818225860596, + "57": 2.8545303344726562, + "58": 2.768885612487793, + "59": 3.0259578227996826, + "60": 2.9479548931121826, + "61": 2.8161587715148926, + "62": 2.7697196006774902, + "63": 2.816161632537842, + "64": 2.7519495487213135, + "65": 2.9004955291748047, + "66": 3.116636037826538, + "67": 2.734819173812866, + "68": 2.7392799854278564, + "69": 2.8012518882751465, + "70": 2.82958984375, + "71": 2.8844799995422363, + "72": 2.680906057357788, + "73": 2.6877853870391846, + "74": 2.7061243057250977, + "75": 2.78086519241333, + "76": 2.7352066040039062, + "77": 2.876542568206787, + "78": 2.6281580924987793, + "79": 2.6675047874450684, + "80": 2.7303972244262695, + "81": 2.8466410636901855, + "82": 2.658008575439453, + "83": 2.649508476257324, + "84": 2.6930394172668457, + "85": 2.755890369415283, + "86": 2.6223273277282715, + "87": 2.6280159950256348, + "88": 2.692025661468506, + "89": 2.847379207611084, + "90": 2.6395182609558105, + "91": 2.556950092315674, + "92": 2.6009671688079834, + "93": 2.5640203952789307, + "94": 2.634103298187256, + "95": 2.6527695655822754, + "96": 2.7670319080352783, + "97": 2.6478259563446045, + "98": 2.6880149841308594, + "99": 2.621617317199707, + "100": 2.731639862060547, + "101": 2.6194186210632324, + "102": 2.682933807373047, + "103": 2.592667818069458, + "104": 2.6661670207977295, + "105": 2.589637041091919, + "106": 2.6064839363098145, + "107": 2.599349021911621 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.188827, + 0.126873, + 0.179042, + 0.190261, + 0.179611, + 0.166163, + 0.178933, + 0.184932, + 0.183805, + 0.189751, + 0.187301, + 0.14231, + 0.181457, + 0.192194, + 0.189469, + 0.179379, + 0.173689, + 0.192876, + 0.198161, + 0.194678, + 0.188411, + 0.190489, + 0.171727, + 0.192294, + 0.201184, + 0.193558, + 0.172517, + 0.191596, + 0.196473, + 0.194964, + 0.184904, + 0.191416, + 0.198766, + 0.196816, + 0.172481, + 0.194605, + 0.204569, + 0.206379, + 0.205212, + 0.200557, + 0.195653, + 0.185654, + 0.189353, + 0.193888, + 0.195945, + 0.192773, + 0.192866, + 0.194831, + 0.196812, + 0.194706, + 0.196567, + 0.19888, + 0.199798, + 0.197841 + ], + "train_epoch_time": 4.840651988983154, + "train_loss": 2.6558046503764827, + "train_score": 0.229878048754833, + "val_loss": 2.692034591900358, + "val_score": 0.22710515921411503 + }, + { + "epoch": 2, + "grad_norm": 1.271525502204895, + "learning_rate": 0.215, + "model_norm": 87.47675323486328, + "step_logs": { + "grad_norm": { + "108": 1.4894136190414429, + "109": 1.3828215599060059, + "110": 1.4760452508926392, + "111": 1.4988921880722046, + "112": 1.4483243227005005, + "113": 1.403370976448059, + "114": 1.3012850284576416, + "115": 1.4540739059448242, + "116": 1.492295742034912, + "117": 1.346686601638794, + "118": 1.2770353555679321, + "119": 1.5110337734222412, + "120": 1.3763647079467773, + "121": 1.1737613677978516, + "122": 1.269608974456787, + "123": 1.3859057426452637, + "124": 1.3578894138336182, + "125": 1.1743577718734741, + "126": 1.2089983224868774, + "127": 1.333441972732544, + "128": 1.6433228254318237, + "129": 1.5958722829818726, + "130": 1.5101783275604248, + "131": 1.4649394750595093, + "132": 1.4888304471969604, + "133": 1.6221187114715576, + "134": 1.5748037099838257, + "135": 1.4491182565689087, + "136": 1.34010648727417, + "137": 1.4760727882385254, + "138": 1.827445387840271, + "139": 1.5479952096939087, + "140": 1.0878485441207886, + "141": 1.0872464179992676, + "142": 1.3751699924468994, + "143": 1.4036115407943726, + "144": 1.2593539953231812, + "145": 1.2724465131759644, + "146": 1.360815405845642, + "147": 1.260218858718872, + "148": 1.1086946725845337, + "149": 1.2605623006820679, + "150": 1.6293182373046875, + "151": 1.35448157787323, + "152": 0.9810760021209717, + "153": 0.9596835374832153, + "154": 1.2186592817306519, + "155": 1.3001915216445923, + "156": 1.3584920167922974, + "157": 1.4139901399612427, + "158": 1.8265085220336914, + "159": 1.5426403284072876, + "160": 1.20428466796875, + "161": 1.271525502204895 + }, + "loss": { + "108": 2.6572370529174805, + "109": 2.57897686958313, + "110": 2.6341605186462402, + "111": 2.6219968795776367, + "112": 2.6115472316741943, + "113": 2.593916416168213, + "114": 2.607882022857666, + "115": 2.5862903594970703, + "116": 2.674384117126465, + "117": 2.586663007736206, + "118": 2.607825756072998, + "119": 2.576197385787964, + "120": 2.636868715286255, + "121": 2.555208444595337, + "122": 2.557699203491211, + "123": 2.583102226257324, + "124": 2.5912489891052246, + "125": 2.570688486099243, + "126": 2.564873695373535, + "127": 2.588277816772461, + "128": 2.5981221199035645, + "129": 2.6688568592071533, + "130": 2.564763069152832, + "131": 2.6411280632019043, + "132": 2.5746936798095703, + "133": 2.6381752490997314, + "134": 2.6091103553771973, + "135": 2.5993170738220215, + "136": 2.562162399291992, + "137": 2.598769187927246, + "138": 2.600484848022461, + "139": 2.6600613594055176, + "140": 2.555112361907959, + "141": 2.544739246368408, + "142": 2.562129020690918, + "143": 2.6135807037353516, + "144": 2.5602288246154785, + "145": 2.5906286239624023, + "146": 2.5725152492523193, + "147": 2.5803475379943848, + "148": 2.5332067012786865, + "149": 2.552424430847168, + "150": 2.5680408477783203, + "151": 2.6129841804504395, + "152": 2.525791645050049, + "153": 2.5124363899230957, + "154": 2.525808811187744, + "155": 2.5710768699645996, + "156": 2.546027421951294, + "157": 2.5899622440338135, + "158": 2.575076103210449, + "159": 2.6611618995666504, + "160": 2.541957139968872, + "161": 2.5649991035461426 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 0.197294, + 0.199128, + 0.197445, + 0.196866, + 0.197911, + 0.198776, + 0.200972, + 0.197632, + 0.197336, + 0.199931, + 0.201457, + 0.196298, + 0.199586, + 0.203221, + 0.201358, + 0.199086, + 0.199722, + 0.203277, + 0.202589, + 0.200214, + 0.193391, + 0.194996, + 0.196241, + 0.197729, + 0.196787, + 0.19418, + 0.195068, + 0.19782, + 0.199935, + 0.197225, + 0.188919, + 0.196018, + 0.204803, + 0.204774, + 0.199195, + 0.198884, + 0.201577, + 0.201464, + 0.199558, + 0.201658, + 0.204341, + 0.201514, + 0.193497, + 0.199911, + 0.206539, + 0.206849, + 0.202218, + 0.200807, + 0.199458, + 0.198525, + 0.188717, + 0.196144, + 0.202575, + 0.201356 + ], + "train_epoch_time": 4.840525388717651, + "train_loss": 2.5537030063366446, + "train_score": 0.24254617999325181, + "val_loss": 2.5877756248522297, + "val_score": 0.23805252567840363 + }, + { + "epoch": 3, + "grad_norm": 1.1912970542907715, + "learning_rate": 0.215, + "model_norm": 87.49517822265625, + "step_logs": { + "grad_norm": { + "162": 1.2298322916030884, + "163": 1.1380009651184082, + "164": 1.181266188621521, + "165": 1.3488095998764038, + "166": 1.589025616645813, + "167": 1.3674647808074951, + "168": 1.069467306137085, + "169": 1.0639724731445312, + "170": 1.2408781051635742, + "171": 1.2359596490859985, + "172": 1.1599916219711304, + "173": 1.1227874755859375, + "174": 1.137825846672058, + "175": 1.204203486442566, + "176": 1.2972830533981323, + "177": 1.2549153566360474, + "178": 1.2810430526733398, + "179": 1.2485711574554443, + "180": 1.2574368715286255, + "181": 1.1830329895019531, + "182": 1.1039347648620605, + "183": 1.1476048231124878, + "184": 1.2330516576766968, + "185": 1.2602540254592896, + "186": 1.1536614894866943, + "187": 1.218313455581665, + "188": 1.2707144021987915, + "189": 1.3096917867660522, + "190": 1.197945237159729, + "191": 1.0904420614242554, + "192": 1.0734726190567017, + "193": 1.043286681175232, + "194": 0.9201653003692627, + "195": 0.9867708086967468, + "196": 1.0302892923355103, + "197": 1.052281379699707, + "198": 1.070786476135254, + "199": 1.1601042747497559, + "200": 1.2059375047683716, + "201": 1.265130877494812, + "202": 1.1423442363739014, + "203": 1.0491350889205933, + "204": 1.209293007850647, + "205": 1.464426040649414, + "206": 1.348534107208252, + "207": 1.0735282897949219, + "208": 1.0005972385406494, + "209": 1.0789453983306885, + "210": 1.2591506242752075, + "211": 1.2928924560546875, + "212": 1.428507924079895, + "213": 1.4925732612609863, + "214": 1.4553636312484741, + "215": 1.1912970542907715 + }, + "loss": { + "162": 2.5573983192443848, + "163": 2.5325074195861816, + "164": 2.540314197540283, + "165": 2.571876049041748, + "166": 2.57242488861084, + "167": 2.5828452110290527, + "168": 2.541640520095825, + "169": 2.550708293914795, + "170": 2.532912492752075, + "171": 2.567884922027588, + "172": 2.5111184120178223, + "173": 2.5614099502563477, + "174": 2.5236496925354004, + "175": 2.5592501163482666, + "176": 2.529876232147217, + "177": 2.5750584602355957, + "178": 2.5294413566589355, + "179": 2.5599637031555176, + "180": 2.5318305492401123, + "181": 2.5704944133758545, + "182": 2.5227646827697754, + "183": 2.5377259254455566, + "184": 2.5326521396636963, + "185": 2.556995391845703, + "186": 2.5199294090270996, + "187": 2.5326504707336426, + "188": 2.524649143218994, + "189": 2.5595850944519043, + "190": 2.538721799850464, + "191": 2.511291265487671, + "192": 2.517062187194824, + "193": 2.51430082321167, + "194": 2.491483688354492, + "195": 2.5122554302215576, + "196": 2.5037684440612793, + "197": 2.4930014610290527, + "198": 2.5202560424804688, + "199": 2.5206403732299805, + "200": 2.538309097290039, + "201": 2.524566888809204, + "202": 2.543318748474121, + "203": 2.4883763790130615, + "204": 2.5055623054504395, + "205": 2.5367953777313232, + "206": 2.5493545532226562, + "207": 2.521625518798828, + "208": 2.501934051513672, + "209": 2.4813055992126465, + "210": 2.536578416824341, + "211": 2.5319321155548096, + "212": 2.531804084777832, + "213": 2.5508384704589844, + "214": 2.542196273803711, + "215": 2.5534985065460205 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 0.202148, + 0.203797, + 0.203012, + 0.199806, + 0.194479, + 0.199475, + 0.205079, + 0.205209, + 0.201812, + 0.202077, + 0.20329, + 0.204196, + 0.203763, + 0.202656, + 0.200651, + 0.201737, + 0.200983, + 0.20179, + 0.201474, + 0.203112, + 0.204386, + 0.203639, + 0.201966, + 0.201543, + 0.203449, + 0.202257, + 0.201169, + 0.200552, + 0.202684, + 0.204587, + 0.204915, + 0.205439, + 0.207422, + 0.2064, + 0.205628, + 0.205202, + 0.204975, + 0.203329, + 0.202526, + 0.201282, + 0.203761, + 0.205241, + 0.202307, + 0.197089, + 0.199687, + 0.204932, + 0.206133, + 0.204677, + 0.201463, + 0.200752, + 0.197857, + 0.196547, + 0.197326, + 0.202879 + ], + "train_epoch_time": 4.8404014110565186, + "train_loss": 2.5008059670627545, + "train_score": 0.25275399024722567, + "val_loss": 2.54108883291654, + "val_score": 0.2455959390040517 + }, + { + "epoch": 4, + "grad_norm": 1.2254489660263062, + "learning_rate": 0.215, + "model_norm": 87.51608276367188, + "step_logs": { + "grad_norm": { + "216": 1.0964171886444092, + "217": 1.1696780920028687, + "218": 1.4042595624923706, + "219": 1.2265911102294922, + "220": 1.0236988067626953, + "221": 1.1062026023864746, + "222": 1.31465744972229, + "223": 1.2222636938095093, + "224": 1.0862435102462769, + "225": 1.0885816812515259, + "226": 1.2770341634750366, + "227": 1.379095196723938, + "228": 1.425399661064148, + "229": 1.24801766872406, + "230": 0.9755950570106506, + "231": 1.0528799295425415, + "232": 1.2775955200195312, + "233": 1.3246861696243286, + "234": 1.0958911180496216, + "235": 1.0020177364349365, + "236": 1.0134462118148804, + "237": 1.1333707571029663, + "238": 1.1260290145874023, + "239": 1.1110936403274536, + "240": 1.23879873752594, + "241": 1.2753078937530518, + "242": 1.4504419565200806, + "243": 1.3589779138565063, + "244": 1.1650276184082031, + "245": 1.2573984861373901, + "246": 1.3570982217788696, + "247": 1.2701681852340698, + "248": 1.1740875244140625, + "249": 1.1700470447540283, + "250": 1.244688868522644, + "251": 1.152511477470398, + "252": 1.1083216667175293, + "253": 1.1838114261627197, + "254": 1.3190199136734009, + "255": 1.2746427059173584, + "256": 1.272968053817749, + "257": 1.1549628973007202, + "258": 0.9561015963554382, + "259": 0.9624854922294617, + "260": 1.0395781993865967, + "261": 1.0860049724578857, + "262": 1.1191737651824951, + "263": 1.153929591178894, + "264": 1.173606276512146, + "265": 1.2698942422866821, + "266": 1.4086724519729614, + "267": 1.3684756755828857, + "268": 1.2066751718521118, + "269": 1.2254489660263062 + }, + "loss": { + "216": 2.507450580596924, + "217": 2.51652455329895, + "218": 2.531987428665161, + "219": 2.5652501583099365, + "220": 2.4923508167266846, + "221": 2.5162174701690674, + "222": 2.5088624954223633, + "223": 2.5503973960876465, + "224": 2.513488292694092, + "225": 2.513392448425293, + "226": 2.517491579055786, + "227": 2.541356086730957, + "228": 2.529808521270752, + "229": 2.549508571624756, + "230": 2.4958877563476562, + "231": 2.4786903858184814, + "232": 2.512308120727539, + "233": 2.5520105361938477, + "234": 2.4845728874206543, + "235": 2.4899091720581055, + "236": 2.4968483448028564, + "237": 2.494290828704834, + "238": 2.5014572143554688, + "239": 2.512244701385498, + "240": 2.5045242309570312, + "241": 2.524292230606079, + "242": 2.5382938385009766, + "243": 2.568535327911377, + "244": 2.5154201984405518, + "245": 2.5185720920562744, + "246": 2.509398937225342, + "247": 2.531099319458008, + "248": 2.499382972717285, + "249": 2.494936466217041, + "250": 2.497504711151123, + "251": 2.4962716102600098, + "252": 2.4703593254089355, + "253": 2.4983773231506348, + "254": 2.4947195053100586, + "255": 2.540478467941284, + "256": 2.4801793098449707, + "257": 2.5115981101989746, + "258": 2.4627747535705566, + "259": 2.4627232551574707, + "260": 2.4589145183563232, + "261": 2.486844539642334, + "262": 2.4743926525115967, + "263": 2.4681613445281982, + "264": 2.462559700012207, + "265": 2.4892711639404297, + "266": 2.491407871246338, + "267": 2.4934535026550293, + "268": 2.4579625129699707, + "269": 2.473228931427002 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 0.204462, + 0.203128, + 0.19839, + 0.202248, + 0.205702, + 0.204318, + 0.200176, + 0.202264, + 0.204671, + 0.204629, + 0.201003, + 0.198991, + 0.197913, + 0.20175, + 0.206533, + 0.205137, + 0.200964, + 0.200201, + 0.20438, + 0.206067, + 0.205895, + 0.203722, + 0.20389, + 0.204212, + 0.201713, + 0.201073, + 0.197411, + 0.199574, + 0.203213, + 0.201408, + 0.199278, + 0.201213, + 0.202966, + 0.203024, + 0.201559, + 0.203367, + 0.204091, + 0.202773, + 0.200006, + 0.20117, + 0.20089, + 0.203388, + 0.20675, + 0.206644, + 0.2053, + 0.20457, + 0.203904, + 0.203214, + 0.202806, + 0.201002, + 0.198043, + 0.198938, + 0.202128, + 0.201826 + ], + "train_epoch_time": 4.840526580810547, + "train_loss": 2.4780933330869743, + "train_score": 0.25790553260567883, + "val_loss": 2.5173659404027613, + "val_score": 0.2527671138126185 + }, + { + "epoch": 5, + "grad_norm": 1.0441583395004272, + "learning_rate": 0.215, + "model_norm": 87.5417251586914, + "step_logs": { + "grad_norm": { + "270": 1.4257049560546875, + "271": 1.4027704000473022, + "272": 1.2084821462631226, + "273": 1.3101686239242554, + "274": 1.5494297742843628, + "275": 1.5516151189804077, + "276": 1.2794824838638306, + "277": 1.0849683284759521, + "278": 1.3223330974578857, + "279": 1.2245690822601318, + "280": 0.9998918771743774, + "281": 0.9589459300041199, + "282": 1.0402448177337646, + "283": 1.1229039430618286, + "284": 1.0915417671203613, + "285": 1.1122839450836182, + "286": 1.222483515739441, + "287": 1.3077948093414307, + "288": 1.4777703285217285, + "289": 1.2915337085723877, + "290": 1.184860110282898, + "291": 1.3461042642593384, + "292": 1.8270072937011719, + "293": 1.5730558633804321, + "294": 1.1169146299362183, + "295": 1.2847164869308472, + "296": 1.4979956150054932, + "297": 1.4181334972381592, + "298": 1.3067734241485596, + "299": 1.2265006303787231, + "300": 1.2106889486312866, + "301": 1.155633568763733, + "302": 1.1394784450531006, + "303": 1.2937053442001343, + "304": 1.1369987726211548, + "305": 1.0279014110565186, + "306": 1.3103567361831665, + "307": 1.5478700399398804, + "308": 2.0410711765289307, + "309": 1.5554461479187012, + "310": 1.065980076789856, + "311": 1.0688998699188232, + "312": 1.1154381036758423, + "313": 1.3959089517593384, + "314": 1.3033171892166138, + "315": 1.1002565622329712, + "316": 1.0401891469955444, + "317": 1.254061222076416, + "318": 1.4104152917861938, + "319": 1.422568440437317, + "320": 1.5920617580413818, + "321": 1.3658921718597412, + "322": 1.0920586585998535, + "323": 1.0441583395004272 + }, + "loss": { + "270": 2.4630017280578613, + "271": 2.5044703483581543, + "272": 2.46856689453125, + "273": 2.481046199798584, + "274": 2.509547710418701, + "275": 2.5006208419799805, + "276": 2.486436605453491, + "277": 2.4444892406463623, + "278": 2.4406542778015137, + "279": 2.4858808517456055, + "280": 2.427652597427368, + "281": 2.425168752670288, + "282": 2.430178642272949, + "283": 2.4459736347198486, + "284": 2.411555767059326, + "285": 2.4211831092834473, + "286": 2.424304962158203, + "287": 2.430389404296875, + "288": 2.446146249771118, + "289": 2.4616544246673584, + "290": 2.4561920166015625, + "291": 2.4272093772888184, + "292": 2.466007709503174, + "293": 2.524806499481201, + "294": 2.4194936752319336, + "295": 2.439279556274414, + "296": 2.4590671062469482, + "297": 2.4588913917541504, + "298": 2.471344470977783, + "299": 2.4302077293395996, + "300": 2.4199886322021484, + "301": 2.4159021377563477, + "302": 2.401740789413452, + "303": 2.4411728382110596, + "304": 2.4191832542419434, + "305": 2.391284465789795, + "306": 2.388394594192505, + "307": 2.481267213821411, + "308": 2.495596408843994, + "309": 2.5316739082336426, + "310": 2.4105443954467773, + "311": 2.408486843109131, + "312": 2.395228624343872, + "313": 2.416781425476074, + "314": 2.440106153488159, + "315": 2.4157421588897705, + "316": 2.391627788543701, + "317": 2.411679744720459, + "318": 2.4394097328186035, + "319": 2.4460887908935547, + "320": 2.437913656234741, + "321": 2.482391595840454, + "322": 2.4095778465270996, + "323": 2.383333683013916 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 0.19748, + 0.198255, + 0.202144, + 0.200116, + 0.194951, + 0.194835, + 0.200789, + 0.204418, + 0.199626, + 0.201907, + 0.205885, + 0.206579, + 0.205179, + 0.203711, + 0.204157, + 0.203805, + 0.201638, + 0.199879, + 0.196173, + 0.200402, + 0.202554, + 0.199028, + 0.187689, + 0.194507, + 0.203709, + 0.200422, + 0.195793, + 0.197624, + 0.200134, + 0.201586, + 0.201857, + 0.20294, + 0.203191, + 0.200242, + 0.20332, + 0.205251, + 0.199576, + 0.194781, + 0.182288, + 0.19497, + 0.20463, + 0.204568, + 0.203629, + 0.197852, + 0.200031, + 0.20401, + 0.205029, + 0.200916, + 0.197671, + 0.19744, + 0.193386, + 0.198928, + 0.204139, + 0.204923 + ], + "train_epoch_time": 4.843405485153198, + "train_loss": 2.3760311295004453, + "train_score": 0.29545709300930567, + "val_loss": 2.432093657253804, + "val_score": 0.28322779143445115 + }, + { + "epoch": 6, + "grad_norm": 1.1946622133255005, + "learning_rate": 0.215, + "model_norm": 87.57019805908203, + "step_logs": { + "grad_norm": { + "324": 1.209341287612915, + "325": 1.2679269313812256, + "326": 1.4041210412979126, + "327": 1.3800965547561646, + "328": 1.187725305557251, + "329": 1.2039364576339722, + "330": 1.3025007247924805, + "331": 1.2542200088500977, + "332": 1.2112531661987305, + "333": 1.1725139617919922, + "334": 1.1050729751586914, + "335": 1.1464771032333374, + "336": 1.2001454830169678, + "337": 1.114606499671936, + "338": 1.0524088144302368, + "339": 1.194870114326477, + "340": 1.422607183456421, + "341": 1.3638899326324463, + "342": 1.287209391593933, + "343": 1.2944172620773315, + "344": 1.3072091341018677, + "345": 1.230619192123413, + "346": 1.1629935503005981, + "347": 1.3371115922927856, + "348": 1.3612323999404907, + "349": 1.4867256879806519, + "350": 1.2252507209777832, + "351": 1.1664153337478638, + "352": 1.478360652923584, + "353": 1.4365437030792236, + "354": 1.2908767461776733, + "355": 1.244454026222229, + "356": 1.1970903873443604, + "357": 1.2073019742965698, + "358": 1.1434781551361084, + "359": 1.2041680812835693, + "360": 1.3366196155548096, + "361": 1.353727102279663, + "362": 1.359007477760315, + "363": 1.4227417707443237, + "364": 1.2858036756515503, + "365": 1.372061014175415, + "366": 1.3342736959457397, + "367": 1.3146791458129883, + "368": 1.150631308555603, + "369": 1.3066638708114624, + "370": 1.6268926858901978, + "371": 1.4209587574005127, + "372": 1.132925033569336, + "373": 1.2390443086624146, + "374": 1.3550221920013428, + "375": 1.1910380125045776, + "376": 1.0154519081115723, + "377": 1.1946622133255005 + }, + "loss": { + "324": 2.3801145553588867, + "325": 2.4180331230163574, + "326": 2.3995344638824463, + "327": 2.4644646644592285, + "328": 2.3831417560577393, + "329": 2.4095840454101562, + "330": 2.392375946044922, + "331": 2.441563367843628, + "332": 2.373298168182373, + "333": 2.3981404304504395, + "334": 2.3750412464141846, + "335": 2.372262954711914, + "336": 2.3678417205810547, + "337": 2.3876588344573975, + "338": 2.358581066131592, + "339": 2.3888301849365234, + "340": 2.393829107284546, + "341": 2.4342761039733887, + "342": 2.3749585151672363, + "343": 2.4016263484954834, + "344": 2.4009885787963867, + "345": 2.397040605545044, + "346": 2.389812469482422, + "347": 2.3754165172576904, + "348": 2.388915777206421, + "349": 2.398926258087158, + "350": 2.3806843757629395, + "351": 2.3700313568115234, + "352": 2.3611607551574707, + "353": 2.404709815979004, + "354": 2.401388168334961, + "355": 2.3730459213256836, + "356": 2.344261884689331, + "357": 2.3675432205200195, + "358": 2.357637405395508, + "359": 2.3717997074127197, + "360": 2.3724632263183594, + "361": 2.3820204734802246, + "362": 2.3731064796447754, + "363": 2.389777183532715, + "364": 2.3574070930480957, + "365": 2.366885185241699, + "366": 2.3855202198028564, + "367": 2.344898223876953, + "368": 2.3641915321350098, + "369": 2.3563342094421387, + "370": 2.3924169540405273, + "371": 2.4021754264831543, + "372": 2.343140125274658, + "373": 2.3370323181152344, + "374": 2.318009614944458, + "375": 2.394998073577881, + "376": 2.328307628631592, + "377": 2.349825143814087 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 0.201678, + 0.200659, + 0.197551, + 0.198508, + 0.202137, + 0.201941, + 0.199771, + 0.201073, + 0.201603, + 0.202519, + 0.203739, + 0.202914, + 0.201804, + 0.203611, + 0.204668, + 0.20202, + 0.197088, + 0.198679, + 0.2, + 0.2, + 0.19972, + 0.201326, + 0.202669, + 0.198906, + 0.198453, + 0.195624, + 0.201351, + 0.202503, + 0.195543, + 0.196841, + 0.200075, + 0.200905, + 0.201743, + 0.201654, + 0.202903, + 0.201741, + 0.198899, + 0.198577, + 0.198401, + 0.197057, + 0.199927, + 0.198065, + 0.199032, + 0.199215, + 0.202792, + 0.199463, + 0.192148, + 0.197183, + 0.203044, + 0.200819, + 0.198129, + 0.20213, + 0.205229, + 0.201823 + ], + "train_epoch_time": 4.841535329818726, + "train_loss": 2.3503131663269086, + "train_score": 0.3076892037388242, + "val_loss": 2.3986090609181487, + "val_score": 0.29954793368360344 + }, + { + "epoch": 7, + "grad_norm": 1.055579662322998, + "learning_rate": 0.215, + "model_norm": 87.59964752197266, + "step_logs": { + "grad_norm": { + "378": 1.3451377153396606, + "379": 1.1908732652664185, + "380": 1.2701956033706665, + "381": 1.2041716575622559, + "382": 1.0092896223068237, + "383": 1.0356189012527466, + "384": 1.1023406982421875, + "385": 1.0996164083480835, + "386": 1.043758511543274, + "387": 1.098724603652954, + "388": 1.2740857601165771, + "389": 1.294141173362732, + "390": 1.4785574674606323, + "391": 1.2953040599822998, + "392": 1.2396036386489868, + "393": 1.4562002420425415, + "394": 1.3477267026901245, + "395": 1.2786006927490234, + "396": 1.4313055276870728, + "397": 1.4589734077453613, + "398": 1.3237029314041138, + "399": 1.2125753164291382, + "400": 1.0950778722763062, + "401": 1.11845064163208, + "402": 1.1946892738342285, + "403": 1.3943697214126587, + "404": 1.3681559562683105, + "405": 1.3384937047958374, + "406": 1.1290283203125, + "407": 1.0381335020065308, + "408": 1.0361014604568481, + "409": 1.1948539018630981, + "410": 1.45245361328125, + "411": 1.2811055183410645, + "412": 1.1782766580581665, + "413": 1.1835105419158936, + "414": 1.1730504035949707, + "415": 1.244701862335205, + "416": 1.2350660562515259, + "417": 1.165492296218872, + "418": 1.124158501625061, + "419": 1.1993390321731567, + "420": 1.2762608528137207, + "421": 1.2548555135726929, + "422": 1.0719361305236816, + "423": 1.0902810096740723, + "424": 1.0921398401260376, + "425": 1.0465158224105835, + "426": 1.0896083116531372, + "427": 1.1653156280517578, + "428": 1.2361220121383667, + "429": 0.9908849000930786, + "430": 0.8620224595069885, + "431": 1.055579662322998 + }, + "loss": { + "378": 2.3538331985473633, + "379": 2.360398292541504, + "380": 2.3280625343322754, + "381": 2.3678481578826904, + "382": 2.3326385021209717, + "383": 2.313075065612793, + "384": 2.3166229724884033, + "385": 2.320791721343994, + "386": 2.3170175552368164, + "387": 2.3302412033081055, + "388": 2.315654754638672, + "389": 2.3693933486938477, + "390": 2.306424617767334, + "391": 2.3838586807250977, + "392": 2.3433754444122314, + "393": 2.338879108428955, + "394": 2.3733837604522705, + "395": 2.3242640495300293, + "396": 2.356722116470337, + "397": 2.3543171882629395, + "398": 2.369826078414917, + "399": 2.3480446338653564, + "400": 2.3046045303344727, + "401": 2.3149924278259277, + "402": 2.3065247535705566, + "403": 2.302978038787842, + "404": 2.333252429962158, + "405": 2.3221964836120605, + "406": 2.3024978637695312, + "407": 2.278294086456299, + "408": 2.2879509925842285, + "409": 2.298394203186035, + "410": 2.336047649383545, + "411": 2.3536183834075928, + "412": 2.305198907852173, + "413": 2.3184239864349365, + "414": 2.3139631748199463, + "415": 2.3176610469818115, + "416": 2.2976155281066895, + "417": 2.315744400024414, + "418": 2.281510591506958, + "419": 2.301280975341797, + "420": 2.307081699371338, + "421": 2.312774896621704, + "422": 2.3024771213531494, + "423": 2.2889058589935303, + "424": 2.284330368041992, + "425": 2.2692503929138184, + "426": 2.2893426418304443, + "427": 2.301239013671875, + "428": 2.283637046813965, + "429": 2.302217483520508, + "430": 2.2413077354431152, + "431": 2.2743871212005615 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 0.198589, + 0.201956, + 0.200093, + 0.201721, + 0.205359, + 0.204792, + 0.203524, + 0.203597, + 0.204656, + 0.203658, + 0.199933, + 0.199817, + 0.195119, + 0.199877, + 0.200842, + 0.195906, + 0.198656, + 0.199886, + 0.196626, + 0.195954, + 0.199169, + 0.20144, + 0.203611, + 0.203197, + 0.20159, + 0.197111, + 0.19793, + 0.198534, + 0.202923, + 0.204596, + 0.204676, + 0.201542, + 0.195975, + 0.200007, + 0.201927, + 0.201888, + 0.202082, + 0.200586, + 0.200678, + 0.202247, + 0.202917, + 0.201463, + 0.199833, + 0.200337, + 0.204053, + 0.203632, + 0.203573, + 0.204396, + 0.203647, + 0.202175, + 0.200573, + 0.205575, + 0.207601, + 0.204243 + ], + "train_epoch_time": 4.840423345565796, + "train_loss": 2.297888960421, + "train_score": 0.3165317879089153, + "val_loss": 2.355872270844697, + "val_score": 0.3031581878046217 + }, + { + "epoch": 8, + "grad_norm": 1.1550438404083252, + "learning_rate": 0.215, + "model_norm": 87.62825012207031, + "step_logs": { + "grad_norm": { + "432": 1.365153193473816, + "433": 1.3560038805007935, + "434": 1.368341088294983, + "435": 1.3174641132354736, + "436": 1.146405577659607, + "437": 1.0643583536148071, + "438": 1.0985276699066162, + "439": 1.2951935529708862, + "440": 1.3509509563446045, + "441": 1.3641767501831055, + "442": 1.2514740228652954, + "443": 1.1132721900939941, + "444": 1.1015610694885254, + "445": 1.2110471725463867, + "446": 1.303719401359558, + "447": 1.410711407661438, + "448": 1.2663205862045288, + "449": 1.5121196508407593, + "450": 1.2035973072052002, + "451": 1.2652722597122192, + "452": 1.3201920986175537, + "453": 1.1836808919906616, + "454": 1.1488877534866333, + "455": 1.12118661403656, + "456": 1.0805782079696655, + "457": 1.07243812084198, + "458": 1.1381369829177856, + "459": 1.186643362045288, + "460": 1.3064751625061035, + "461": 1.2733253240585327, + "462": 1.2638254165649414, + "463": 1.2106198072433472, + "464": 1.1537352800369263, + "465": 1.130597710609436, + "466": 1.1394665241241455, + "467": 1.2455463409423828, + "468": 1.1818020343780518, + "469": 1.0450785160064697, + "470": 1.0058342218399048, + "471": 1.0737264156341553, + "472": 1.1295397281646729, + "473": 1.4419820308685303, + "474": 1.3979628086090088, + "475": 1.0647788047790527, + "476": 0.9907328486442566, + "477": 1.1762112379074097, + "478": 1.1809062957763672, + "479": 1.027660846710205, + "480": 1.0184491872787476, + "481": 1.176929235458374, + "482": 1.2625895738601685, + "483": 1.3737468719482422, + "484": 1.1834872961044312, + "485": 1.1550438404083252 + }, + "loss": { + "432": 2.3010950088500977, + "433": 2.330348014831543, + "434": 2.313136100769043, + "435": 2.305276870727539, + "436": 2.3067116737365723, + "437": 2.2573041915893555, + "438": 2.2753310203552246, + "439": 2.26004695892334, + "440": 2.326977252960205, + "441": 2.2865724563598633, + "442": 2.302429676055908, + "443": 2.262026309967041, + "444": 2.3001110553741455, + "445": 2.2458291053771973, + "446": 2.293147563934326, + "447": 2.274738311767578, + "448": 2.3027405738830566, + "449": 2.3040218353271484, + "450": 2.344588279724121, + "451": 2.261910915374756, + "452": 2.3321967124938965, + "453": 2.269744873046875, + "454": 2.2844362258911133, + "455": 2.259955883026123, + "456": 2.2510881423950195, + "457": 2.2372021675109863, + "458": 2.2662062644958496, + "459": 2.2566254138946533, + "460": 2.279414176940918, + "461": 2.288125991821289, + "462": 2.2763781547546387, + "463": 2.271963119506836, + "464": 2.252501964569092, + "465": 2.269219160079956, + "466": 2.2501087188720703, + "467": 2.260176658630371, + "468": 2.2875587940216064, + "469": 2.252750873565674, + "470": 2.235483169555664, + "471": 2.267218589782715, + "472": 2.2557997703552246, + "473": 2.2781195640563965, + "474": 2.320575714111328, + "475": 2.249013662338257, + "476": 2.237639904022217, + "477": 2.2713351249694824, + "478": 2.2765660285949707, + "479": 2.2186012268066406, + "480": 2.2426602840423584, + "481": 2.236692428588867, + "482": 2.2795021533966064, + "483": 2.296290397644043, + "484": 2.2837154865264893, + "485": 2.263822317123413 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 0.197781, + 0.198189, + 0.197789, + 0.198901, + 0.202592, + 0.203994, + 0.203403, + 0.199112, + 0.198282, + 0.197703, + 0.200349, + 0.203041, + 0.203461, + 0.200897, + 0.199133, + 0.196518, + 0.200026, + 0.194274, + 0.201609, + 0.199798, + 0.199012, + 0.201621, + 0.202427, + 0.202869, + 0.203645, + 0.20374, + 0.202554, + 0.201485, + 0.198982, + 0.199782, + 0.19992, + 0.201057, + 0.202158, + 0.202724, + 0.202442, + 0.200226, + 0.201758, + 0.20435, + 0.205025, + 0.203856, + 0.202677, + 0.195789, + 0.197151, + 0.203948, + 0.205318, + 0.201787, + 0.201717, + 0.204534, + 0.204817, + 0.20158, + 0.199967, + 0.197547, + 0.201702, + 0.202191 + ], + "train_epoch_time": 4.840659856796265, + "train_loss": 2.265298646354949, + "train_score": 0.33486594321738017, + "val_loss": 2.3233141140877587, + "val_score": 0.31987747632129593 + }, + { + "epoch": 9, + "grad_norm": 1.2153007984161377, + "learning_rate": 0.215, + "model_norm": 87.65998077392578, + "step_logs": { + "grad_norm": { + "486": 1.233638882637024, + "487": 1.2141677141189575, + "488": 1.2064309120178223, + "489": 1.191043734550476, + "490": 1.1241681575775146, + "491": 1.0938643217086792, + "492": 1.2959375381469727, + "493": 1.3498784303665161, + "494": 1.279057502746582, + "495": 1.1091612577438354, + "496": 1.1037935018539429, + "497": 1.1127700805664062, + "498": 1.1932390928268433, + "499": 1.2705918550491333, + "500": 1.3297266960144043, + "501": 1.190787434577942, + "502": 1.0845469236373901, + "503": 1.155734896659851, + "504": 1.386297583580017, + "505": 1.594787359237671, + "506": 1.3469895124435425, + "507": 1.053652048110962, + "508": 1.1587133407592773, + "509": 1.2236223220825195, + "510": 1.1223140954971313, + "511": 1.035866141319275, + "512": 1.042704701423645, + "513": 1.0608100891113281, + "514": 1.095118761062622, + "515": 1.1616548299789429, + "516": 1.1331512928009033, + "517": 1.0162184238433838, + "518": 1.0010215044021606, + "519": 1.3247815370559692, + "520": 1.68272864818573, + "521": 1.511479377746582, + "522": 1.2918812036514282, + "523": 1.2313283681869507, + "524": 1.375147819519043, + "525": 1.3673951625823975, + "526": 1.2110605239868164, + "527": 1.150638461112976, + "528": 1.1753302812576294, + "529": 1.12422776222229, + "530": 1.2279469966888428, + "531": 1.2788223028182983, + "532": 1.2954473495483398, + "533": 1.2269160747528076, + "534": 1.0226807594299316, + "535": 1.04542875289917, + "536": 1.160315752029419, + "537": 1.2120037078857422, + "538": 1.2115342617034912, + "539": 1.2153007984161377 + }, + "loss": { + "486": 2.271240234375, + "487": 2.2528562545776367, + "488": 2.2755236625671387, + "489": 2.2544641494750977, + "490": 2.235544443130493, + "491": 2.2373178005218506, + "492": 2.252967357635498, + "493": 2.288891315460205, + "494": 2.2466163635253906, + "495": 2.2325289249420166, + "496": 2.2303414344787598, + "497": 2.237607717514038, + "498": 2.219994068145752, + "499": 2.2615089416503906, + "500": 2.243333339691162, + "501": 2.2542519569396973, + "502": 2.2028679847717285, + "503": 2.2445478439331055, + "504": 2.240562677383423, + "505": 2.289966106414795, + "506": 2.2813310623168945, + "507": 2.2160959243774414, + "508": 2.2253313064575195, + "509": 2.2478623390197754, + "510": 2.2122726440429688, + "511": 2.2347638607025146, + "512": 2.200835704803467, + "513": 2.2348477840423584, + "514": 2.2356576919555664, + "515": 2.218681812286377, + "516": 2.206712245941162, + "517": 2.1890621185302734, + "518": 2.1951513290405273, + "519": 2.2241313457489014, + "520": 2.2651076316833496, + "521": 2.264780044555664, + "522": 2.2271225452423096, + "523": 2.2212657928466797, + "524": 2.248021125793457, + "525": 2.278855323791504, + "526": 2.2251272201538086, + "527": 2.2155160903930664, + "528": 2.2381434440612793, + "529": 2.230454206466675, + "530": 2.2266294956207275, + "531": 2.232898235321045, + "532": 2.2049005031585693, + "533": 2.2213573455810547, + "534": 2.1983048915863037, + "535": 2.211486339569092, + "536": 2.213494300842285, + "537": 2.2402520179748535, + "538": 2.2305421829223633, + "539": 2.2173516750335693 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 0.200554, + 0.20087, + 0.201168, + 0.201378, + 0.202683, + 0.203311, + 0.199049, + 0.198051, + 0.199391, + 0.202976, + 0.203075, + 0.202928, + 0.201133, + 0.199677, + 0.198206, + 0.201383, + 0.203329, + 0.202073, + 0.196849, + 0.192068, + 0.198066, + 0.204013, + 0.201905, + 0.200634, + 0.2026, + 0.204447, + 0.204158, + 0.20396, + 0.203278, + 0.201805, + 0.202343, + 0.204623, + 0.204943, + 0.198188, + 0.18953, + 0.193966, + 0.198971, + 0.200303, + 0.19717, + 0.197574, + 0.200774, + 0.202022, + 0.201622, + 0.202655, + 0.200411, + 0.199308, + 0.198739, + 0.200401, + 0.204539, + 0.204154, + 0.201805, + 0.200843, + 0.200796, + 0.200634 + ], + "train_epoch_time": 4.840081453323364, + "train_loss": 2.2086696682222606, + "train_score": 0.35030039462502754, + "val_loss": 2.2746743570102206, + "val_score": 0.3350764209860091 + }, + { + "epoch": 10, + "grad_norm": 1.1135305166244507, + "learning_rate": 0.215, + "model_norm": 87.6910629272461, + "step_logs": { + "grad_norm": { + "540": 1.1924806833267212, + "541": 1.2105599641799927, + "542": 1.3154199123382568, + "543": 1.315085768699646, + "544": 1.369020700454712, + "545": 1.4047414064407349, + "546": 1.2444480657577515, + "547": 1.0646004676818848, + "548": 1.072293758392334, + "549": 1.1664706468582153, + "550": 1.284791350364685, + "551": 1.4361481666564941, + "552": 1.721239686012268, + "553": 1.5366016626358032, + "554": 1.2604020833969116, + "555": 1.208587646484375, + "556": 1.2766797542572021, + "557": 1.367066502571106, + "558": 1.3245970010757446, + "559": 1.2827086448669434, + "560": 1.23690927028656, + "561": 1.0754646062850952, + "562": 1.1085658073425293, + "563": 1.15536367893219, + "564": 1.195185661315918, + "565": 1.199941635131836, + "566": 1.1892069578170776, + "567": 1.1026349067687988, + "568": 1.0973957777023315, + "569": 1.1464769840240479, + "570": 1.1958860158920288, + "571": 1.1105296611785889, + "572": 1.0993361473083496, + "573": 1.131054401397705, + "574": 1.1951467990875244, + "575": 1.2088366746902466, + "576": 1.193009853363037, + "577": 1.228737711906433, + "578": 1.2117626667022705, + "579": 1.3306143283843994, + "580": 1.319515347480774, + "581": 1.3573763370513916, + "582": 1.191267728805542, + "583": 1.140681505203247, + "584": 1.2900415658950806, + "585": 1.2409026622772217, + "586": 1.0990839004516602, + "587": 1.152770757675171, + "588": 1.1782840490341187, + "589": 1.2219172716140747, + "590": 1.2784974575042725, + "591": 1.2382553815841675, + "592": 1.2049047946929932, + "593": 1.1135305166244507 + }, + "loss": { + "540": 2.213007926940918, + "541": 2.212836265563965, + "542": 2.208578109741211, + "543": 2.2307372093200684, + "544": 2.1928415298461914, + "545": 2.2565293312072754, + "546": 2.224583148956299, + "547": 2.2006492614746094, + "548": 2.173956871032715, + "549": 2.208059310913086, + "550": 2.198654890060425, + "551": 2.2533912658691406, + "552": 2.217897891998291, + "553": 2.2837929725646973, + "554": 2.219416618347168, + "555": 2.191779136657715, + "556": 2.2330005168914795, + "557": 2.2229256629943848, + "558": 2.2020926475524902, + "559": 2.1982059478759766, + "560": 2.2267231941223145, + "561": 2.181406021118164, + "562": 2.1991848945617676, + "563": 2.1945605278015137, + "564": 2.1959872245788574, + "565": 2.1900382041931152, + "566": 2.2034618854522705, + "567": 2.199899196624756, + "568": 2.197300910949707, + "569": 2.1927294731140137, + "570": 2.1972815990448, + "571": 2.1866087913513184, + "572": 2.190741539001465, + "573": 2.1684489250183105, + "574": 2.176062822341919, + "575": 2.1910810470581055, + "576": 2.2188782691955566, + "577": 2.2018861770629883, + "578": 2.1876392364501953, + "579": 2.20829439163208, + "580": 2.2024965286254883, + "581": 2.231093406677246, + "582": 2.2048001289367676, + "583": 2.161870241165161, + "584": 2.213850975036621, + "585": 2.209580183029175, + "586": 2.2153677940368652, + "587": 2.149094820022583, + "588": 2.1656312942504883, + "589": 2.2077109813690186, + "590": 2.1999168395996094, + "591": 2.18867826461792, + "592": 2.178313732147217, + "593": 2.17741060256958 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 0.201108, + 0.200711, + 0.198299, + 0.19846, + 0.196908, + 0.196525, + 0.20003, + 0.203721, + 0.203433, + 0.201642, + 0.198944, + 0.19574, + 0.188003, + 0.193495, + 0.199639, + 0.200627, + 0.199357, + 0.197179, + 0.198038, + 0.198989, + 0.200212, + 0.203406, + 0.202816, + 0.201804, + 0.200948, + 0.200808, + 0.201123, + 0.202943, + 0.203038, + 0.201984, + 0.200941, + 0.202709, + 0.202964, + 0.202178, + 0.200829, + 0.200617, + 0.201131, + 0.20024, + 0.200531, + 0.19794, + 0.19816, + 0.19747, + 0.201086, + 0.201935, + 0.198925, + 0.200016, + 0.203095, + 0.201599, + 0.201138, + 0.200428, + 0.199097, + 0.199943, + 0.200626, + 0.202598 + ], + "train_epoch_time": 4.840947866439819, + "train_loss": 2.16334964568851, + "train_score": 0.3608444673686664, + "val_loss": 2.2341386738108167, + "val_score": 0.3450416187604998 + }, + { + "epoch": 11, + "grad_norm": 1.312681794166565, + "learning_rate": 0.215, + "model_norm": 87.7225341796875, + "step_logs": { + "grad_norm": { + "594": 1.008500099182129, + "595": 1.0446864366531372, + "596": 1.1473726034164429, + "597": 1.2513790130615234, + "598": 1.241938829421997, + "599": 1.3187003135681152, + "600": 1.3406378030776978, + "601": 1.1958459615707397, + "602": 1.173087477684021, + "603": 1.232597827911377, + "604": 1.2778140306472778, + "605": 1.2159404754638672, + "606": 1.1979267597198486, + "607": 1.2797011137008667, + "608": 1.179030418395996, + "609": 1.2141413688659668, + "610": 1.3440357446670532, + "611": 1.5633234977722168, + "612": 1.482245922088623, + "613": 1.1294686794281006, + "614": 1.0476479530334473, + "615": 1.1184669733047485, + "616": 1.195744514465332, + "617": 1.306510329246521, + "618": 1.3374907970428467, + "619": 1.2111223936080933, + "620": 1.089604139328003, + "621": 1.0641323328018188, + "622": 1.2376214265823364, + "623": 1.367131233215332, + "624": 1.3086905479431152, + "625": 1.2927950620651245, + "626": 1.3221948146820068, + "627": 1.1964964866638184, + "628": 1.1774989366531372, + "629": 1.1938879489898682, + "630": 1.165571689605713, + "631": 1.2354921102523804, + "632": 1.168449878692627, + "633": 1.0851802825927734, + "634": 1.0655956268310547, + "635": 1.0434281826019287, + "636": 1.0470391511917114, + "637": 1.1096960306167603, + "638": 1.128406047821045, + "639": 1.202164649963379, + "640": 1.2895901203155518, + "641": 1.3830327987670898, + "642": 1.2526203393936157, + "643": 1.2544100284576416, + "644": 1.288190245628357, + "645": 1.3050748109817505, + "646": 1.2612011432647705, + "647": 1.312681794166565 + }, + "loss": { + "594": 2.1835596561431885, + "595": 2.1635217666625977, + "596": 2.15520977973938, + "597": 2.2058725357055664, + "598": 2.162757635116577, + "599": 2.1719326972961426, + "600": 2.2152485847473145, + "601": 2.183899164199829, + "602": 2.1836190223693848, + "603": 2.188340187072754, + "604": 2.182103157043457, + "605": 2.1895699501037598, + "606": 2.1926074028015137, + "607": 2.1965651512145996, + "608": 2.1853103637695312, + "609": 2.175611972808838, + "610": 2.1703147888183594, + "611": 2.2041213512420654, + "612": 2.2091188430786133, + "613": 2.1748290061950684, + "614": 2.1578235626220703, + "615": 2.12998628616333, + "616": 2.177206516265869, + "617": 2.161860942840576, + "618": 2.1763672828674316, + "619": 2.1999690532684326, + "620": 2.154531478881836, + "621": 2.1640496253967285, + "622": 2.1672894954681396, + "623": 2.2073581218719482, + "624": 2.1857070922851562, + "625": 2.1778488159179688, + "626": 2.15925931930542, + "627": 2.1949963569641113, + "628": 2.1528680324554443, + "629": 2.15946888923645, + "630": 2.137371301651001, + "631": 2.164529323577881, + "632": 2.1640875339508057, + "633": 2.15228271484375, + "634": 2.143721580505371, + "635": 2.1501126289367676, + "636": 2.116665840148926, + "637": 2.1433191299438477, + "638": 2.146568536758423, + "639": 2.1621246337890625, + "640": 2.1805591583251953, + "641": 2.1786248683929443, + "642": 2.1812148094177246, + "643": 2.1379170417785645, + "644": 2.1585628986358643, + "645": 2.1806044578552246, + "646": 2.1574254035949707, + "647": 2.160951852798462 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 0.204748, + 0.203941, + 0.201752, + 0.199756, + 0.199691, + 0.197961, + 0.197752, + 0.200861, + 0.201358, + 0.200068, + 0.198993, + 0.200449, + 0.200868, + 0.199047, + 0.201239, + 0.200403, + 0.197343, + 0.192102, + 0.194234, + 0.202247, + 0.203853, + 0.202232, + 0.200823, + 0.198179, + 0.197545, + 0.200621, + 0.202976, + 0.20355, + 0.199819, + 0.197063, + 0.198297, + 0.198615, + 0.197786, + 0.200913, + 0.201079, + 0.200755, + 0.201249, + 0.199849, + 0.201345, + 0.203057, + 0.203417, + 0.203901, + 0.203661, + 0.202493, + 0.202112, + 0.200587, + 0.198709, + 0.196458, + 0.199567, + 0.199236, + 0.198588, + 0.198346, + 0.199211, + 0.198025 + ], + "train_epoch_time": 4.842037916183472, + "train_loss": 2.1703756035485946, + "train_score": 0.36013719511339964, + "val_loss": 2.2594807024801367, + "val_score": 0.33866425164805214 + }, + { + "epoch": 12, + "grad_norm": 0.8645442128181458, + "learning_rate": 0.215, + "model_norm": 87.7514877319336, + "step_logs": { + "grad_norm": { + "648": 1.3653533458709717, + "649": 1.3219352960586548, + "650": 1.5024542808532715, + "651": 1.3224575519561768, + "652": 1.1388252973556519, + "653": 1.1506034135818481, + "654": 1.1217355728149414, + "655": 1.1146156787872314, + "656": 1.0351256132125854, + "657": 1.056700348854065, + "658": 1.1291779279708862, + "659": 1.0608233213424683, + "660": 1.0120782852172852, + "661": 1.086010217666626, + "662": 1.2112925052642822, + "663": 1.2657173871994019, + "664": 1.3186125755310059, + "665": 1.314749836921692, + "666": 1.1181504726409912, + "667": 0.9255827069282532, + "668": 0.8506559133529663, + "669": 0.8628355860710144, + "670": 0.9333071708679199, + "671": 0.9687804579734802, + "672": 0.9678217768669128, + "673": 0.8736199736595154, + "674": 0.8916498422622681, + "675": 0.9800131320953369, + "676": 1.0898487567901611, + "677": 1.135873794555664, + "678": 1.0493063926696777, + "679": 0.9587380886077881, + "680": 0.9366052746772766, + "681": 1.002776026725769, + "682": 0.9248806238174438, + "683": 0.86803138256073, + "684": 0.9090822339057922, + "685": 0.9373408555984497, + "686": 0.9818825721740723, + "687": 0.9594290256500244, + "688": 0.8473807573318481, + "689": 0.7880852818489075, + "690": 0.831360399723053, + "691": 0.8623039722442627, + "692": 0.859737753868103, + "693": 0.8209106922149658, + "694": 0.7813437581062317, + "695": 0.8045181035995483, + "696": 0.9573120474815369, + "697": 0.9565431475639343, + "698": 0.9431483745574951, + "699": 0.9549505710601807, + "700": 0.9098271727561951, + "701": 0.8645442128181458 + }, + "loss": { + "648": 2.1861355304718018, + "649": 2.2000722885131836, + "650": 2.1804940700531006, + "651": 2.205758810043335, + "652": 2.141606330871582, + "653": 2.12888765335083, + "654": 2.1415884494781494, + "655": 2.1430020332336426, + "656": 2.1317567825317383, + "657": 2.122035026550293, + "658": 2.1527321338653564, + "659": 2.117032527923584, + "660": 2.106912612915039, + "661": 2.1272120475769043, + "662": 2.1285524368286133, + "663": 2.127065420150757, + "664": 2.1504104137420654, + "665": 2.1715259552001953, + "666": 2.135071039199829, + "667": 2.128135919570923, + "668": 2.089106321334839, + "669": 2.098954916000366, + "670": 2.121504306793213, + "671": 2.1078615188598633, + "672": 2.0789291858673096, + "673": 2.078354835510254, + "674": 2.0973331928253174, + "675": 2.0997142791748047, + "676": 2.1036458015441895, + "677": 2.1171813011169434, + "678": 2.126020669937134, + "679": 2.066249370574951, + "680": 2.073198080062866, + "681": 2.0999155044555664, + "682": 2.1347076892852783, + "683": 2.081172466278076, + "684": 2.0877301692962646, + "685": 2.087977170944214, + "686": 2.1000242233276367, + "687": 2.0988271236419678, + "688": 2.0919158458709717, + "689": 2.0790212154388428, + "690": 2.0437426567077637, + "691": 2.067035675048828, + "692": 2.082629919052124, + "693": 2.0848536491394043, + "694": 2.079010009765625, + "695": 2.1066741943359375, + "696": 2.0822031497955322, + "697": 2.07090163230896, + "698": 2.0942249298095703, + "699": 2.0699968338012695, + "700": 2.0855460166931152, + "701": 2.0823583602905273 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 0.196946, + 0.196959, + 0.191317, + 0.194728, + 0.197172, + 0.195686, + 0.195167, + 0.194134, + 0.194398, + 0.192758, + 0.190356, + 0.190267, + 0.189885, + 0.18747, + 0.183966, + 0.18174, + 0.179689, + 0.178748, + 0.180984, + 0.182801, + 0.1825, + 0.181119, + 0.178976, + 0.177198, + 0.175891, + 0.175947, + 0.174523, + 0.172114, + 0.169338, + 0.167503, + 0.167583, + 0.167385, + 0.166455, + 0.164462, + 0.164285, + 0.163559, + 0.161865, + 0.1603, + 0.158578, + 0.157599, + 0.157536, + 0.156825, + 0.155083, + 0.15357, + 0.152371, + 0.151476, + 0.150548, + 0.149127, + 0.146421, + 0.14516, + 0.144093, + 0.142685, + 0.141877, + 0.141 + ], + "train_epoch_time": 4.842416763305664, + "train_loss": 2.0689756368804013, + "train_score": 0.38654949774530045, + "val_loss": 2.1623969784286623, + "val_score": 0.3602854122934878 + }, + { + "epoch": 13, + "grad_norm": 0.5363101959228516, + "learning_rate": 0.14333333333333334, + "model_norm": 87.76978302001953, + "step_logs": { + "grad_norm": { + "702": 0.8883509635925293, + "703": 0.8592520356178284, + "704": 0.769087016582489, + "705": 0.6632691621780396, + "706": 0.7133074402809143, + "707": 0.7297330498695374, + "708": 0.7723709344863892, + "709": 0.6901921629905701, + "710": 0.7359227538108826, + "711": 0.7262988090515137, + "712": 0.651531994342804, + "713": 0.7605281472206116, + "714": 0.9197530150413513, + "715": 0.9430732727050781, + "716": 0.8162515759468079, + "717": 0.7407421469688416, + "718": 0.7444875836372375, + "719": 0.7681788206100464, + "720": 0.7688164710998535, + "721": 0.7129867672920227, + "722": 0.645167887210846, + "723": 0.6192553639411926, + "724": 0.6808722019195557, + "725": 0.7311127185821533, + "726": 0.7050772905349731, + "727": 0.6581369638442993, + "728": 0.6261205077171326, + "729": 0.6191504001617432, + "730": 0.6982353925704956, + "731": 0.621631383895874, + "732": 0.5973417162895203, + "733": 0.5936336517333984, + "734": 0.5752581357955933, + "735": 0.575713038444519, + "736": 0.5697436928749084, + "737": 0.5716831684112549, + "738": 0.6064039468765259, + "739": 0.57911616563797, + "740": 0.5792168378829956, + "741": 0.5748128890991211, + "742": 0.5458971858024597, + "743": 0.5080474019050598, + "744": 0.5709893107414246, + "745": 0.5712481141090393, + "746": 0.5845890641212463, + "747": 0.6277804374694824, + "748": 0.6384970545768738, + "749": 0.6545924544334412, + "750": 0.5954940915107727, + "751": 0.6030061841011047, + "752": 0.6112725734710693, + "753": 0.5923959612846375, + "754": 0.5564180612564087, + "755": 0.5363101959228516 + }, + "loss": { + "702": 2.0729596614837646, + "703": 2.0633277893066406, + "704": 2.0597636699676514, + "705": 2.0561351776123047, + "706": 2.0582871437072754, + "707": 2.040992498397827, + "708": 2.048515796661377, + "709": 2.033328056335449, + "710": 2.052187442779541, + "711": 2.045327663421631, + "712": 2.022686004638672, + "713": 2.05265212059021, + "714": 2.0748143196105957, + "715": 2.0561935901641846, + "716": 2.0642476081848145, + "717": 2.0598320960998535, + "718": 2.0539934635162354, + "719": 2.0780582427978516, + "720": 2.059189796447754, + "721": 2.0391459465026855, + "722": 2.0503721237182617, + "723": 2.0561342239379883, + "724": 2.060713768005371, + "725": 2.0360946655273438, + "726": 2.0340356826782227, + "727": 2.0316989421844482, + "728": 2.05830717086792, + "729": 2.0323402881622314, + "730": 2.04949688911438, + "731": 2.0531349182128906, + "732": 2.0473358631134033, + "733": 2.030392646789551, + "734": 2.043651580810547, + "735": 2.0507802963256836, + "736": 2.0280494689941406, + "737": 2.0345144271850586, + "738": 2.025296688079834, + "739": 2.053833484649658, + "740": 2.0281972885131836, + "741": 2.020223617553711, + "742": 2.0417661666870117, + "743": 2.0308613777160645, + "744": 2.0394160747528076, + "745": 2.0192008018493652, + "746": 2.0360751152038574, + "747": 2.0164196491241455, + "748": 2.034536838531494, + "749": 2.03755784034729, + "750": 2.0405497550964355, + "751": 2.0115280151367188, + "752": 2.039904832839966, + "753": 2.0261573791503906, + "754": 2.0093178749084473, + "755": 2.014651298522949 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 0.139527, + 0.138488, + 0.137894, + 0.137305, + 0.13571, + 0.134303, + 0.132754, + 0.131971, + 0.130432, + 0.1292, + 0.128311, + 0.126441, + 0.124182, + 0.122734, + 0.122291, + 0.12143, + 0.12012, + 0.118736, + 0.117431, + 0.116403, + 0.115422, + 0.114233, + 0.112689, + 0.111163, + 0.109983, + 0.108876, + 0.107711, + 0.106421, + 0.104849, + 0.103821, + 0.102593, + 0.101293, + 0.100047, + 0.0987428, + 0.097444, + 0.0961346, + 0.0947338, + 0.093509, + 0.0921929, + 0.0908934, + 0.0896564, + 0.0884201, + 0.0869836, + 0.0856683, + 0.0843371, + 0.0829335, + 0.0816111, + 0.0802735, + 0.0790825, + 0.0777522, + 0.0764364, + 0.0751558, + 0.0738979, + 0.0726155 + ], + "train_epoch_time": 4.841723442077637, + "train_loss": 2.0200369407319956, + "train_score": 0.40081263441095394, + "val_loss": 2.1211726931014647, + "val_score": 0.3735514136050517 + }, + { + "epoch": 14, + "grad_norm": 0.49820977449417114, + "learning_rate": 0.07166666666666667, + "model_norm": 87.77592468261719, + "step_logs": { + "grad_norm": { + "756": 0.5221066474914551, + "757": 0.5132834911346436, + "758": 0.5706493258476257, + "759": 0.535114586353302, + "760": 0.5421645641326904, + "761": 0.5559265613555908, + "762": 0.526034951210022, + "763": 0.550947904586792, + "764": 0.5373929738998413, + "765": 0.5140271782875061, + "766": 0.5464844703674316, + "767": 0.549755334854126, + "768": 0.5052742958068848, + "769": 0.5164358019828796, + "770": 0.5272457599639893, + "771": 0.5559020042419434, + "772": 0.5261601805686951, + "773": 0.5093954205513, + "774": 0.5267736911773682, + "775": 0.5561476349830627, + "776": 0.5652735233306885, + "777": 0.4991748332977295, + "778": 0.5283232927322388, + "779": 0.5367175936698914, + "780": 0.5495347380638123, + "781": 0.5226330161094666, + "782": 0.5215216875076294, + "783": 0.5344538688659668, + "784": 0.5237383842468262, + "785": 0.49886298179626465, + "786": 0.5775716304779053, + "787": 0.4731007516384125, + "788": 0.5129083395004272, + "789": 0.5332512855529785, + "790": 0.5280617475509644, + "791": 0.5684983134269714, + "792": 0.5158213376998901, + "793": 0.47962474822998047, + "794": 0.5290770530700684, + "795": 0.5021599531173706, + "796": 0.5128231644630432, + "797": 0.5109617114067078, + "798": 0.484877347946167, + "799": 0.46217435598373413, + "800": 0.5111664533615112, + "801": 0.49971768260002136, + "802": 0.5418329834938049, + "803": 0.4835483133792877, + "804": 0.5274619460105896, + "805": 0.4836483299732208, + "806": 0.4814581871032715, + "807": 0.5172290802001953, + "808": 0.48542389273643494, + "809": 0.49820977449417114 + }, + "loss": { + "756": 1.9961459636688232, + "757": 2.016197443008423, + "758": 2.027376651763916, + "759": 2.009145498275757, + "760": 1.9886105060577393, + "761": 2.033357858657837, + "762": 2.028230667114258, + "763": 2.021200180053711, + "764": 2.0305662155151367, + "765": 2.0066072940826416, + "766": 2.0238027572631836, + "767": 2.0330982208251953, + "768": 2.0092780590057373, + "769": 1.9869295358657837, + "770": 2.0419557094573975, + "771": 2.0181655883789062, + "772": 2.033578395843506, + "773": 2.0078439712524414, + "774": 2.0236825942993164, + "775": 2.0226707458496094, + "776": 2.017505645751953, + "777": 2.0299360752105713, + "778": 1.9937987327575684, + "779": 2.0203371047973633, + "780": 1.9887791872024536, + "781": 1.9968193769454956, + "782": 2.0263595581054688, + "783": 2.0171070098876953, + "784": 2.040105104446411, + "785": 2.0039658546447754, + "786": 2.0116939544677734, + "787": 2.015235424041748, + "788": 2.0473623275756836, + "789": 2.015364646911621, + "790": 2.009763717651367, + "791": 2.0257201194763184, + "792": 2.0156891345977783, + "793": 2.0145578384399414, + "794": 2.0035433769226074, + "795": 2.0284392833709717, + "796": 1.9969291687011719, + "797": 2.025338888168335, + "798": 2.0100743770599365, + "799": 1.9956440925598145, + "800": 2.01975679397583, + "801": 2.017876148223877, + "802": 1.9919517040252686, + "803": 1.9907593727111816, + "804": 2.0122876167297363, + "805": 2.001923084259033, + "806": 2.0100719928741455, + "807": 2.010221481323242, + "808": 1.9846417903900146, + "809": 2.003322124481201 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 0.0713177, + 0.0700177, + 0.068632, + 0.0673603, + 0.0660342, + 0.0647111, + 0.0634281, + 0.0620857, + 0.0607855, + 0.0594883, + 0.0581445, + 0.0568269, + 0.055544, + 0.0542156, + 0.0528953, + 0.051555, + 0.0502596, + 0.0489496, + 0.0476218, + 0.0462862, + 0.0449628, + 0.0436789, + 0.0423433, + 0.0410217, + 0.0396948, + 0.0383866, + 0.0370681, + 0.0357426, + 0.0344263, + 0.0331108, + 0.031768, + 0.030473, + 0.0291429, + 0.0278157, + 0.0264944, + 0.0251654, + 0.0238513, + 0.0225327, + 0.0212031, + 0.0198828, + 0.0185575, + 0.0172339, + 0.0159111, + 0.0145874, + 0.0132602, + 0.0119356, + 0.010609, + 0.00928506, + 0.00795858, + 0.00663323, + 0.00530702, + 0.00398043, + 0.0026539, + 0.00132705 + ], + "train_epoch_time": 4.842728853225708, + "train_loss": 2.0071171917908504, + "train_score": 0.40409791955304797, + "val_loss": 2.1130431497685533, + "val_score": 0.3750403628146744 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:28:45.356462", + "final_model_norm": 87.77592468261719, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:27:03.811394", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.215, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 4.224915981292725, + "learning_rate": 2.15e-11, + "model_norm": 87.43364715576172, + "step_logs": { + "grad_norm": { + "0": 22.7664794921875, + "1": 23.4499454498291, + "2": 6.8804192543029785, + "3": 7.361940383911133, + "4": 20.986963272094727, + "5": 7.570193290710449, + "6": 5.781525611877441, + "7": 4.410154819488525, + "8": 3.9404349327087402, + "9": 7.543168067932129, + "10": 5.914219379425049, + "11": 6.213420391082764, + "12": 49.07276153564453, + "13": 3.979478359222412, + "14": 6.725850582122803, + "15": 31.08464241027832, + "16": 3.8007287979125977, + "17": 31.767662048339844, + "18": 3.544416666030884, + "19": 12.058910369873047, + "20": 4.551290512084961, + "21": 12.3528470993042, + "22": 5.656938552856445, + "23": 37.410377502441406, + "24": 3.778252363204956, + "25": 5.534794330596924, + "26": 3.278383255004883, + "27": 7.334910869598389, + "28": 4.0505690574646, + "29": 16.37187385559082, + "30": 5.312236309051514, + "31": 11.506085395812988, + "32": 4.718480587005615, + "33": 11.081938743591309, + "34": 4.557792663574219, + "35": 16.363351821899414, + "36": 3.371377468109131, + "37": 11.182097434997559, + "38": 6.526354789733887, + "39": 3.1674888134002686, + "40": 15.734713554382324, + "41": 5.038153648376465, + "42": 4.464365482330322, + "43": 2.5911953449249268, + "44": 5.960587501525879, + "45": 3.347862720489502, + "46": 3.0237228870391846, + "47": 1.7852531671524048, + "48": 2.6196510791778564, + "49": 3.746157169342041, + "50": 3.715843677520752, + "51": 2.8751227855682373, + "52": 2.096085548400879, + "53": 4.224915981292725 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.8451218605041504, + "3": 3.6794261932373047, + "4": 4.073674201965332, + "5": 4.087124824523926, + "6": 3.5744667053222656, + "7": 3.6075944900512695, + "8": 3.501760959625244, + "9": 3.513716459274292, + "10": 3.7672653198242188, + "11": 3.391489028930664, + "12": 3.6313395500183105, + "13": 3.4073574542999268, + "14": 3.5225577354431152, + "15": 3.5085549354553223, + "16": 3.3431077003479004, + "17": 3.6108317375183105, + "18": 3.094069004058838, + "19": 3.347799777984619, + "20": 3.0556721687316895, + "21": 3.530245780944824, + "22": 3.380490303039551, + "23": 4.97763204574585, + "24": 3.108004331588745, + "25": 3.260448455810547, + "26": 2.997645854949951, + "27": 3.225484848022461, + "28": 2.9407646656036377, + "29": 3.5613508224487305, + "30": 3.257999897003174, + "31": 3.193094491958618, + "32": 3.066549777984619, + "33": 3.2439041137695312, + "34": 3.0571742057800293, + "35": 3.4001336097717285, + "36": 2.9874606132507324, + "37": 3.6888062953948975, + "38": 3.831803321838379, + "39": 2.945518970489502, + "40": 3.4076015949249268, + "41": 3.117316246032715, + "42": 3.391188859939575, + "43": 3.0278728008270264, + "44": 3.1067731380462646, + "45": 3.3740577697753906, + "46": 3.062196731567383, + "47": 2.8193845748901367, + "48": 2.8466029167175293, + "49": 3.1355485916137695, + "50": 3.2099781036376953, + "51": 3.2593750953674316, + "52": 2.848984479904175, + "53": 3.01206636428833 + }, + "lr": { + "0": 2.15e-11, + "1": 0.00430000002107, + "2": 0.00860000002064, + "3": 0.01290000002021, + "4": 0.017200000019779997, + "5": 0.02150000001935, + "6": 0.025800000018919998, + "7": 0.03010000001849, + "8": 0.03440000001806, + "9": 0.03870000001763, + "10": 0.0430000000172, + "11": 0.04730000001677, + "12": 0.05160000001634, + "13": 0.055900000015909994, + "14": 0.060200000015479996, + "15": 0.06450000001505, + "16": 0.06880000001462, + "17": 0.07310000001419, + "18": 0.07740000001376, + "19": 0.08170000001333, + "20": 0.08600000001290001, + "21": 0.09030000001246999, + "22": 0.09460000001204, + "23": 0.09890000001161, + "24": 0.10320000001118, + "25": 0.10750000001074997, + "26": 0.11180000001031999, + "27": 0.11610000000989, + "28": 0.12040000000946, + "29": 0.12470000000902998, + "30": 0.12900000000859999, + "31": 0.13330000000817, + "32": 0.13760000000774, + "33": 0.14190000000730998, + "34": 0.14620000000687997, + "35": 0.15050000000645, + "36": 0.15480000000602, + "37": 0.15910000000559, + "38": 0.16340000000516, + "39": 0.16770000000472998, + "40": 0.17200000000430002, + "41": 0.17630000000387, + "42": 0.18060000000343998, + "43": 0.18490000000301, + "44": 0.18920000000258, + "45": 0.19350000000214998, + "46": 0.19780000000172002, + "47": 0.20210000000129, + "48": 0.20640000000086, + "49": 0.21070000000043, + "50": 0.215, + "51": 0.215, + "52": 0.215, + "53": 0.215 + } + }, + "step_size_list": [ + 2.15e-11, + 0.00341036, + 0.0081676, + 0.0117807, + 0.00891262, + 0.0186838, + 0.0230227, + 0.027841, + 0.0319624, + 0.0294668, + 0.0358446, + 0.0372671, + 0.00284935, + 0.0494733, + 0.0434172, + 0.00652726, + 0.0598968, + 0.00651789, + 0.0668894, + 0.0294479, + 0.0665895, + 0.0305938, + 0.0653424, + 0.00663597, + 0.0834276, + 0.0714279, + 0.0931337, + 0.0589858, + 0.0901287, + 0.0219054, + 0.0827623, + 0.0354201, + 0.0917634, + 0.0384964, + 0.0976807, + 0.02173, + 0.119585, + 0.0430407, + 0.0856324, + 0.130444, + 0.0237294, + 0.102633, + 0.117985, + 0.153443, + 0.0908815, + 0.146437, + 0.152707, + 0.181381, + 0.165279, + 0.143186, + 0.147018, + 0.16894, + 0.184426, + 0.131333 + ], + "train_epoch_time": 4.843566179275513, + "train_loss": 3.192642595709824, + "train_score": 0.19196556671449067, + "val_loss": 3.212363445936744, + "val_score": 0.18875574033993942 + }, + { + "epoch": 1, + "grad_norm": 1.5797064304351807, + "learning_rate": 0.215, + "model_norm": 87.45377349853516, + "step_logs": { + "grad_norm": { + "54": 2.7509515285491943, + "55": 2.1297144889831543, + "56": 2.7332794666290283, + "57": 2.204525947570801, + "58": 2.327118158340454, + "59": 2.423321008682251, + "60": 3.5714168548583984, + "61": 1.8112183809280396, + "62": 1.8024959564208984, + "63": 1.5884958505630493, + "64": 1.8096168041229248, + "65": 2.4035353660583496, + "66": 2.1440398693084717, + "67": 2.1138064861297607, + "68": 2.0728063583374023, + "69": 1.8628382682800293, + "70": 1.760241150856018, + "71": 1.7336857318878174, + "72": 3.0480399131774902, + "73": 1.9429903030395508, + "74": 1.538813591003418, + "75": 1.3420686721801758, + "76": 1.5077552795410156, + "77": 1.8091076612472534, + "78": 3.002013921737671, + "79": 1.5222104787826538, + "80": 1.3437747955322266, + "81": 1.3172543048858643, + "82": 1.4621851444244385, + "83": 1.7923827171325684, + "84": 1.7363418340682983, + "85": 1.4273935556411743, + "86": 1.4675780534744263, + "87": 1.8746922016143799, + "88": 1.5804861783981323, + "89": 1.6660451889038086, + "90": 1.8579421043395996, + "91": 1.568577766418457, + "92": 1.5494729280471802, + "93": 2.0584964752197266, + "94": 1.6027863025665283, + "95": 0.9710261821746826, + "96": 1.1346977949142456, + "97": 1.7505210638046265, + "98": 1.586259365081787, + "99": 1.5141584873199463, + "100": 1.4724820852279663, + "101": 2.00040340423584, + "102": 1.7313096523284912, + "103": 1.2881773710250854, + "104": 1.1386187076568604, + "105": 1.1593906879425049, + "106": 1.3849852085113525, + "107": 1.5797064304351807 + }, + "loss": { + "54": 3.1894376277923584, + "55": 2.894500255584717, + "56": 2.8264591693878174, + "57": 2.9473814964294434, + "58": 2.8314361572265625, + "59": 2.8074607849121094, + "60": 3.004971981048584, + "61": 2.9587607383728027, + "62": 2.744166135787964, + "63": 2.7421693801879883, + "64": 2.710906505584717, + "65": 2.782461166381836, + "66": 2.880190372467041, + "67": 2.7372612953186035, + "68": 2.8365378379821777, + "69": 2.7197723388671875, + "70": 2.76515793800354, + "71": 2.7187962532043457, + "72": 2.7817392349243164, + "73": 2.9693241119384766, + "74": 2.708263874053955, + "75": 2.6438140869140625, + "76": 2.653427839279175, + "77": 2.7037103176116943, + "78": 2.806432008743286, + "79": 2.8769922256469727, + "80": 2.66206693649292, + "81": 2.6320762634277344, + "82": 2.6135997772216797, + "83": 2.6842079162597656, + "84": 2.6965930461883545, + "85": 2.662733554840088, + "86": 2.6265344619750977, + "87": 2.6330149173736572, + "88": 2.7358503341674805, + "89": 2.618044853210449, + "90": 2.745748519897461, + "91": 2.6436691284179688, + "92": 2.6546928882598877, + "93": 2.650205612182617, + "94": 2.7685561180114746, + "95": 2.5784668922424316, + "96": 2.57856822013855, + "97": 2.6211800575256348, + "98": 2.70285701751709, + "99": 2.6242856979370117, + "100": 2.6563167572021484, + "101": 2.629248857498169, + "102": 2.775672435760498, + "103": 2.590315341949463, + "104": 2.598947048187256, + "105": 2.555227279663086, + "106": 2.5942485332489014, + "107": 2.60394287109375 + }, + "lr": { + "54": 0.215, + "55": 0.215, + "56": 0.215, + "57": 0.215, + "58": 0.215, + "59": 0.215, + "60": 0.215, + "61": 0.215, + "62": 0.215, + "63": 0.215, + "64": 0.215, + "65": 0.215, + "66": 0.215, + "67": 0.215, + "68": 0.215, + "69": 0.215, + "70": 0.215, + "71": 0.215, + "72": 0.215, + "73": 0.215, + "74": 0.215, + "75": 0.215, + "76": 0.215, + "77": 0.215, + "78": 0.215, + "79": 0.215, + "80": 0.215, + "81": 0.215, + "82": 0.215, + "83": 0.215, + "84": 0.215, + "85": 0.215, + "86": 0.215, + "87": 0.215, + "88": 0.215, + "89": 0.215, + "90": 0.215, + "91": 0.215, + "92": 0.215, + "93": 0.215, + "94": 0.215, + "95": 0.215, + "96": 0.215, + "97": 0.215, + "98": 0.215, + "99": 0.215, + "100": 0.215, + "101": 0.215, + "102": 0.215, + "103": 0.215, + "104": 0.215, + "105": 0.215, + "106": 0.215, + "107": 0.215 + } + }, + "step_size_list": [ + 0.171305, + 0.184004, + 0.167427, + 0.182628, + 0.178333, + 0.17553, + 0.147635, + 0.192103, + 0.190725, + 0.195647, + 0.190289, + 0.175769, + 0.183514, + 0.182904, + 0.184894, + 0.189068, + 0.191886, + 0.192163, + 0.158201, + 0.189148, + 0.196528, + 0.200329, + 0.196868, + 0.190244, + 0.159827, + 0.197868, + 0.200388, + 0.200772, + 0.197622, + 0.190491, + 0.191932, + 0.198659, + 0.197583, + 0.188021, + 0.195784, + 0.193003, + 0.189402, + 0.195446, + 0.195949, + 0.183466, + 0.195499, + 0.206868, + 0.204047, + 0.190997, + 0.195441, + 0.196542, + 0.197656, + 0.18477, + 0.192637, + 0.201148, + 0.204057, + 0.203492, + 0.199169, + 0.194919 + ], + "train_epoch_time": 4.842210292816162, + "train_loss": 2.6319974529862917, + "train_score": 0.23389302367288378, + "val_loss": 2.651239775625901, + "val_score": 0.23102037911436998 + }, + { + "epoch": 2, + "grad_norm": 1.315531849861145, + "learning_rate": 0.215, + "model_norm": 87.47314453125, + "step_logs": { + "grad_norm": { + "108": 1.344770908355713, + "109": 1.875128984451294, + "110": 1.6731008291244507, + "111": 1.3293333053588867, + "112": 1.5637351274490356, + "113": 1.8029847145080566, + "114": 1.530908465385437, + "115": 1.2929627895355225, + "116": 1.3823858499526978, + "117": 1.8939834833145142, + "118": 1.5422918796539307, + "119": 1.1374588012695312, + "120": 1.1340739727020264, + "121": 1.4493074417114258, + "122": 1.4997245073318481, + "123": 1.5211377143859863, + "124": 1.4198424816131592, + "125": 1.3821847438812256, + "126": 1.4348351955413818, + "127": 1.4656531810760498, + "128": 1.5007617473602295, + "129": 1.3856004476547241, + "130": 1.358123779296875, + "131": 1.5242176055908203, + "132": 1.4696683883666992, + "133": 1.2413662672042847, + "134": 1.2722327709197998, + "135": 1.395194411277771, + "136": 1.2747689485549927, + "137": 1.288352131843567, + "138": 1.2747585773468018, + "139": 1.2749489545822144, + "140": 1.355198621749878, + "141": 1.5357475280761719, + "142": 1.4174522161483765, + "143": 1.322439193725586, + "144": 1.3464287519454956, + "145": 1.3782477378845215, + "146": 1.2899798154830933, + "147": 1.0685778856277466, + "148": 1.161523699760437, + "149": 1.4050099849700928, + "150": 1.5366697311401367, + "151": 1.5470987558364868, + "152": 1.642154335975647, + "153": 1.3751894235610962, + "154": 1.1721516847610474, + "155": 1.329103946685791, + "156": 1.5129611492156982, + "157": 1.3387285470962524, + "158": 1.2138770818710327, + "159": 1.4187896251678467, + "160": 1.427746295928955, + "161": 1.315531849861145 + }, + "loss": { + "108": 2.6293787956237793, + "109": 2.581925868988037, + "110": 2.7473692893981934, + "111": 2.5824594497680664, + "112": 2.63314151763916, + "113": 2.5941081047058105, + "114": 2.693816900253296, + "115": 2.579030752182007, + "116": 2.611847400665283, + "117": 2.5991437435150146, + "118": 2.704798698425293, + "119": 2.560504913330078, + "120": 2.5768022537231445, + "121": 2.572042942047119, + "122": 2.6388392448425293, + "123": 2.6021575927734375, + "124": 2.6545071601867676, + "125": 2.5796961784362793, + "126": 2.5913963317871094, + "127": 2.5851573944091797, + "128": 2.6309282779693604, + "129": 2.6035070419311523, + "130": 2.593778133392334, + "131": 2.5900230407714844, + "132": 2.628330707550049, + "133": 2.5558037757873535, + "134": 2.5774760246276855, + "135": 2.5583245754241943, + "136": 2.596064329147339, + "137": 2.549643039703369, + "138": 2.5992588996887207, + "139": 2.552520751953125, + "140": 2.5835907459259033, + "141": 2.572484254837036, + "142": 2.6159684658050537, + "143": 2.560361385345459, + "144": 2.587024211883545, + "145": 2.550192356109619, + "146": 2.572631359100342, + "147": 2.51448392868042, + "148": 2.536811351776123, + "149": 2.555095911026001, + "150": 2.5925002098083496, + "151": 2.60998797416687, + "152": 2.5830564498901367, + "153": 2.59608793258667, + "154": 2.5334532260894775, + "155": 2.570136308670044, + "156": 2.571843385696411, + "157": 2.6207480430603027, + "158": 2.5341358184814453, + "159": 2.592061996459961, + "160": 2.580261707305908, + "161": 2.5741782188415527 + }, + "lr": { + "108": 0.215, + "109": 0.215, + "110": 0.215, + "111": 0.215, + "112": 0.215, + "113": 0.215, + "114": 0.215, + "115": 0.215, + "116": 0.215, + "117": 0.215, + "118": 0.215, + "119": 0.215, + "120": 0.215, + "121": 0.215, + "122": 0.215, + "123": 0.215, + "124": 0.215, + "125": 0.215, + "126": 0.215, + "127": 0.215, + "128": 0.215, + "129": 0.215, + "130": 0.215, + "131": 0.215, + "132": 0.215, + "133": 0.215, + "134": 0.215, + "135": 0.215, + "136": 0.215, + "137": 0.215, + "138": 0.215, + "139": 0.215, + "140": 0.215, + "141": 0.215, + "142": 0.215, + "143": 0.215, + "144": 0.215, + "145": 0.215, + "146": 0.215, + "147": 0.215, + "148": 0.215, + "149": 0.215, + "150": 0.215, + "151": 0.215, + "152": 0.215, + "153": 0.215, + "154": 0.215, + "155": 0.215, + "156": 0.215, + "157": 0.215, + "158": 0.215, + "159": 0.215, + "160": 0.215, + "161": 0.215 + } + }, + "step_size_list": [ + 0.200198, + 0.187544, + 0.193776, + 0.200268, + 0.195485, + 0.189475, + 0.196611, + 0.200994, + 0.199323, + 0.187223, + 0.19643, + 0.203923, + 0.204052, + 0.197648, + 0.196954, + 0.196241, + 0.198772, + 0.199146, + 0.198083, + 0.19737, + 0.196881, + 0.199208, + 0.199731, + 0.196091, + 0.197548, + 0.201913, + 0.201404, + 0.198744, + 0.201445, + 0.200938, + 0.20146, + 0.201225, + 0.199737, + 0.195711, + 0.198603, + 0.200293, + 0.199938, + 0.19906, + 0.201022, + 0.204993, + 0.203373, + 0.198513, + 0.195826, + 0.195706, + 0.193306, + 0.199386, + 0.203156, + 0.200207, + 0.196225, + 0.200277, + 0.202352, + 0.198434, + 0.19817, + 0.200509 + ], + "train_epoch_time": 4.8452136516571045, + "train_loss": 2.555558303507363, + "train_score": 0.24031563843339895, + "val_loss": 2.590563240062219, + "val_score": 0.23925444873821858 + }, + { + "epoch": 3, + "grad_norm": 1.2829318046569824, + "learning_rate": 0.215, + "model_norm": 87.49092102050781, + "step_logs": { + "grad_norm": { + "162": 1.302155613899231, + "163": 1.2679505348205566, + "164": 1.3365378379821777, + "165": 1.2978893518447876, + "166": 1.2945623397827148, + "167": 1.3376919031143188, + "168": 1.3314517736434937, + "169": 1.249277949333191, + "170": 1.1479874849319458, + "171": 1.2287659645080566, + "172": 1.18733811378479, + "173": 1.2171608209609985, + "174": 1.301889181137085, + "175": 1.6929446458816528, + "176": 1.4803028106689453, + "177": 1.2344516515731812, + "178": 1.195341944694519, + "179": 1.4658128023147583, + "180": 1.4468327760696411, + "181": 1.0305976867675781, + "182": 1.0223102569580078, + "183": 1.2272998094558716, + "184": 1.2391563653945923, + "185": 1.3416391611099243, + "186": 1.2720328569412231, + "187": 0.9583979845046997, + "188": 1.1231491565704346, + "189": 1.4031203985214233, + "190": 1.357681393623352, + "191": 1.1329631805419922, + "192": 1.0563315153121948, + "193": 1.01853346824646, + "194": 1.0779166221618652, + "195": 1.1308420896530151, + "196": 1.203302025794983, + "197": 1.6176344156265259, + "198": 1.5035041570663452, + "199": 1.155487298965454, + "200": 1.0421441793441772, + "201": 1.0787928104400635, + "202": 1.1190375089645386, + "203": 1.1839451789855957, + "204": 1.1982773542404175, + "205": 1.2874412536621094, + "206": 1.2700939178466797, + "207": 1.2709908485412598, + "208": 1.2595946788787842, + "209": 1.1214065551757812, + "210": 1.0963120460510254, + "211": 1.0826447010040283, + "212": 1.2664756774902344, + "213": 1.446083903312683, + "214": 1.4617469310760498, + "215": 1.2829318046569824 + }, + "loss": { + "162": 2.5503251552581787, + "163": 2.580986738204956, + "164": 2.54641056060791, + "165": 2.565549373626709, + "166": 2.5534493923187256, + "167": 2.5993361473083496, + "168": 2.543041706085205, + "169": 2.5455493927001953, + "170": 2.539292573928833, + "171": 2.5354411602020264, + "172": 2.5436558723449707, + "173": 2.5123326778411865, + "174": 2.557433605194092, + "175": 2.5449934005737305, + "176": 2.618886709213257, + "177": 2.523719549179077, + "178": 2.5625057220458984, + "179": 2.5322012901306152, + "180": 2.5992798805236816, + "181": 2.5401182174682617, + "182": 2.5114431381225586, + "183": 2.5168349742889404, + "184": 2.559504508972168, + "185": 2.5291378498077393, + "186": 2.5774779319763184, + "187": 2.520627737045288, + "188": 2.5269546508789062, + "189": 2.5501954555511475, + "190": 2.573432445526123, + "191": 2.5298590660095215, + "192": 2.5226101875305176, + "193": 2.5114099979400635, + "194": 2.499330520629883, + "195": 2.517226219177246, + "196": 2.507829189300537, + "197": 2.5550994873046875, + "198": 2.6146035194396973, + "199": 2.535062074661255, + "200": 2.511296272277832, + "201": 2.500337600708008, + "202": 2.531972885131836, + "203": 2.514383316040039, + "204": 2.522951126098633, + "205": 2.5224952697753906, + "206": 2.5570836067199707, + "207": 2.5087690353393555, + "208": 2.5390076637268066, + "209": 2.5294992923736572, + "210": 2.5221924781799316, + "211": 2.489377975463867, + "212": 2.520740032196045, + "213": 2.553650140762329, + "214": 2.57137131690979, + "215": 2.544015407562256 + }, + "lr": { + "162": 0.215, + "163": 0.215, + "164": 0.215, + "165": 0.215, + "166": 0.215, + "167": 0.215, + "168": 0.215, + "169": 0.215, + "170": 0.215, + "171": 0.215, + "172": 0.215, + "173": 0.215, + "174": 0.215, + "175": 0.215, + "176": 0.215, + "177": 0.215, + "178": 0.215, + "179": 0.215, + "180": 0.215, + "181": 0.215, + "182": 0.215, + "183": 0.215, + "184": 0.215, + "185": 0.215, + "186": 0.215, + "187": 0.215, + "188": 0.215, + "189": 0.215, + "190": 0.215, + "191": 0.215, + "192": 0.215, + "193": 0.215, + "194": 0.215, + "195": 0.215, + "196": 0.215, + "197": 0.215, + "198": 0.215, + "199": 0.215, + "200": 0.215, + "201": 0.215, + "202": 0.215, + "203": 0.215, + "204": 0.215, + "205": 0.215, + "206": 0.215, + "207": 0.215, + "208": 0.215, + "209": 0.215, + "210": 0.215, + "211": 0.215, + "212": 0.215, + "213": 0.215, + "214": 0.215, + "215": 0.215 + } + }, + "step_size_list": [ + 0.200658, + 0.201507, + 0.199923, + 0.200825, + 0.20083, + 0.200185, + 0.200011, + 0.201706, + 0.203639, + 0.202064, + 0.202911, + 0.202183, + 0.200701, + 0.191782, + 0.197257, + 0.201895, + 0.202841, + 0.197028, + 0.197869, + 0.205751, + 0.205794, + 0.202004, + 0.201974, + 0.19972, + 0.201408, + 0.206895, + 0.20405, + 0.198524, + 0.199629, + 0.20388, + 0.205241, + 0.205859, + 0.204767, + 0.203866, + 0.202435, + 0.193677, + 0.196717, + 0.203479, + 0.205449, + 0.204755, + 0.204146, + 0.202844, + 0.202605, + 0.200815, + 0.201345, + 0.201081, + 0.201467, + 0.204092, + 0.204523, + 0.204642, + 0.201235, + 0.197605, + 0.197369, + 0.201019 + ], + "train_epoch_time": 4.84174919128418, + "train_loss": 2.5135812423492605, + "train_score": 0.2482077205198225, + "val_loss": 2.5549608303402924, + "val_score": 0.23722284038291322 + }, + { + "epoch": 4, + "grad_norm": 1.3852770328521729, + "learning_rate": 0.215, + "model_norm": 87.51226806640625, + "step_logs": { + "grad_norm": { + "216": 0.9911021590232849, + "217": 1.1388626098632812, + "218": 1.18673574924469, + "219": 1.2927554845809937, + "220": 1.1694165468215942, + "221": 0.9886003732681274, + "222": 1.147566795349121, + "223": 1.5065957307815552, + "224": 1.444945216178894, + "225": 1.1592024564743042, + "226": 1.0360867977142334, + "227": 1.1043791770935059, + "228": 1.1281208992004395, + "229": 1.2424534559249878, + "230": 1.3199564218521118, + "231": 1.2597553730010986, + "232": 1.3101727962493896, + "233": 1.3297395706176758, + "234": 1.3228331804275513, + "235": 1.2477182149887085, + "236": 1.287459135055542, + "237": 1.2524323463439941, + "238": 1.1583845615386963, + "239": 1.1117467880249023, + "240": 1.2913349866867065, + "241": 1.2728441953659058, + "242": 1.282461404800415, + "243": 1.2495893239974976, + "244": 1.227732539176941, + "245": 1.1781437397003174, + "246": 1.1705435514450073, + "247": 1.1541047096252441, + "248": 1.1340049505233765, + "249": 1.0704351663589478, + "250": 1.1875613927841187, + "251": 1.326641321182251, + "252": 1.13332998752594, + "253": 1.1530803442001343, + "254": 1.3963634967803955, + "255": 1.4765300750732422, + "256": 1.243006944656372, + "257": 1.167756199836731, + "258": 1.150063395500183, + "259": 1.087386965751648, + "260": 1.0243992805480957, + "261": 1.2357460260391235, + "262": 1.351833701133728, + "263": 1.4577537775039673, + "264": 1.3989040851593018, + "265": 1.4537324905395508, + "266": 1.3404828310012817, + "267": 1.2918037176132202, + "268": 1.3404955863952637, + "269": 1.3852770328521729 + }, + "loss": { + "216": 2.512786865234375, + "217": 2.483055591583252, + "218": 2.51798677444458, + "219": 2.528566837310791, + "220": 2.562972068786621, + "221": 2.485072612762451, + "222": 2.5267343521118164, + "223": 2.5232694149017334, + "224": 2.591841220855713, + "225": 2.508546829223633, + "226": 2.5094411373138428, + "227": 2.4954824447631836, + "228": 2.513897180557251, + "229": 2.500267505645752, + "230": 2.53218936920166, + "231": 2.524029493331909, + "232": 2.5123438835144043, + "233": 2.5255627632141113, + "234": 2.547271490097046, + "235": 2.5202784538269043, + "236": 2.5422465801239014, + "237": 2.532721996307373, + "238": 2.5069632530212402, + "239": 2.501102924346924, + "240": 2.5159387588500977, + "241": 2.5408389568328857, + "242": 2.49662446975708, + "243": 2.527021884918213, + "244": 2.4945502281188965, + "245": 2.5129857063293457, + "246": 2.4755072593688965, + "247": 2.500941276550293, + "248": 2.5067288875579834, + "249": 2.4871864318847656, + "250": 2.477766275405884, + "251": 2.5294623374938965, + "252": 2.518540382385254, + "253": 2.5060808658599854, + "254": 2.49104905128479, + "255": 2.545215129852295, + "256": 2.500241279602051, + "257": 2.4953794479370117, + "258": 2.480581283569336, + "259": 2.4736239910125732, + "260": 2.458857297897339, + "261": 2.4802355766296387, + "262": 2.4805707931518555, + "263": 2.5296616554260254, + "264": 2.5329012870788574, + "265": 2.5064330101013184, + "266": 2.500066041946411, + "267": 2.478817939758301, + "268": 2.4949803352355957, + "269": 2.4904167652130127 + }, + "lr": { + "216": 0.215, + "217": 0.215, + "218": 0.215, + "219": 0.215, + "220": 0.215, + "221": 0.215, + "222": 0.215, + "223": 0.215, + "224": 0.215, + "225": 0.215, + "226": 0.215, + "227": 0.215, + "228": 0.215, + "229": 0.215, + "230": 0.215, + "231": 0.215, + "232": 0.215, + "233": 0.215, + "234": 0.215, + "235": 0.215, + "236": 0.215, + "237": 0.215, + "238": 0.215, + "239": 0.215, + "240": 0.215, + "241": 0.215, + "242": 0.215, + "243": 0.215, + "244": 0.215, + "245": 0.215, + "246": 0.215, + "247": 0.215, + "248": 0.215, + "249": 0.215, + "250": 0.215, + "251": 0.215, + "252": 0.215, + "253": 0.215, + "254": 0.215, + "255": 0.215, + "256": 0.215, + "257": 0.215, + "258": 0.215, + "259": 0.215, + "260": 0.215, + "261": 0.215, + "262": 0.215, + "263": 0.215, + "264": 0.215, + "265": 0.215, + "266": 0.215, + "267": 0.215, + "268": 0.215, + "269": 0.215 + } + }, + "step_size_list": [ + 0.206329, + 0.203569, + 0.202806, + 0.200738, + 0.203337, + 0.206279, + 0.203593, + 0.196042, + 0.197865, + 0.203293, + 0.205548, + 0.204268, + 0.203903, + 0.201618, + 0.200193, + 0.201388, + 0.200289, + 0.199951, + 0.200214, + 0.201612, + 0.200918, + 0.201579, + 0.203302, + 0.204155, + 0.2007, + 0.201208, + 0.200781, + 0.201608, + 0.201886, + 0.20295, + 0.202926, + 0.203357, + 0.203763, + 0.204855, + 0.202603, + 0.200038, + 0.203825, + 0.203399, + 0.198313, + 0.196872, + 0.201607, + 0.20307, + 0.203344, + 0.204492, + 0.205569, + 0.201653, + 0.199222, + 0.197192, + 0.198513, + 0.197132, + 0.19958, + 0.200491, + 0.19955, + 0.198553 + ], + "train_epoch_time": 4.84600830078125, + "train_loss": 2.502915991669577, + "train_score": 0.2600542502389577, + "val_loss": 2.5505322227521825, + "val_score": 0.2530810491157317 + }, + { + "epoch": 5, + "grad_norm": 1.2029547691345215, + "learning_rate": 0.215, + "model_norm": 87.5400161743164, + "step_logs": { + "grad_norm": { + "270": 1.2710802555084229, + "271": 1.0045161247253418, + "272": 1.0132417678833008, + "273": 1.248389482498169, + "274": 1.4885855913162231, + "275": 1.428642988204956, + "276": 1.2419459819793701, + "277": 1.2277100086212158, + "278": 1.286665916442871, + "279": 1.2028756141662598, + "280": 1.1912678480148315, + "281": 1.3207236528396606, + "282": 1.27871835231781, + "283": 1.315139651298523, + "284": 1.3207634687423706, + "285": 1.2287064790725708, + "286": 1.2362735271453857, + "287": 1.2319828271865845, + "288": 1.3179576396942139, + "289": 1.3505678176879883, + "290": 1.2994858026504517, + "291": 1.1427247524261475, + "292": 1.0707684755325317, + "293": 1.394778847694397, + "294": 1.3099271059036255, + "295": 1.4401239156723022, + "296": 1.7194749116897583, + "297": 1.9601112604141235, + "298": 1.5322904586791992, + "299": 1.1947455406188965, + "300": 1.0073742866516113, + "301": 1.1618094444274902, + "302": 1.2410342693328857, + "303": 1.247981071472168, + "304": 1.283972144126892, + "305": 1.3671889305114746, + "306": 1.4527076482772827, + "307": 1.4652153253555298, + "308": 1.5101890563964844, + "309": 1.1943949460983276, + "310": 1.0305434465408325, + "311": 1.1106972694396973, + "312": 1.3709852695465088, + "313": 1.3301037549972534, + "314": 1.1110860109329224, + "315": 1.0978580713272095, + "316": 1.2091288566589355, + "317": 1.2651363611221313, + "318": 1.3794142007827759, + "319": 1.2932034730911255, + "320": 1.249191164970398, + "321": 1.2317206859588623, + "322": 1.1675301790237427, + "323": 1.2029547691345215 + }, + "loss": { + "270": 2.504241943359375, + "271": 2.4517226219177246, + "272": 2.438788414001465, + "273": 2.4542157649993896, + "274": 2.4831953048706055, + "275": 2.4865403175354004, + "276": 2.471459150314331, + "277": 2.444843053817749, + "278": 2.45448637008667, + "279": 2.4569814205169678, + "280": 2.440514087677002, + "281": 2.451827049255371, + "282": 2.475576639175415, + "283": 2.4644479751586914, + "284": 2.4695143699645996, + "285": 2.4339611530303955, + "286": 2.4734182357788086, + "287": 2.4525671005249023, + "288": 2.4568240642547607, + "289": 2.4482290744781494, + "290": 2.457190752029419, + "291": 2.422393321990967, + "292": 2.3946518898010254, + "293": 2.418074369430542, + "294": 2.4740211963653564, + "295": 2.4526937007904053, + "296": 2.485193967819214, + "297": 2.529479503631592, + "298": 2.470250129699707, + "299": 2.4665398597717285, + "300": 2.398270845413208, + "301": 2.4219326972961426, + "302": 2.439095973968506, + "303": 2.4235827922821045, + "304": 2.4364137649536133, + "305": 2.450216770172119, + "306": 2.4460811614990234, + "307": 2.4381558895111084, + "308": 2.467790126800537, + "309": 2.481377601623535, + "310": 2.4137468338012695, + "311": 2.418330669403076, + "312": 2.413196563720703, + "313": 2.468334197998047, + "314": 2.4007620811462402, + "315": 2.3954718112945557, + "316": 2.363215923309326, + "317": 2.4446158409118652, + "318": 2.4106626510620117, + "319": 2.423877716064453, + "320": 2.414515972137451, + "321": 2.4114866256713867, + "322": 2.380112648010254, + "323": 2.4021129608154297 + }, + "lr": { + "270": 0.215, + "271": 0.215, + "272": 0.215, + "273": 0.215, + "274": 0.215, + "275": 0.215, + "276": 0.215, + "277": 0.215, + "278": 0.215, + "279": 0.215, + "280": 0.215, + "281": 0.215, + "282": 0.215, + "283": 0.215, + "284": 0.215, + "285": 0.215, + "286": 0.215, + "287": 0.215, + "288": 0.215, + "289": 0.215, + "290": 0.215, + "291": 0.215, + "292": 0.215, + "293": 0.215, + "294": 0.215, + "295": 0.215, + "296": 0.215, + "297": 0.215, + "298": 0.215, + "299": 0.215, + "300": 0.215, + "301": 0.215, + "302": 0.215, + "303": 0.215, + "304": 0.215, + "305": 0.215, + "306": 0.215, + "307": 0.215, + "308": 0.215, + "309": 0.215, + "310": 0.215, + "311": 0.215, + "312": 0.215, + "313": 0.215, + "314": 0.215, + "315": 0.215, + "316": 0.215, + "317": 0.215, + "318": 0.215, + "319": 0.215, + "320": 0.215, + "321": 0.215, + "322": 0.215, + "323": 0.215 + } + }, + "step_size_list": [ + 0.201056, + 0.205891, + 0.205692, + 0.201261, + 0.196181, + 0.197567, + 0.201482, + 0.201637, + 0.200465, + 0.202199, + 0.202351, + 0.199725, + 0.200746, + 0.199917, + 0.199826, + 0.20156, + 0.201608, + 0.201589, + 0.199813, + 0.199057, + 0.200209, + 0.203223, + 0.204476, + 0.197886, + 0.200082, + 0.197085, + 0.190621, + 0.184822, + 0.195069, + 0.202408, + 0.205646, + 0.202847, + 0.201333, + 0.201107, + 0.200421, + 0.198704, + 0.196752, + 0.196409, + 0.19557, + 0.202486, + 0.20529, + 0.203823, + 0.198389, + 0.199619, + 0.203738, + 0.203968, + 0.201593, + 0.200863, + 0.198184, + 0.200154, + 0.201033, + 0.20138, + 0.202531, + 0.201923 + ], + "train_epoch_time": 4.844873905181885, + "train_loss": 2.4011798890112463, + "train_score": 0.2974533715754363, + "val_loss": 2.45778798865396, + "val_score": 0.28674834922178466 + }, + { + "epoch": 6, + "grad_norm": 1.1454983949661255, + "learning_rate": 0.215, + "model_norm": 87.56878662109375, + "step_logs": { + "grad_norm": { + "324": 1.4478660821914673, + "325": 1.3053901195526123, + "326": 1.0898138284683228, + "327": 1.1261799335479736, + "328": 1.1343061923980713, + "329": 1.0687384605407715, + "330": 1.074279546737671, + "331": 1.2334868907928467, + "332": 1.3701967000961304, + "333": 1.2915104627609253, + "334": 1.1126514673233032, + "335": 1.2157806158065796, + "336": 1.2364264726638794, + "337": 1.2313041687011719, + "338": 1.2272017002105713, + "339": 1.2962377071380615, + "340": 1.2160311937332153, + "341": 1.167911171913147, + "342": 1.1176592111587524, + "343": 1.202584981918335, + "344": 1.5088257789611816, + "345": 1.7485594749450684, + "346": 1.632184624671936, + "347": 1.4440006017684937, + "348": 1.311724066734314, + "349": 1.2276780605316162, + "350": 1.0833379030227661, + "351": 1.1216859817504883, + "352": 1.2419434785842896, + "353": 1.236767292022705, + "354": 1.2276660203933716, + "355": 1.3100180625915527, + "356": 1.3939276933670044, + "357": 1.2280195951461792, + "358": 1.0774376392364502, + "359": 1.0626447200775146, + "360": 1.0826159715652466, + "361": 1.1186443567276, + "362": 1.115387201309204, + "363": 1.1995201110839844, + "364": 1.2868213653564453, + "365": 1.1960668563842773, + "366": 1.037724256515503, + "367": 1.0093685388565063, + "368": 1.0928775072097778, + "369": 1.211077094078064, + "370": 1.5211516618728638, + "371": 1.4537298679351807, + "372": 1.4707303047180176, + "373": 1.3578100204467773, + "374": 1.4943362474441528, + "375": 1.4854881763458252, + "376": 1.292373776435852, + "377": 1.1454983949661255 + }, + "loss": { + "324": 2.412602186203003, + "325": 2.4778757095336914, + "326": 2.3745248317718506, + "327": 2.4018149375915527, + "328": 2.3857524394989014, + "329": 2.3892507553100586, + "330": 2.371295928955078, + "331": 2.3904948234558105, + "332": 2.3940982818603516, + "333": 2.402587413787842, + "334": 2.3605637550354004, + "335": 2.402249813079834, + "336": 2.385800838470459, + "337": 2.3857994079589844, + "338": 2.4248242378234863, + "339": 2.3977396488189697, + "340": 2.401632308959961, + "341": 2.3702032566070557, + "342": 2.3574700355529785, + "343": 2.3406589031219482, + "344": 2.3903648853302, + "345": 2.4242103099823, + "346": 2.423222303390503, + "347": 2.4280123710632324, + "348": 2.3788585662841797, + "349": 2.3968396186828613, + "350": 2.340715169906616, + "351": 2.3773956298828125, + "352": 2.356738328933716, + "353": 2.387390613555908, + "354": 2.356386184692383, + "355": 2.393049478530884, + "356": 2.3868567943573, + "357": 2.3960678577423096, + "358": 2.3528735637664795, + "359": 2.331757068634033, + "360": 2.3170411586761475, + "361": 2.3428142070770264, + "362": 2.3230068683624268, + "363": 2.351250410079956, + "364": 2.34890079498291, + "365": 2.3764071464538574, + "366": 2.2935149669647217, + "367": 2.337144374847412, + "368": 2.326967716217041, + "369": 2.349787950515747, + "370": 2.361410140991211, + "371": 2.4077606201171875, + "372": 2.349144697189331, + "373": 2.40689754486084, + "374": 2.3618967533111572, + "375": 2.355367660522461, + "376": 2.3666179180145264, + "377": 2.3410162925720215 + }, + "lr": { + "324": 0.215, + "325": 0.215, + "326": 0.215, + "327": 0.215, + "328": 0.215, + "329": 0.215, + "330": 0.215, + "331": 0.215, + "332": 0.215, + "333": 0.215, + "334": 0.215, + "335": 0.215, + "336": 0.215, + "337": 0.215, + "338": 0.215, + "339": 0.215, + "340": 0.215, + "341": 0.215, + "342": 0.215, + "343": 0.215, + "344": 0.215, + "345": 0.215, + "346": 0.215, + "347": 0.215, + "348": 0.215, + "349": 0.215, + "350": 0.215, + "351": 0.215, + "352": 0.215, + "353": 0.215, + "354": 0.215, + "355": 0.215, + "356": 0.215, + "357": 0.215, + "358": 0.215, + "359": 0.215, + "360": 0.215, + "361": 0.215, + "362": 0.215, + "363": 0.215, + "364": 0.215, + "365": 0.215, + "366": 0.215, + "367": 0.215, + "368": 0.215, + "369": 0.215, + "370": 0.215, + "371": 0.215, + "372": 0.215, + "373": 0.215, + "374": 0.215, + "375": 0.215, + "376": 0.215, + "377": 0.215 + } + }, + "step_size_list": [ + 0.196633, + 0.2002, + 0.204029, + 0.203451, + 0.203218, + 0.204491, + 0.204311, + 0.201232, + 0.198284, + 0.200068, + 0.203526, + 0.201661, + 0.201145, + 0.201252, + 0.201544, + 0.199938, + 0.201653, + 0.202474, + 0.203413, + 0.201609, + 0.195032, + 0.18933, + 0.192276, + 0.196829, + 0.199489, + 0.201387, + 0.204004, + 0.203427, + 0.200868, + 0.201146, + 0.201168, + 0.199611, + 0.197699, + 0.201375, + 0.204171, + 0.204361, + 0.203912, + 0.203325, + 0.203296, + 0.201729, + 0.199854, + 0.201932, + 0.204669, + 0.205376, + 0.203757, + 0.201481, + 0.194511, + 0.196463, + 0.195635, + 0.198643, + 0.195164, + 0.195328, + 0.199839, + 0.202781 + ], + "train_epoch_time": 4.844247579574585, + "train_loss": 2.331601727060129, + "train_score": 0.3248217809234493, + "val_loss": 2.379627516841779, + "val_score": 0.316186496126118 + }, + { + "epoch": 7, + "grad_norm": 1.1997441053390503, + "learning_rate": 0.215, + "model_norm": 87.59695434570312, + "step_logs": { + "grad_norm": { + "378": 1.3484280109405518, + "379": 1.3991032838821411, + "380": 1.3478426933288574, + "381": 1.436978816986084, + "382": 1.3199915885925293, + "383": 1.2213108539581299, + "384": 1.1229854822158813, + "385": 1.082373857498169, + "386": 1.0697990655899048, + "387": 1.146734595298767, + "388": 1.2959370613098145, + "389": 1.4532639980316162, + "390": 1.5401716232299805, + "391": 1.5075747966766357, + "392": 1.3791413307189941, + "393": 1.3971352577209473, + "394": 1.4198503494262695, + "395": 1.461948037147522, + "396": 1.371685266494751, + "397": 1.4157965183258057, + "398": 1.2385632991790771, + "399": 1.07338547706604, + "400": 1.1034038066864014, + "401": 1.126296043395996, + "402": 1.076011061668396, + "403": 1.0979341268539429, + "404": 1.1755038499832153, + "405": 1.176401972770691, + "406": 1.1992491483688354, + "407": 1.1118355989456177, + "408": 1.024641752243042, + "409": 1.0313398838043213, + "410": 1.1729899644851685, + "411": 1.3220034837722778, + "412": 1.275770664215088, + "413": 1.2952038049697876, + "414": 1.1650899648666382, + "415": 1.0833736658096313, + "416": 1.0968871116638184, + "417": 1.1617414951324463, + "418": 1.24214768409729, + "419": 1.406369686126709, + "420": 1.3495177030563354, + "421": 1.168258786201477, + "422": 1.0976866483688354, + "423": 1.0422807931900024, + "424": 1.0422507524490356, + "425": 1.169421911239624, + "426": 1.1457267999649048, + "427": 1.0372921228408813, + "428": 1.2229158878326416, + "429": 1.2568180561065674, + "430": 1.32062566280365, + "431": 1.1997441053390503 + }, + "loss": { + "378": 2.3507919311523438, + "379": 2.368234157562256, + "380": 2.349613666534424, + "381": 2.355195999145508, + "382": 2.3437561988830566, + "383": 2.3469855785369873, + "384": 2.31719970703125, + "385": 2.312052011489868, + "386": 2.3113436698913574, + "387": 2.295637607574463, + "388": 2.291583299636841, + "389": 2.346353530883789, + "390": 2.401693820953369, + "391": 2.352247714996338, + "392": 2.356387138366699, + "393": 2.3271470069885254, + "394": 2.369628667831421, + "395": 2.3422141075134277, + "396": 2.3602776527404785, + "397": 2.331357955932617, + "398": 2.349625587463379, + "399": 2.3089780807495117, + "400": 2.3075170516967773, + "401": 2.30657958984375, + "402": 2.3218934535980225, + "403": 2.3027148246765137, + "404": 2.342536449432373, + "405": 2.2852530479431152, + "406": 2.313821792602539, + "407": 2.309011459350586, + "408": 2.3086814880371094, + "409": 2.2737903594970703, + "410": 2.292351722717285, + "411": 2.317483425140381, + "412": 2.3365187644958496, + "413": 2.295337677001953, + "414": 2.3287415504455566, + "415": 2.2811338901519775, + "416": 2.3108086585998535, + "417": 2.298628330230713, + "418": 2.295947313308716, + "419": 2.320661783218384, + "420": 2.3425865173339844, + "421": 2.2825095653533936, + "422": 2.305323600769043, + "423": 2.279123067855835, + "424": 2.2789125442504883, + "425": 2.2769432067871094, + "426": 2.2836215496063232, + "427": 2.2680306434631348, + "428": 2.2916526794433594, + "429": 2.3019516468048096, + "430": 2.30794358253479, + "431": 2.2952566146850586 + }, + "lr": { + "378": 0.215, + "379": 0.215, + "380": 0.215, + "381": 0.215, + "382": 0.215, + "383": 0.215, + "384": 0.215, + "385": 0.215, + "386": 0.215, + "387": 0.215, + "388": 0.215, + "389": 0.215, + "390": 0.215, + "391": 0.215, + "392": 0.215, + "393": 0.215, + "394": 0.215, + "395": 0.215, + "396": 0.215, + "397": 0.215, + "398": 0.215, + "399": 0.215, + "400": 0.215, + "401": 0.215, + "402": 0.215, + "403": 0.215, + "404": 0.215, + "405": 0.215, + "406": 0.215, + "407": 0.215, + "408": 0.215, + "409": 0.215, + "410": 0.215, + "411": 0.215, + "412": 0.215, + "413": 0.215, + "414": 0.215, + "415": 0.215, + "416": 0.215, + "417": 0.215, + "418": 0.215, + "419": 0.215, + "420": 0.215, + "421": 0.215, + "422": 0.215, + "423": 0.215, + "424": 0.215, + "425": 0.215, + "426": 0.215, + "427": 0.215, + "428": 0.215, + "429": 0.215, + "430": 0.215, + "431": 0.215 + } + }, + "step_size_list": [ + 0.198496, + 0.197455, + 0.198501, + 0.196482, + 0.199089, + 0.20125, + 0.203117, + 0.203894, + 0.204134, + 0.202529, + 0.199298, + 0.196032, + 0.194363, + 0.19477, + 0.197834, + 0.197217, + 0.196985, + 0.195794, + 0.19803, + 0.196809, + 0.2009, + 0.204054, + 0.20346, + 0.202998, + 0.204061, + 0.203545, + 0.202179, + 0.201859, + 0.201534, + 0.2033, + 0.204979, + 0.204706, + 0.201968, + 0.198877, + 0.200022, + 0.199339, + 0.202322, + 0.203731, + 0.203604, + 0.202235, + 0.200514, + 0.196955, + 0.198417, + 0.202015, + 0.203562, + 0.20452, + 0.20452, + 0.20196, + 0.202487, + 0.204567, + 0.200906, + 0.20023, + 0.198847, + 0.201421 + ], + "train_epoch_time": 4.845145225524902, + "train_loss": 2.282484034452069, + "train_score": 0.3152248475353208, + "val_loss": 2.3400606446643923, + "val_score": 0.3001668338592236 + }, + { + "epoch": 8, + "grad_norm": 1.2059179544448853, + "learning_rate": 0.215, + "model_norm": 87.62821197509766, + "step_logs": { + "grad_norm": { + "432": 1.154915690422058, + "433": 1.2224822044372559, + "434": 1.1708451509475708, + "435": 1.1733704805374146, + "436": 1.1595726013183594, + "437": 1.1824321746826172, + "438": 1.3086246252059937, + "439": 1.2058050632476807, + "440": 1.0663050413131714, + "441": 1.2326864004135132, + "442": 1.2345457077026367, + "443": 1.1186063289642334, + "444": 1.098375678062439, + "445": 1.1730140447616577, + "446": 1.1214861869812012, + "447": 1.0590879917144775, + "448": 0.9779362082481384, + "449": 0.9481033682823181, + "450": 1.04584801197052, + "451": 1.252267837524414, + "452": 1.6064000129699707, + "453": 1.492759346961975, + "454": 1.1854383945465088, + "455": 1.2818635702133179, + "456": 1.5860165357589722, + "457": 1.3650422096252441, + "458": 1.0735995769500732, + "459": 1.0504001379013062, + "460": 1.329114556312561, + "461": 1.4292060136795044, + "462": 1.2894105911254883, + "463": 1.1807360649108887, + "464": 1.2821755409240723, + "465": 1.2632641792297363, + "466": 1.2759512662887573, + "467": 1.2425559759140015, + "468": 1.1334298849105835, + "469": 1.1011444330215454, + "470": 1.0455904006958008, + "471": 1.1691581010818481, + "472": 1.2184985876083374, + "473": 1.2237861156463623, + "474": 1.2811400890350342, + "475": 1.1922094821929932, + "476": 1.069293737411499, + "477": 1.1363435983657837, + "478": 1.1625710725784302, + "479": 1.167516827583313, + "480": 1.1135964393615723, + "481": 1.115341305732727, + "482": 1.1914392709732056, + "483": 1.310146450996399, + "484": 1.2625014781951904, + "485": 1.2059179544448853 + }, + "loss": { + "432": 2.306880474090576, + "433": 2.2761852741241455, + "434": 2.3028855323791504, + "435": 2.2981693744659424, + "436": 2.29217529296875, + "437": 2.2850661277770996, + "438": 2.257185459136963, + "439": 2.294344186782837, + "440": 2.2642316818237305, + "441": 2.253075122833252, + "442": 2.3124477863311768, + "443": 2.2656140327453613, + "444": 2.2863221168518066, + "445": 2.2782537937164307, + "446": 2.269493579864502, + "447": 2.2647018432617188, + "448": 2.2654755115509033, + "449": 2.2519307136535645, + "450": 2.242961883544922, + "451": 2.2560482025146484, + "452": 2.2995927333831787, + "453": 2.3216710090637207, + "454": 2.2739310264587402, + "455": 2.2942895889282227, + "456": 2.282039165496826, + "457": 2.334156036376953, + "458": 2.2432713508605957, + "459": 2.2499780654907227, + "460": 2.242152690887451, + "461": 2.321077585220337, + "462": 2.2890729904174805, + "463": 2.261343002319336, + "464": 2.257908344268799, + "465": 2.269758462905884, + "466": 2.2366390228271484, + "467": 2.2978789806365967, + "468": 2.2593274116516113, + "469": 2.2600831985473633, + "470": 2.2695047855377197, + "471": 2.2583765983581543, + "472": 2.2594573497772217, + "473": 2.260195732116699, + "474": 2.241128921508789, + "475": 2.2822160720825195, + "476": 2.2174103260040283, + "477": 2.2257683277130127, + "478": 2.252816915512085, + "479": 2.252662181854248, + "480": 2.2483038902282715, + "481": 2.2260961532592773, + "482": 2.259723424911499, + "483": 2.2424588203430176, + "484": 2.248912811279297, + "485": 2.243582248687744 + }, + "lr": { + "432": 0.215, + "433": 0.215, + "434": 0.215, + "435": 0.215, + "436": 0.215, + "437": 0.215, + "438": 0.215, + "439": 0.215, + "440": 0.215, + "441": 0.215, + "442": 0.215, + "443": 0.215, + "444": 0.215, + "445": 0.215, + "446": 0.215, + "447": 0.215, + "448": 0.215, + "449": 0.215, + "450": 0.215, + "451": 0.215, + "452": 0.215, + "453": 0.215, + "454": 0.215, + "455": 0.215, + "456": 0.215, + "457": 0.215, + "458": 0.215, + "459": 0.215, + "460": 0.215, + "461": 0.215, + "462": 0.215, + "463": 0.215, + "464": 0.215, + "465": 0.215, + "466": 0.215, + "467": 0.215, + "468": 0.215, + "469": 0.215, + "470": 0.215, + "471": 0.215, + "472": 0.215, + "473": 0.215, + "474": 0.215, + "475": 0.215, + "476": 0.215, + "477": 0.215, + "478": 0.215, + "479": 0.215, + "480": 0.215, + "481": 0.215, + "482": 0.215, + "483": 0.215, + "484": 0.215, + "485": 0.215 + } + }, + "step_size_list": [ + 0.202418, + 0.200826, + 0.202069, + 0.201991, + 0.202246, + 0.201731, + 0.198787, + 0.201287, + 0.203988, + 0.200466, + 0.200775, + 0.202951, + 0.203459, + 0.201892, + 0.202911, + 0.204131, + 0.205667, + 0.206154, + 0.20429, + 0.200052, + 0.191856, + 0.194891, + 0.201607, + 0.19963, + 0.192223, + 0.198008, + 0.203746, + 0.204234, + 0.198212, + 0.196418, + 0.199429, + 0.201637, + 0.199393, + 0.199892, + 0.199397, + 0.200517, + 0.202615, + 0.203276, + 0.204414, + 0.201865, + 0.200814, + 0.200704, + 0.199309, + 0.201509, + 0.203708, + 0.202378, + 0.201974, + 0.201869, + 0.202965, + 0.202816, + 0.201399, + 0.198654, + 0.199779, + 0.200995 + ], + "train_epoch_time": 4.844937801361084, + "train_loss": 2.249316280917766, + "train_score": 0.3493364419023823, + "val_loss": 2.307672290933392, + "val_score": 0.3339014068742023 + }, + { + "epoch": 9, + "grad_norm": 1.286625862121582, + "learning_rate": 0.215, + "model_norm": 87.65879821777344, + "step_logs": { + "grad_norm": { + "486": 1.2811017036437988, + "487": 1.3381850719451904, + "488": 1.2197372913360596, + "489": 1.3329378366470337, + "490": 1.308487057685852, + "491": 1.2579445838928223, + "492": 1.291610836982727, + "493": 1.2727007865905762, + "494": 1.0571719408035278, + "495": 0.931128740310669, + "496": 0.9739916920661926, + "497": 1.1750648021697998, + "498": 1.3887135982513428, + "499": 1.6758337020874023, + "500": 1.4978593587875366, + "501": 1.3222256898880005, + "502": 1.4410287141799927, + "503": 1.1668554544448853, + "504": 1.191144347190857, + "505": 1.255637764930725, + "506": 1.1259422302246094, + "507": 1.1139298677444458, + "508": 1.0737437009811401, + "509": 1.1874620914459229, + "510": 1.1882264614105225, + "511": 1.1086351871490479, + "512": 1.03429114818573, + "513": 1.0379016399383545, + "514": 1.0770936012268066, + "515": 1.1197972297668457, + "516": 1.1270424127578735, + "517": 1.1074938774108887, + "518": 1.2652866840362549, + "519": 1.5173068046569824, + "520": 1.2729284763336182, + "521": 1.2219595909118652, + "522": 1.2521469593048096, + "523": 1.2389447689056396, + "524": 1.3149099349975586, + "525": 1.2944488525390625, + "526": 1.1764169931411743, + "527": 1.2254124879837036, + "528": 1.2436047792434692, + "529": 1.1616783142089844, + "530": 1.1216086149215698, + "531": 1.1146761178970337, + "532": 1.1473681926727295, + "533": 1.1905994415283203, + "534": 1.2116689682006836, + "535": 1.1863447427749634, + "536": 1.06602144241333, + "537": 1.0756251811981201, + "538": 1.1655296087265015, + "539": 1.286625862121582 + }, + "loss": { + "486": 2.263685703277588, + "487": 2.27846622467041, + "488": 2.246555805206299, + "489": 2.237314462661743, + "490": 2.2342114448547363, + "491": 2.255732536315918, + "492": 2.261526107788086, + "493": 2.274289131164551, + "494": 2.20943546295166, + "495": 2.2043192386627197, + "496": 2.227146625518799, + "497": 2.228538990020752, + "498": 2.2645938396453857, + "499": 2.265127182006836, + "500": 2.301616668701172, + "501": 2.262078046798706, + "502": 2.2398734092712402, + "503": 2.2471923828125, + "504": 2.20629620552063, + "505": 2.246720552444458, + "506": 2.226177215576172, + "507": 2.1992220878601074, + "508": 2.2379846572875977, + "509": 2.2320992946624756, + "510": 2.228458881378174, + "511": 2.2272465229034424, + "512": 2.2293572425842285, + "513": 2.2162818908691406, + "514": 2.2113728523254395, + "515": 2.2047367095947266, + "516": 2.2201035022735596, + "517": 2.209986686706543, + "518": 2.2332091331481934, + "519": 2.2668092250823975, + "520": 2.2332048416137695, + "521": 2.256260871887207, + "522": 2.2383127212524414, + "523": 2.253067970275879, + "524": 2.256917953491211, + "525": 2.244154691696167, + "526": 2.2304253578186035, + "527": 2.211859941482544, + "528": 2.212076187133789, + "529": 2.2092349529266357, + "530": 2.215790271759033, + "531": 2.1990294456481934, + "532": 2.2107343673706055, + "533": 2.223140001296997, + "534": 2.229137659072876, + "535": 2.23358154296875, + "536": 2.225706100463867, + "537": 2.1915171146392822, + "538": 2.1892237663269043, + "539": 2.2547144889831543 + }, + "lr": { + "486": 0.215, + "487": 0.215, + "488": 0.215, + "489": 0.215, + "490": 0.215, + "491": 0.215, + "492": 0.215, + "493": 0.215, + "494": 0.215, + "495": 0.215, + "496": 0.215, + "497": 0.215, + "498": 0.215, + "499": 0.215, + "500": 0.215, + "501": 0.215, + "502": 0.215, + "503": 0.215, + "504": 0.215, + "505": 0.215, + "506": 0.215, + "507": 0.215, + "508": 0.215, + "509": 0.215, + "510": 0.215, + "511": 0.215, + "512": 0.215, + "513": 0.215, + "514": 0.215, + "515": 0.215, + "516": 0.215, + "517": 0.215, + "518": 0.215, + "519": 0.215, + "520": 0.215, + "521": 0.215, + "522": 0.215, + "523": 0.215, + "524": 0.215, + "525": 0.215, + "526": 0.215, + "527": 0.215, + "528": 0.215, + "529": 0.215, + "530": 0.215, + "531": 0.215, + "532": 0.215, + "533": 0.215, + "534": 0.215, + "535": 0.215, + "536": 0.215, + "537": 0.215, + "538": 0.215, + "539": 0.215 + } + }, + "step_size_list": [ + 0.199455, + 0.19825, + 0.200711, + 0.198089, + 0.198636, + 0.199923, + 0.199203, + 0.19971, + 0.203912, + 0.206278, + 0.205586, + 0.201574, + 0.196968, + 0.189714, + 0.194607, + 0.198507, + 0.195515, + 0.201853, + 0.201098, + 0.199919, + 0.202597, + 0.202705, + 0.203718, + 0.201328, + 0.20129, + 0.20296, + 0.204453, + 0.204324, + 0.203522, + 0.202612, + 0.202542, + 0.202895, + 0.199617, + 0.193837, + 0.199444, + 0.20072, + 0.199944, + 0.200328, + 0.198641, + 0.199025, + 0.201556, + 0.200376, + 0.199971, + 0.201752, + 0.202633, + 0.202689, + 0.202065, + 0.201208, + 0.200784, + 0.20136, + 0.203813, + 0.203453, + 0.201555, + 0.199272 + ], + "train_epoch_time": 4.84569525718689, + "train_loss": 2.228900132459753, + "train_score": 0.33784522952958196, + "val_loss": 2.2953676372663847, + "val_score": 0.3241425087818733 + }, + { + "epoch": 10, + "grad_norm": 1.5282254219055176, + "learning_rate": 0.215, + "model_norm": 87.6897201538086, + "step_logs": { + "grad_norm": { + "540": 1.3236253261566162, + "541": 1.2120963335037231, + "542": 1.3293075561523438, + "543": 1.2659614086151123, + "544": 1.2643650770187378, + "545": 1.2919769287109375, + "546": 1.3321635723114014, + "547": 1.2872507572174072, + "548": 1.1092441082000732, + "549": 1.2507811784744263, + "550": 1.2238914966583252, + "551": 1.1918277740478516, + "552": 1.3765634298324585, + "553": 1.4541363716125488, + "554": 1.3285415172576904, + "555": 1.2294840812683105, + "556": 1.0333061218261719, + "557": 0.9750503897666931, + "558": 1.0579694509506226, + "559": 1.1781774759292603, + "560": 1.0836007595062256, + "561": 1.0941779613494873, + "562": 1.1210981607437134, + "563": 1.170119047164917, + "564": 1.2395886182785034, + "565": 1.2561867237091064, + "566": 1.2560819387435913, + "567": 1.234580159187317, + "568": 1.1695517301559448, + "569": 1.204097032546997, + "570": 1.2253378629684448, + "571": 1.235628604888916, + "572": 1.2304635047912598, + "573": 1.261124610900879, + "574": 1.1713411808013916, + "575": 1.154990553855896, + "576": 1.1073288917541504, + "577": 1.108127236366272, + "578": 1.1391375064849854, + "579": 1.1087826490402222, + "580": 1.0753718614578247, + "581": 1.0891958475112915, + "582": 1.1358745098114014, + "583": 1.29420006275177, + "584": 1.4496811628341675, + "585": 1.3629112243652344, + "586": 1.2241178750991821, + "587": 1.6359233856201172, + "588": 1.7339699268341064, + "589": 2.3059768676757812, + "590": 1.75924813747406, + "591": 1.3533680438995361, + "592": 1.40185546875, + "593": 1.5282254219055176 + }, + "loss": { + "540": 2.24470591545105, + "541": 2.2166566848754883, + "542": 2.202448844909668, + "543": 2.2326884269714355, + "544": 2.1964781284332275, + "545": 2.2362253665924072, + "546": 2.237544059753418, + "547": 2.2418558597564697, + "548": 2.19227933883667, + "549": 2.1998894214630127, + "550": 2.2158803939819336, + "551": 2.2009477615356445, + "552": 2.218970775604248, + "553": 2.254849910736084, + "554": 2.222987174987793, + "555": 2.2264702320098877, + "556": 2.2049458026885986, + "557": 2.1714818477630615, + "558": 2.1553432941436768, + "559": 2.1979007720947266, + "560": 2.191707134246826, + "561": 2.1716389656066895, + "562": 2.214081048965454, + "563": 2.2235708236694336, + "564": 2.200239658355713, + "565": 2.190506935119629, + "566": 2.224316120147705, + "567": 2.1955504417419434, + "568": 2.181828022003174, + "569": 2.1953487396240234, + "570": 2.2270801067352295, + "571": 2.1956965923309326, + "572": 2.197404384613037, + "573": 2.172541618347168, + "574": 2.1874992847442627, + "575": 2.194261074066162, + "576": 2.159250259399414, + "577": 2.166497230529785, + "578": 2.174556016921997, + "579": 2.1848740577697754, + "580": 2.1783227920532227, + "581": 2.1724984645843506, + "582": 2.157999038696289, + "583": 2.1706554889678955, + "584": 2.2155213356018066, + "585": 2.1973178386688232, + "586": 2.200434684753418, + "587": 2.2382147312164307, + "588": 2.245429039001465, + "589": 2.2540335655212402, + "590": 2.3192644119262695, + "591": 2.239567279815674, + "592": 2.198662757873535, + "593": 2.2130331993103027 + }, + "lr": { + "540": 0.215, + "541": 0.215, + "542": 0.215, + "543": 0.215, + "544": 0.215, + "545": 0.215, + "546": 0.215, + "547": 0.215, + "548": 0.215, + "549": 0.215, + "550": 0.215, + "551": 0.215, + "552": 0.215, + "553": 0.215, + "554": 0.215, + "555": 0.215, + "556": 0.215, + "557": 0.215, + "558": 0.215, + "559": 0.215, + "560": 0.215, + "561": 0.215, + "562": 0.215, + "563": 0.215, + "564": 0.215, + "565": 0.215, + "566": 0.215, + "567": 0.215, + "568": 0.215, + "569": 0.215, + "570": 0.215, + "571": 0.215, + "572": 0.215, + "573": 0.215, + "574": 0.215, + "575": 0.215, + "576": 0.215, + "577": 0.215, + "578": 0.215, + "579": 0.215, + "580": 0.215, + "581": 0.215, + "582": 0.215, + "583": 0.215, + "584": 0.215, + "585": 0.215, + "586": 0.215, + "587": 0.215, + "588": 0.215, + "589": 0.215, + "590": 0.215, + "591": 0.215, + "592": 0.215, + "593": 0.215 + } + }, + "step_size_list": [ + 0.198357, + 0.2007, + 0.197929, + 0.199598, + 0.199399, + 0.199029, + 0.198109, + 0.199174, + 0.202766, + 0.199731, + 0.200435, + 0.201051, + 0.196922, + 0.195311, + 0.198092, + 0.200375, + 0.204362, + 0.205336, + 0.203632, + 0.201331, + 0.203292, + 0.202971, + 0.202634, + 0.201652, + 0.199986, + 0.199547, + 0.199767, + 0.200069, + 0.201425, + 0.200748, + 0.200471, + 0.200046, + 0.200173, + 0.199315, + 0.201419, + 0.201811, + 0.20263, + 0.202652, + 0.202039, + 0.202737, + 0.203393, + 0.203079, + 0.202016, + 0.198532, + 0.195105, + 0.197089, + 0.200334, + 0.190512, + 0.187946, + 0.171505, + 0.188027, + 0.197625, + 0.196153, + 0.193094 + ], + "train_epoch_time": 4.844839334487915, + "train_loss": 2.207936185264861, + "train_score": 0.34560729015506325, + "val_loss": 2.2938550016773283, + "val_score": 0.3282012413892078 + }, + { + "epoch": 11, + "grad_norm": 1.0767444372177124, + "learning_rate": 0.215, + "model_norm": 87.72090911865234, + "step_logs": { + "grad_norm": { + "594": 1.1823039054870605, + "595": 1.1631243228912354, + "596": 1.4852553606033325, + "597": 1.4170695543289185, + "598": 1.2073761224746704, + "599": 1.1509628295898438, + "600": 1.1863006353378296, + "601": 1.2522052526474, + "602": 1.1972665786743164, + "603": 1.1848161220550537, + "604": 1.1319524049758911, + "605": 1.1472547054290771, + "606": 1.1560578346252441, + "607": 1.110656499862671, + "608": 1.2006510496139526, + "609": 1.3080259561538696, + "610": 1.3089104890823364, + "611": 1.1568255424499512, + "612": 1.0831385850906372, + "613": 1.1552590131759644, + "614": 1.2013462781906128, + "615": 1.205397129058838, + "616": 1.1810394525527954, + "617": 1.1056616306304932, + "618": 1.2109280824661255, + "619": 1.2529412508010864, + "620": 1.120617389678955, + "621": 1.0886765718460083, + "622": 1.2116297483444214, + "623": 1.2842867374420166, + "624": 1.365793228149414, + "625": 1.452988862991333, + "626": 1.2926194667816162, + "627": 1.2699536085128784, + "628": 1.1802414655685425, + "629": 1.1245898008346558, + "630": 1.1074035167694092, + "631": 1.1504571437835693, + "632": 1.1566340923309326, + "633": 1.2103692293167114, + "634": 1.169101357460022, + "635": 1.094599962234497, + "636": 1.107570767402649, + "637": 1.125802993774414, + "638": 0.9927701950073242, + "639": 1.0007109642028809, + "640": 1.0888326168060303, + "641": 1.1877045631408691, + "642": 1.2481164932250977, + "643": 1.295889973640442, + "644": 1.289625883102417, + "645": 1.1353554725646973, + "646": 1.0083565711975098, + "647": 1.0767444372177124 + }, + "loss": { + "594": 2.1937105655670166, + "595": 2.169579029083252, + "596": 2.219860076904297, + "597": 2.2350001335144043, + "598": 2.1906545162200928, + "599": 2.1421122550964355, + "600": 2.1757922172546387, + "601": 2.1895949840545654, + "602": 2.176201343536377, + "603": 2.1726856231689453, + "604": 2.1687235832214355, + "605": 2.186079978942871, + "606": 2.1864013671875, + "607": 2.189131021499634, + "608": 2.1774678230285645, + "609": 2.168029308319092, + "610": 2.174156665802002, + "611": 2.191075325012207, + "612": 2.146369695663452, + "613": 2.1871626377105713, + "614": 2.165951728820801, + "615": 2.1848959922790527, + "616": 2.2014527320861816, + "617": 2.1646289825439453, + "618": 2.1827850341796875, + "619": 2.1788363456726074, + "620": 2.1607096195220947, + "621": 2.1529273986816406, + "622": 2.163221597671509, + "623": 2.1707754135131836, + "624": 2.1937742233276367, + "625": 2.2061731815338135, + "626": 2.1780202388763428, + "627": 2.159313440322876, + "628": 2.169443130493164, + "629": 2.1191844940185547, + "630": 2.160952568054199, + "631": 2.1320996284484863, + "632": 2.1442203521728516, + "633": 2.1720244884490967, + "634": 2.164599657058716, + "635": 2.133831024169922, + "636": 2.149535655975342, + "637": 2.169243335723877, + "638": 2.165787935256958, + "639": 2.1171839237213135, + "640": 2.154299020767212, + "641": 2.138416290283203, + "642": 2.1590371131896973, + "643": 2.18825364112854, + "644": 2.165739059448242, + "645": 2.1778724193573, + "646": 2.1531286239624023, + "647": 2.160839080810547 + }, + "lr": { + "594": 0.215, + "595": 0.215, + "596": 0.215, + "597": 0.215, + "598": 0.215, + "599": 0.215, + "600": 0.215, + "601": 0.215, + "602": 0.215, + "603": 0.215, + "604": 0.215, + "605": 0.215, + "606": 0.215, + "607": 0.215, + "608": 0.215, + "609": 0.215, + "610": 0.215, + "611": 0.215, + "612": 0.215, + "613": 0.215, + "614": 0.215, + "615": 0.215, + "616": 0.215, + "617": 0.215, + "618": 0.215, + "619": 0.215, + "620": 0.215, + "621": 0.215, + "622": 0.215, + "623": 0.215, + "624": 0.215, + "625": 0.215, + "626": 0.215, + "627": 0.215, + "628": 0.215, + "629": 0.215, + "630": 0.215, + "631": 0.215, + "632": 0.215, + "633": 0.215, + "634": 0.215, + "635": 0.215, + "636": 0.215, + "637": 0.215, + "638": 0.215, + "639": 0.215, + "640": 0.215, + "641": 0.215, + "642": 0.215, + "643": 0.215, + "644": 0.215, + "645": 0.215, + "646": 0.215, + "647": 0.215 + } + }, + "step_size_list": [ + 0.201217, + 0.201493, + 0.194249, + 0.196063, + 0.200647, + 0.201598, + 0.201023, + 0.199632, + 0.200783, + 0.201037, + 0.20216, + 0.20193, + 0.201743, + 0.20272, + 0.200715, + 0.198187, + 0.19821, + 0.201753, + 0.203068, + 0.201765, + 0.200629, + 0.200655, + 0.20129, + 0.202694, + 0.200519, + 0.199544, + 0.202357, + 0.202987, + 0.200381, + 0.198765, + 0.196993, + 0.194946, + 0.19862, + 0.19902, + 0.201118, + 0.202038, + 0.202638, + 0.20155, + 0.201486, + 0.200465, + 0.201334, + 0.202761, + 0.202572, + 0.202294, + 0.204973, + 0.204597, + 0.202991, + 0.200763, + 0.199524, + 0.198615, + 0.198605, + 0.202139, + 0.204613, + 0.203275 + ], + "train_epoch_time": 4.845956087112427, + "train_loss": 2.150563732897021, + "train_score": 0.36072229200832473, + "val_loss": 2.238299369538282, + "val_score": 0.3406599821986889 + }, + { + "epoch": 12, + "grad_norm": 0.7353389263153076, + "learning_rate": 0.215, + "model_norm": 87.7488021850586, + "step_logs": { + "grad_norm": { + "648": 1.2911244630813599, + "649": 1.3675874471664429, + "650": 1.2735424041748047, + "651": 1.2170828580856323, + "652": 1.1520575284957886, + "653": 1.1016744375228882, + "654": 1.1052532196044922, + "655": 1.123640775680542, + "656": 1.0669153928756714, + "657": 1.0239628553390503, + "658": 1.1762434244155884, + "659": 1.109971523284912, + "660": 1.0068074464797974, + "661": 1.0006115436553955, + "662": 1.0679601430892944, + "663": 1.1093448400497437, + "664": 1.044920802116394, + "665": 1.0257349014282227, + "666": 1.1455438137054443, + "667": 1.2184151411056519, + "668": 1.3766710758209229, + "669": 1.2604378461837769, + "670": 1.1541317701339722, + "671": 1.0473637580871582, + "672": 1.0855695009231567, + "673": 1.1077187061309814, + "674": 1.0599641799926758, + "675": 1.080422043800354, + "676": 1.077756404876709, + "677": 1.037529468536377, + "678": 0.9889176487922668, + "679": 0.9151387810707092, + "680": 0.9786506295204163, + "681": 0.8844283223152161, + "682": 0.8383657336235046, + "683": 0.8414342403411865, + "684": 0.8716306090354919, + "685": 0.9330067038536072, + "686": 0.9714165925979614, + "687": 0.8918057084083557, + "688": 0.9009073972702026, + "689": 0.9107198119163513, + "690": 0.953681230545044, + "691": 0.8878958225250244, + "692": 0.8807072639465332, + "693": 0.8689160943031311, + "694": 0.9305926561355591, + "695": 0.9528166651725769, + "696": 0.9149192571640015, + "697": 0.8647100329399109, + "698": 0.843787431716919, + "699": 0.8242149949073792, + "700": 0.7857349514961243, + "701": 0.7353389263153076 + }, + "loss": { + "648": 2.1395187377929688, + "649": 2.1873302459716797, + "650": 2.1630945205688477, + "651": 2.154590368270874, + "652": 2.163867950439453, + "653": 2.1423096656799316, + "654": 2.119457244873047, + "655": 2.1374893188476562, + "656": 2.1446125507354736, + "657": 2.134413003921509, + "658": 2.1253976821899414, + "659": 2.1292295455932617, + "660": 2.107851505279541, + "661": 2.1307358741760254, + "662": 2.1395468711853027, + "663": 2.139540672302246, + "664": 2.125692367553711, + "665": 2.1287403106689453, + "666": 2.128596782684326, + "667": 2.114706039428711, + "668": 2.128831386566162, + "669": 2.1656930446624756, + "670": 2.12746524810791, + "671": 2.102405071258545, + "672": 2.1138434410095215, + "673": 2.146491527557373, + "674": 2.121488571166992, + "675": 2.104985475540161, + "676": 2.1472997665405273, + "677": 2.1106696128845215, + "678": 2.112091064453125, + "679": 2.1224775314331055, + "680": 2.10707688331604, + "681": 2.079653263092041, + "682": 2.100478172302246, + "683": 2.10656476020813, + "684": 2.099215507507324, + "685": 2.096406936645508, + "686": 2.096055507659912, + "687": 2.0820322036743164, + "688": 2.0983026027679443, + "689": 2.0865349769592285, + "690": 2.0767717361450195, + "691": 2.0998764038085938, + "692": 2.062168598175049, + "693": 2.0599799156188965, + "694": 2.0793333053588867, + "695": 2.075690269470215, + "696": 2.092489242553711, + "697": 2.0711312294006348, + "698": 2.0643420219421387, + "699": 2.0595874786376953, + "700": 2.0784080028533936, + "701": 2.068653106689453 + }, + "lr": { + "648": 0.215, + "649": 0.21367283950617283, + "650": 0.21234567901234566, + "651": 0.21101851851851852, + "652": 0.20969135802469135, + "653": 0.2083641975308642, + "654": 0.20703703703703705, + "655": 0.20570987654320988, + "656": 0.2043827160493827, + "657": 0.20305555555555554, + "658": 0.20172839506172838, + "659": 0.20040123456790124, + "660": 0.19907407407407407, + "661": 0.1977469135802469, + "662": 0.19641975308641976, + "663": 0.1950925925925926, + "664": 0.19376543209876543, + "665": 0.19243827160493826, + "666": 0.1911111111111111, + "667": 0.18978395061728395, + "668": 0.18845679012345679, + "669": 0.18712962962962962, + "670": 0.18580246913580248, + "671": 0.1844753086419753, + "672": 0.18314814814814814, + "673": 0.18182098765432098, + "674": 0.1804938271604938, + "675": 0.17916666666666667, + "676": 0.1778395061728395, + "677": 0.17651234567901233, + "678": 0.1751851851851852, + "679": 0.17385802469135803, + "680": 0.17253086419753086, + "681": 0.1712037037037037, + "682": 0.16987654320987652, + "683": 0.16854938271604938, + "684": 0.16722222222222222, + "685": 0.16589506172839505, + "686": 0.1645679012345679, + "687": 0.16324074074074074, + "688": 0.16191358024691357, + "689": 0.1605864197530864, + "690": 0.15925925925925924, + "691": 0.15793209876543207, + "692": 0.15660493827160493, + "693": 0.15527777777777776, + "694": 0.15395061728395062, + "695": 0.15262345679012346, + "696": 0.1512962962962963, + "697": 0.14996913580246912, + "698": 0.14864197530864195, + "699": 0.1473148148148148, + "700": 0.14598765432098765, + "701": 0.14466049382716048 + } + }, + "step_size_list": [ + 0.198384, + 0.195787, + 0.196687, + 0.196747, + 0.197021, + 0.196751, + 0.19538, + 0.193928, + 0.193867, + 0.193409, + 0.189299, + 0.189419, + 0.18998, + 0.188967, + 0.186648, + 0.184728, + 0.18458, + 0.183702, + 0.180479, + 0.177931, + 0.173871, + 0.175111, + 0.175589, + 0.176005, + 0.174252, + 0.172839, + 0.172261, + 0.170687, + 0.169678, + 0.168909, + 0.168357, + 0.168092, + 0.166021, + 0.165863, + 0.165182, + 0.163907, + 0.162311, + 0.160371, + 0.158689, + 0.158305, + 0.156997, + 0.155619, + 0.153893, + 0.153385, + 0.152125, + 0.150981, + 0.149168, + 0.147694, + 0.146852, + 0.146016, + 0.144927, + 0.143821, + 0.142889, + 0.141976 + ], + "train_epoch_time": 4.845593690872192, + "train_loss": 2.0632568827318494, + "train_score": 0.3854891499111608, + "val_loss": 2.1587396522066484, + "val_score": 0.3612092782996689 + }, + { + "epoch": 13, + "grad_norm": 0.634152352809906, + "learning_rate": 0.14333333333333334, + "model_norm": 87.76688385009766, + "step_logs": { + "grad_norm": { + "702": 0.7698751091957092, + "703": 0.8198410272598267, + "704": 0.9168505668640137, + "705": 0.9296581149101257, + "706": 0.8759832382202148, + "707": 0.8478113412857056, + "708": 0.8359658122062683, + "709": 0.8174706697463989, + "710": 0.791767418384552, + "711": 0.7542242407798767, + "712": 0.7842721939086914, + "713": 0.7353886961936951, + "714": 0.7139715552330017, + "715": 0.7196046113967896, + "716": 0.6799511909484863, + "717": 0.6413664221763611, + "718": 0.5351870059967041, + "719": 0.558090329170227, + "720": 0.580518364906311, + "721": 0.6409737467765808, + "722": 0.7095553874969482, + "723": 0.794053852558136, + "724": 0.988107442855835, + "725": 0.9364408254623413, + "726": 0.799116849899292, + "727": 0.7245419025421143, + "728": 0.7226298451423645, + "729": 0.7147102952003479, + "730": 0.6701464056968689, + "731": 0.6959460377693176, + "732": 0.6833547949790955, + "733": 0.6730333566665649, + "734": 0.6496109962463379, + "735": 0.641011655330658, + "736": 0.6611383557319641, + "737": 0.7568486928939819, + "738": 0.7157216668128967, + "739": 0.6155111789703369, + "740": 0.5869841575622559, + "741": 0.5859789848327637, + "742": 0.613866925239563, + "743": 0.5982912182807922, + "744": 0.6077699661254883, + "745": 0.5792810320854187, + "746": 0.6071605682373047, + "747": 0.5650544762611389, + "748": 0.6018205881118774, + "749": 0.6394475102424622, + "750": 0.6425354480743408, + "751": 0.6009455323219299, + "752": 0.5385817885398865, + "753": 0.5594043731689453, + "754": 0.5386627912521362, + "755": 0.634152352809906 + }, + "loss": { + "702": 2.0609946250915527, + "703": 2.0661439895629883, + "704": 2.072573184967041, + "705": 2.0751543045043945, + "706": 2.062167167663574, + "707": 2.064584255218506, + "708": 2.059007167816162, + "709": 2.0824217796325684, + "710": 2.0671615600585938, + "711": 2.0581789016723633, + "712": 2.040334939956665, + "713": 2.0633294582366943, + "714": 2.042848587036133, + "715": 2.079887866973877, + "716": 2.0341079235076904, + "717": 2.014914035797119, + "718": 2.037348747253418, + "719": 2.0633087158203125, + "720": 2.0116822719573975, + "721": 2.0289316177368164, + "722": 2.0564358234405518, + "723": 2.0336766242980957, + "724": 2.057720899581909, + "725": 2.0924949645996094, + "726": 2.0487751960754395, + "727": 2.045595645904541, + "728": 2.0411906242370605, + "729": 2.0441477298736572, + "730": 2.0299253463745117, + "731": 2.061331272125244, + "732": 2.0285091400146484, + "733": 2.040985345840454, + "734": 2.0330429077148438, + "735": 2.062788963317871, + "736": 2.0251986980438232, + "737": 2.0254530906677246, + "738": 2.047938585281372, + "739": 2.0503218173980713, + "740": 2.0282864570617676, + "741": 2.0657505989074707, + "742": 2.020443916320801, + "743": 2.0209243297576904, + "744": 2.0356192588806152, + "745": 2.0202062129974365, + "746": 2.0494089126586914, + "747": 2.032576560974121, + "748": 2.0152640342712402, + "749": 2.0075504779815674, + "750": 2.0470235347747803, + "751": 2.0157077312469482, + "752": 2.04846453666687, + "753": 2.0420918464660645, + "754": 2.0340828895568848, + "755": 2.0400009155273438 + }, + "lr": { + "702": 0.14333333333333334, + "703": 0.14200617283950617, + "704": 0.140679012345679, + "705": 0.13935185185185184, + "706": 0.13802469135802467, + "707": 0.1366975308641975, + "708": 0.13537037037037036, + "709": 0.1340432098765432, + "710": 0.13271604938271606, + "711": 0.1313888888888889, + "712": 0.13006172839506172, + "713": 0.12873456790123455, + "714": 0.12740740740740739, + "715": 0.12608024691358022, + "716": 0.12475308641975309, + "717": 0.12342592592592593, + "718": 0.12209876543209879, + "719": 0.12077160493827162, + "720": 0.11944444444444445, + "721": 0.11811728395061728, + "722": 0.11679012345679012, + "723": 0.11546296296296295, + "724": 0.11413580246913581, + "725": 0.11280864197530864, + "726": 0.1114814814814815, + "727": 0.11015432098765433, + "728": 0.10882716049382717, + "729": 0.1075, + "730": 0.10617283950617283, + "731": 0.10484567901234566, + "732": 0.10351851851851852, + "733": 0.10219135802469136, + "734": 0.10086419753086419, + "735": 0.09953703703703702, + "736": 0.09820987654320988, + "737": 0.09688271604938271, + "738": 0.09555555555555555, + "739": 0.09422839506172838, + "740": 0.09290123456790124, + "741": 0.09157407407407407, + "742": 0.0902469135802469, + "743": 0.08891975308641974, + "744": 0.0875925925925926, + "745": 0.08626543209876543, + "746": 0.08493827160493826, + "747": 0.0836111111111111, + "748": 0.08228395061728395, + "749": 0.08095679012345679, + "750": 0.07962962962962962, + "751": 0.07830246913580245, + "752": 0.07697530864197531, + "753": 0.07564814814814814, + "754": 0.07432098765432098, + "755": 0.07299382716049381 + } + }, + "step_size_list": [ + 0.140439, + 0.1388, + 0.136777, + 0.135422, + 0.134569, + 0.13352, + 0.13233, + 0.131221, + 0.130098, + 0.129046, + 0.127561, + 0.126599, + 0.125414, + 0.124132, + 0.123009, + 0.12189, + 0.12106, + 0.119681, + 0.118261, + 0.116721, + 0.115144, + 0.113433, + 0.111127, + 0.110204, + 0.109578, + 0.108619, + 0.107333, + 0.106075, + 0.10494, + 0.10357, + 0.1023, + 0.101045, + 0.0998193, + 0.09856, + 0.0971799, + 0.0955734, + 0.0944271, + 0.0934152, + 0.0921739, + 0.0908824, + 0.0894937, + 0.088225, + 0.086902, + 0.0856518, + 0.0842943, + 0.0830656, + 0.08168, + 0.0802948, + 0.0789953, + 0.0777571, + 0.0765581, + 0.0752122, + 0.0739291, + 0.0724724 + ], + "train_epoch_time": 4.845149993896484, + "train_loss": 2.022967890653241, + "train_score": 0.39838033527178607, + "val_loss": 2.1248326591728204, + "val_score": 0.37122380873776467 + }, + { + "epoch": 14, + "grad_norm": 0.5033368468284607, + "learning_rate": 0.07166666666666667, + "model_norm": 87.7729263305664, + "step_logs": { + "grad_norm": { + "756": 0.5817490220069885, + "757": 0.5924174189567566, + "758": 0.6085439324378967, + "759": 0.5963684916496277, + "760": 0.5649013519287109, + "761": 0.5879322290420532, + "762": 0.5865781307220459, + "763": 0.5456233024597168, + "764": 0.5751177668571472, + "765": 0.5415573120117188, + "766": 0.5502078533172607, + "767": 0.5416109561920166, + "768": 0.5399821400642395, + "769": 0.5714665651321411, + "770": 0.5819351673126221, + "771": 0.4997367560863495, + "772": 0.5309842824935913, + "773": 0.5636150240898132, + "774": 0.5608433485031128, + "775": 0.5910018682479858, + "776": 0.5440702438354492, + "777": 0.5018715858459473, + "778": 0.5244648456573486, + "779": 0.583748459815979, + "780": 0.535076916217804, + "781": 0.5455226898193359, + "782": 0.49497535824775696, + "783": 0.5218121409416199, + "784": 0.5231913924217224, + "785": 0.517935574054718, + "786": 0.5191906690597534, + "787": 0.5253349542617798, + "788": 0.532258152961731, + "789": 0.5133633017539978, + "790": 0.5274272561073303, + "791": 0.5238879323005676, + "792": 0.49098068475723267, + "793": 0.49649348855018616, + "794": 0.5085113644599915, + "795": 0.4726006090641022, + "796": 0.48333048820495605, + "797": 0.5235194563865662, + "798": 0.5081830024719238, + "799": 0.4873445928096771, + "800": 0.536490797996521, + "801": 0.5417503118515015, + "802": 0.46851634979248047, + "803": 0.5397835969924927, + "804": 0.5049363970756531, + "805": 0.49544158577919006, + "806": 0.48979800939559937, + "807": 0.537993311882019, + "808": 0.493367999792099, + "809": 0.5033368468284607 + }, + "loss": { + "756": 2.041349172592163, + "757": 2.035072088241577, + "758": 2.0335869789123535, + "759": 2.0425331592559814, + "760": 2.0125341415405273, + "761": 2.0303878784179688, + "762": 2.0292818546295166, + "763": 2.0409562587738037, + "764": 1.9783830642700195, + "765": 2.010605812072754, + "766": 2.0254247188568115, + "767": 2.025029182434082, + "768": 2.0390419960021973, + "769": 1.9939740896224976, + "770": 2.0474905967712402, + "771": 1.9895176887512207, + "772": 2.0228195190429688, + "773": 2.013899326324463, + "774": 2.0186731815338135, + "775": 2.000210762023926, + "776": 2.0273733139038086, + "777": 2.0154099464416504, + "778": 2.021178960800171, + "779": 2.028766632080078, + "780": 2.0093116760253906, + "781": 1.9783612489700317, + "782": 2.0299265384674072, + "783": 1.9778850078582764, + "784": 2.0285398960113525, + "785": 2.0226364135742188, + "786": 1.9988963603973389, + "787": 2.0297608375549316, + "788": 2.024130344390869, + "789": 1.9695649147033691, + "790": 1.9946712255477905, + "791": 2.028390407562256, + "792": 2.0368010997772217, + "793": 1.9961975812911987, + "794": 2.0180156230926514, + "795": 2.005180835723877, + "796": 2.019975185394287, + "797": 1.9743021726608276, + "798": 2.004042863845825, + "799": 2.0229129791259766, + "800": 1.9998270273208618, + "801": 1.9911322593688965, + "802": 2.0253686904907227, + "803": 2.0109105110168457, + "804": 2.0327634811401367, + "805": 2.0213463306427, + "806": 2.0170485973358154, + "807": 2.017352819442749, + "808": 2.0053954124450684, + "809": 1.9924137592315674 + }, + "lr": { + "756": 0.07166666666666667, + "757": 0.0703395061728395, + "758": 0.06901234567901234, + "759": 0.06768518518518517, + "760": 0.06635802469135803, + "761": 0.06503086419753086, + "762": 0.06370370370370369, + "763": 0.06237654320987653, + "764": 0.06104938271604939, + "765": 0.059722222222222225, + "766": 0.05839506172839506, + "767": 0.05706790123456789, + "768": 0.05574074074074075, + "769": 0.05441358024691358, + "770": 0.053086419753086415, + "771": 0.05175925925925925, + "772": 0.05043209876543211, + "773": 0.04910493827160494, + "774": 0.04777777777777777, + "775": 0.046450617283950606, + "776": 0.045123456790123466, + "777": 0.0437962962962963, + "778": 0.04246913580246913, + "779": 0.04114197530864196, + "780": 0.039814814814814824, + "781": 0.038487654320987656, + "782": 0.03716049382716049, + "783": 0.03583333333333333, + "784": 0.03450617283950618, + "785": 0.033179012345679014, + "786": 0.031851851851851846, + "787": 0.030524691358024682, + "788": 0.02919753086419754, + "789": 0.027870370370370375, + "790": 0.026543209876543208, + "791": 0.02521604938271604, + "792": 0.0238888888888889, + "793": 0.022561728395061733, + "794": 0.021234567901234565, + "795": 0.0199074074074074, + "796": 0.018580246913580258, + "797": 0.01725308641975309, + "798": 0.015925925925925923, + "799": 0.01459876543209876, + "800": 0.013271604938271616, + "801": 0.01194444444444445, + "802": 0.010617283950617283, + "803": 0.009290123456790117, + "804": 0.007962962962962974, + "805": 0.006635802469135808, + "806": 0.005308641975308641, + "807": 0.003981481481481476, + "808": 0.002654320987654333, + "809": 0.0013271604938271664 + } + }, + "step_size_list": [ + 0.0712434, + 0.0699155, + 0.0685814, + 0.0672887, + 0.0660107, + 0.0646729, + 0.0633615, + 0.0620941, + 0.0607394, + 0.0594632, + 0.0581413, + 0.056833, + 0.0555195, + 0.0541722, + 0.0528544, + 0.0515917, + 0.0502555, + 0.0489155, + 0.0476006, + 0.046263, + 0.0449753, + 0.0436768, + 0.0423468, + 0.0410003, + 0.0397022, + 0.0383766, + 0.0370773, + 0.0357452, + 0.034426, + 0.0331062, + 0.0317836, + 0.0304615, + 0.029138, + 0.0278185, + 0.0264942, + 0.0251731, + 0.0238552, + 0.0225303, + 0.0212057, + 0.0198854, + 0.0185603, + 0.0172324, + 0.0159096, + 0.0145863, + 0.0132589, + 0.0119339, + 0.0106112, + 0.00928388, + 0.00795899, + 0.00663313, + 0.00530697, + 0.00398034, + 0.00265389, + 0.00132705 + ], + "train_epoch_time": 4.8447723388671875, + "train_loss": 2.00816962838686, + "train_score": 0.4025342987890394, + "val_loss": 2.113864118278232, + "val_score": 0.3735693525843724 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:30:27.143766", + "final_model_norm": 87.7729263305664, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:28:45.505587", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 6.415041446685791, + "learning_rate": 4.64e-11, + "model_norm": 87.42022705078125, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.392029762268066, + "3": 8.84481143951416, + "4": 17.850553512573242, + "5": 6.034968852996826, + "6": 4.637722492218018, + "7": 4.9288530349731445, + "8": 4.521860122680664, + "9": 6.412627696990967, + "10": 3.8342607021331787, + "11": 14.737504005432129, + "12": 6.603000164031982, + "13": 6.025729179382324, + "14": 7.747841835021973, + "15": 3.797628402709961, + "16": 12.529465675354004, + "17": 22.666439056396484, + "18": 4.543037414550781, + "19": 5.329233169555664, + "20": 22.00737762451172, + "21": 4.220154285430908, + "22": 8.132468223571777, + "23": 3.5606038570404053, + "24": 7.961135387420654, + "25": 13.382946968078613, + "26": 5.8169474601745605, + "27": 4.369606971740723, + "28": 17.540950775146484, + "29": 10.750436782836914, + "30": 34.23600387573242, + "31": 3.700395345687866, + "32": 21.214187622070312, + "33": 11.726557731628418, + "34": 6.260554313659668, + "35": 3.59450364112854, + "36": 15.356278419494629, + "37": 4.497802734375, + "38": 18.671419143676758, + "39": 5.688169479370117, + "40": 6.008426666259766, + "41": 10.915599822998047, + "42": 10.294869422912598, + "43": 4.826373100280762, + "44": 3.8631951808929443, + "45": 6.129805088043213, + "46": 4.865635871887207, + "47": 10.85316276550293, + "48": 3.7912490367889404, + "49": 4.910715103149414, + "50": 10.566633224487305, + "51": 3.880162239074707, + "52": 9.479639053344727, + "53": 6.415041446685791 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.53290319442749, + "2": 3.8181304931640625, + "3": 3.8262925148010254, + "4": 4.349514484405518, + "5": 4.229140758514404, + "6": 3.5613114833831787, + "7": 3.743668794631958, + "8": 3.849187135696411, + "9": 3.633787155151367, + "10": 3.7944042682647705, + "11": 4.033172607421875, + "12": 6.311926364898682, + "13": 3.8382623195648193, + "14": 5.171895980834961, + "15": 3.6298279762268066, + "16": 3.903632164001465, + "17": 4.803995609283447, + "18": 4.232617378234863, + "19": 3.5956687927246094, + "20": 4.447190284729004, + "21": 3.989043712615967, + "22": 3.576582670211792, + "23": 3.1795363426208496, + "24": 3.9938511848449707, + "25": 4.409998416900635, + "26": 3.63392972946167, + "27": 3.9249258041381836, + "28": 5.45171594619751, + "29": 4.656816482543945, + "30": 4.673017978668213, + "31": 3.7599217891693115, + "32": 5.267796516418457, + "33": 5.631796360015869, + "34": 3.6322474479675293, + "35": 3.36767840385437, + "36": 5.973868370056152, + "37": 3.5887484550476074, + "38": 6.50324821472168, + "39": 5.428341865539551, + "40": 4.202503204345703, + "41": 4.49423885345459, + "42": 4.534539699554443, + "43": 3.8579530715942383, + "44": 3.6919684410095215, + "45": 3.7566146850585938, + "46": 3.64108943939209, + "47": 6.234297752380371, + "48": 3.9295856952667236, + "49": 3.7313809394836426, + "50": 5.694178581237793, + "51": 4.528134822845459, + "52": 4.5458760261535645, + "53": 3.82647705078125 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 4.64e-11, + 0.00602199, + 0.0168834, + 0.0216721, + 0.0157309, + 0.0386733, + 0.0476656, + 0.0536518, + 0.0620122, + 0.0567169, + 0.0786588, + 0.0272315, + 0.080427, + 0.0768105, + 0.0740717, + 0.109045, + 0.0372539, + 0.0167191, + 0.118699, + 0.103941, + 0.016711, + 0.135801, + 0.0707015, + 0.149727, + 0.0804855, + 0.0406226, + 0.113633, + 0.155681, + 0.0311841, + 0.0620166, + 0.00775171, + 0.188786, + 0.0216996, + 0.0646247, + 0.116758, + 0.200115, + 0.0439937, + 0.174491, + 0.0337388, + 0.174117, + 0.143079, + 0.0629558, + 0.0701655, + 0.180997, + 0.223702, + 0.135213, + 0.178776, + 0.0851806, + 0.245467, + 0.184143, + 0.0836164, + 0.261943, + 0.0830617, + 0.132757 + ], + "train_epoch_time": 4.849003076553345, + "train_loss": 3.8052916164890767, + "train_score": 0.12868655840498816, + "val_loss": 3.796106784954137, + "val_score": 0.12458739955786585 + }, + { + "epoch": 1, + "grad_norm": 3.3606152534484863, + "learning_rate": 0.464, + "model_norm": 87.35164642333984, + "step_logs": { + "grad_norm": { + "54": 2.858609676361084, + "55": 10.737136840820312, + "56": 3.2116808891296387, + "57": 3.384654998779297, + "58": 3.546201229095459, + "59": 3.1849629878997803, + "60": 4.52200984954834, + "61": 8.260720252990723, + "62": 2.8668410778045654, + "63": 12.155377388000488, + "64": 3.3598384857177734, + "65": 2.9624006748199463, + "66": 4.3477277755737305, + "67": 3.3108246326446533, + "68": 3.171374559402466, + "69": 3.7421977519989014, + "70": 3.211254596710205, + "71": 4.05525016784668, + "72": 11.48981761932373, + "73": 3.459059476852417, + "74": 2.85099196434021, + "75": 4.509913444519043, + "76": 2.6777124404907227, + "77": 2.991694927215576, + "78": 2.37827205657959, + "79": 4.576386451721191, + "80": 2.6979970932006836, + "81": 4.141533851623535, + "82": 3.5639302730560303, + "83": 2.1359448432922363, + "84": 3.2156429290771484, + "85": 2.999981641769409, + "86": 2.734909772872925, + "87": 4.064084529876709, + "88": 2.995272159576416, + "89": 5.097923278808594, + "90": 3.395552396774292, + "91": 3.936406135559082, + "92": 2.5835976600646973, + "93": 3.791635274887085, + "94": 2.783562660217285, + "95": 4.196510314941406, + "96": 2.4339089393615723, + "97": 13.713967323303223, + "98": 2.9899070262908936, + "99": 2.635986089706421, + "100": 2.4263968467712402, + "101": 2.2045938968658447, + "102": 2.121074914932251, + "103": 3.585427761077881, + "104": 1.44589364528656, + "105": 2.5503478050231934, + "106": 2.310807704925537, + "107": 3.3606152534484863 + }, + "loss": { + "54": 3.838592767715454, + "55": 5.012306213378906, + "56": 3.727595090866089, + "57": 3.6583807468414307, + "58": 3.7307209968566895, + "59": 3.450202465057373, + "60": 3.543002128601074, + "61": 4.588754653930664, + "62": 3.484729051589966, + "63": 5.3817596435546875, + "64": 3.8180785179138184, + "65": 3.299722194671631, + "66": 3.5598297119140625, + "67": 3.657235860824585, + "68": 3.625640869140625, + "69": 3.5288796424865723, + "70": 3.8291115760803223, + "71": 3.703097343444824, + "72": 4.860246658325195, + "73": 3.5869498252868652, + "74": 3.3525750637054443, + "75": 3.75704288482666, + "76": 3.555233955383301, + "77": 3.388500928878784, + "78": 3.2966203689575195, + "79": 3.493374824523926, + "80": 3.443857192993164, + "81": 3.500192880630493, + "82": 3.5496270656585693, + "83": 3.344968795776367, + "84": 3.2407050132751465, + "85": 3.616891384124756, + "86": 3.2851226329803467, + "87": 3.6439428329467773, + "88": 3.2720589637756348, + "89": 3.6229255199432373, + "90": 3.278555393218994, + "91": 3.543064594268799, + "92": 3.378326416015625, + "93": 3.3239662647247314, + "94": 3.398552417755127, + "95": 3.1471972465515137, + "96": 3.292386054992676, + "97": 4.4440999031066895, + "98": 3.343900680541992, + "99": 3.076943874359131, + "100": 3.334789752960205, + "101": 3.07427978515625, + "102": 3.110471248626709, + "103": 3.5102057456970215, + "104": 2.9275636672973633, + "105": 3.050889253616333, + "106": 3.179858446121216, + "107": 3.2726807594299316 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.3106, + 0.0732307, + 0.282585, + 0.268754, + 0.260378, + 0.275845, + 0.198376, + 0.104268, + 0.299901, + 0.0629628, + 0.275219, + 0.286948, + 0.207893, + 0.273689, + 0.282312, + 0.241582, + 0.285574, + 0.228539, + 0.0635471, + 0.261572, + 0.296965, + 0.205677, + 0.316099, + 0.287699, + 0.33189, + 0.194071, + 0.311332, + 0.217138, + 0.253529, + 0.352469, + 0.266627, + 0.294176, + 0.303619, + 0.226167, + 0.283598, + 0.174159, + 0.255523, + 0.230315, + 0.318159, + 0.231603, + 0.303481, + 0.201897, + 0.327353, + 0.0428908, + 0.28638, + 0.30448, + 0.329175, + 0.339485, + 0.347419, + 0.250859, + 0.398053, + 0.310449, + 0.333911, + 0.25769 + ], + "train_epoch_time": 4.845209360122681, + "train_loss": 3.1161415298494752, + "train_score": 0.1969646699715892, + "val_loss": 3.135316308959891, + "val_score": 0.19353652417865058 + }, + { + "epoch": 2, + "grad_norm": 1.0763298273086548, + "learning_rate": 0.464, + "model_norm": 87.33485412597656, + "step_logs": { + "grad_norm": { + "108": 1.4284387826919556, + "109": 1.6965339183807373, + "110": 3.2703537940979004, + "111": 1.5285755395889282, + "112": 1.754952311515808, + "113": 2.799487352371216, + "114": 1.4387683868408203, + "115": 1.8313871622085571, + "116": 1.794874906539917, + "117": 2.0885822772979736, + "118": 1.3231536149978638, + "119": 2.006324529647827, + "120": 1.4542523622512817, + "121": 1.6092643737792969, + "122": 1.6640442609786987, + "123": 1.8148009777069092, + "124": 1.4465478658676147, + "125": 1.3389941453933716, + "126": 1.7375508546829224, + "127": 2.907604694366455, + "128": 1.5427086353302002, + "129": 1.4757261276245117, + "130": 1.6154417991638184, + "131": 1.99034583568573, + "132": 1.387081503868103, + "133": 2.0772249698638916, + "134": 1.6645475625991821, + "135": 2.2846858501434326, + "136": 2.059701919555664, + "137": 1.4060015678405762, + "138": 1.903393268585205, + "139": 1.2354459762573242, + "140": 1.0644891262054443, + "141": 2.1027815341949463, + "142": 2.352018356323242, + "143": 1.2187929153442383, + "144": 1.6577410697937012, + "145": 1.9611213207244873, + "146": 1.984497308731079, + "147": 1.4651412963867188, + "148": 1.2242337465286255, + "149": 1.2056084871292114, + "150": 1.3629612922668457, + "151": 1.2349845170974731, + "152": 1.1854910850524902, + "153": 1.1565769910812378, + "154": 1.252566933631897, + "155": 1.3478208780288696, + "156": 1.703112244606018, + "157": 1.2441173791885376, + "158": 1.2054235935211182, + "159": 1.4467012882232666, + "160": 1.0429385900497437, + "161": 1.0763298273086548 + }, + "loss": { + "108": 3.1150312423706055, + "109": 2.8726983070373535, + "110": 3.2444562911987305, + "111": 3.025568962097168, + "112": 2.893230438232422, + "113": 3.072216510772705, + "114": 3.0514917373657227, + "115": 2.799407720565796, + "116": 3.0119693279266357, + "117": 2.9598679542541504, + "118": 2.912177801132202, + "119": 2.818962574005127, + "120": 2.9666385650634766, + "121": 2.8044402599334717, + "122": 2.895341396331787, + "123": 2.8714845180511475, + "124": 2.874838352203369, + "125": 2.744900941848755, + "126": 2.845977783203125, + "127": 3.015171766281128, + "128": 3.1005349159240723, + "129": 2.7572147846221924, + "130": 2.8017001152038574, + "131": 2.955296039581299, + "132": 2.8445024490356445, + "133": 2.8474457263946533, + "134": 2.9492745399475098, + "135": 2.943544864654541, + "136": 3.050510883331299, + "137": 2.8159570693969727, + "138": 2.784405469894409, + "139": 2.8976099491119385, + "140": 2.670093059539795, + "141": 2.787055492401123, + "142": 3.086111068725586, + "143": 2.804927349090576, + "144": 2.787051200866699, + "145": 2.976318120956421, + "146": 2.861079216003418, + "147": 2.871668815612793, + "148": 2.7234416007995605, + "149": 2.6982059478759766, + "150": 2.728771209716797, + "151": 2.7277684211730957, + "152": 2.6717612743377686, + "153": 2.692474365234375, + "154": 2.6754016876220703, + "155": 2.7360117435455322, + "156": 2.7431869506835938, + "157": 2.8736257553100586, + "158": 2.610884189605713, + "159": 2.81746768951416, + "160": 2.659851312637329, + "161": 2.605217933654785 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.402789, + 0.376487, + 0.262923, + 0.393499, + 0.372103, + 0.291489, + 0.400904, + 0.363079, + 0.371752, + 0.345774, + 0.407206, + 0.348536, + 0.398151, + 0.382133, + 0.379743, + 0.366481, + 0.396966, + 0.40294, + 0.372358, + 0.281127, + 0.393861, + 0.392142, + 0.381548, + 0.353931, + 0.401064, + 0.343307, + 0.380967, + 0.32875, + 0.350812, + 0.399014, + 0.356412, + 0.413471, + 0.422411, + 0.339164, + 0.327714, + 0.413229, + 0.377617, + 0.356981, + 0.35169, + 0.395423, + 0.411467, + 0.412453, + 0.400712, + 0.410722, + 0.413534, + 0.416046, + 0.408432, + 0.402066, + 0.372597, + 0.412458, + 0.410941, + 0.39579, + 0.423793, + 0.420608 + ], + "train_epoch_time": 4.8451924324035645, + "train_loss": 2.6872197686856243, + "train_score": 0.2114441356195439, + "val_loss": 2.7009051861637086, + "val_score": 0.21094646995157929 + }, + { + "epoch": 3, + "grad_norm": 0.9656234979629517, + "learning_rate": 0.464, + "model_norm": 87.35486602783203, + "step_logs": { + "grad_norm": { + "162": 1.1189560890197754, + "163": 1.2439683675765991, + "164": 1.1907100677490234, + "165": 1.1807596683502197, + "166": 1.2217439413070679, + "167": 1.590959072113037, + "168": 1.0795228481292725, + "169": 0.8327977657318115, + "170": 1.4565677642822266, + "171": 1.4457651376724243, + "172": 1.2531334161758423, + "173": 2.302196979522705, + "174": 1.0985028743743896, + "175": 0.8447472453117371, + "176": 0.8835721611976624, + "177": 1.4077037572860718, + "178": 1.3198087215423584, + "179": 1.0174331665039062, + "180": 0.990699291229248, + "181": 1.397781491279602, + "182": 1.1386934518814087, + "183": 0.8387331962585449, + "184": 0.9318520426750183, + "185": 1.1538796424865723, + "186": 1.1080108880996704, + "187": 1.1009576320648193, + "188": 1.5068435668945312, + "189": 1.0529667139053345, + "190": 0.7240515351295471, + "191": 0.8497000932693481, + "192": 1.029805064201355, + "193": 1.2156177759170532, + "194": 1.1472550630569458, + "195": 1.2585384845733643, + "196": 1.1241824626922607, + "197": 0.8736162185668945, + "198": 1.0393797159194946, + "199": 1.399061918258667, + "200": 0.9984358549118042, + "201": 0.8065123558044434, + "202": 0.8300282955169678, + "203": 1.0855735540390015, + "204": 0.9855717420578003, + "205": 0.8677594661712646, + "206": 0.9431927800178528, + "207": 1.0541419982910156, + "208": 1.0813190937042236, + "209": 1.0069924592971802, + "210": 0.9948697686195374, + "211": 1.0864923000335693, + "212": 0.9993169903755188, + "213": 0.8439814448356628, + "214": 0.8373750448226929, + "215": 0.9656234979629517 + }, + "loss": { + "162": 2.6909537315368652, + "163": 2.6405367851257324, + "164": 2.717409610748291, + "165": 2.618783473968506, + "166": 2.7271625995635986, + "167": 2.6659412384033203, + "168": 2.795989513397217, + "169": 2.5793042182922363, + "170": 2.671590805053711, + "171": 2.753861427307129, + "172": 2.7049529552459717, + "173": 2.803457736968994, + "174": 2.837716579437256, + "175": 2.5965073108673096, + "176": 2.588749408721924, + "177": 2.6406588554382324, + "178": 2.739302158355713, + "179": 2.663179397583008, + "180": 2.6126646995544434, + "181": 2.6613595485687256, + "182": 2.7359609603881836, + "183": 2.61000919342041, + "184": 2.596292734146118, + "185": 2.6409802436828613, + "186": 2.648367404937744, + "187": 2.6589622497558594, + "188": 2.6571998596191406, + "189": 2.7352123260498047, + "190": 2.5688743591308594, + "191": 2.5573902130126953, + "192": 2.595913887023926, + "193": 2.637622833251953, + "194": 2.6796278953552246, + "195": 2.617431163787842, + "196": 2.711655616760254, + "197": 2.585380792617798, + "198": 2.584141731262207, + "199": 2.6470589637756348, + "200": 2.707695245742798, + "201": 2.5650477409362793, + "202": 2.5718941688537598, + "203": 2.5787854194641113, + "204": 2.651869297027588, + "205": 2.5722475051879883, + "206": 2.604759693145752, + "207": 2.5915374755859375, + "208": 2.628849983215332, + "209": 2.603396415710449, + "210": 2.5950660705566406, + "211": 2.6052541732788086, + "212": 2.6123600006103516, + "213": 2.5820655822753906, + "214": 2.5787758827209473, + "215": 2.580049991607666 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 0.418793, + 0.408465, + 0.4139, + 0.41299, + 0.41172, + 0.380244, + 0.423088, + 0.436754, + 0.391813, + 0.394527, + 0.408924, + 0.322533, + 0.422334, + 0.436188, + 0.433659, + 0.395196, + 0.404348, + 0.425619, + 0.426802, + 0.396473, + 0.418037, + 0.436693, + 0.430589, + 0.415413, + 0.418944, + 0.419621, + 0.387233, + 0.424115, + 0.443025, + 0.435477, + 0.42383, + 0.410628, + 0.416534, + 0.406877, + 0.418725, + 0.434259, + 0.422976, + 0.396056, + 0.427487, + 0.438219, + 0.436851, + 0.419522, + 0.427658, + 0.434491, + 0.429934, + 0.422018, + 0.420599, + 0.425546, + 0.42628, + 0.419863, + 0.426201, + 0.43609, + 0.436466, + 0.428106 + ], + "train_epoch_time": 4.84496545791626, + "train_loss": 2.602953904191596, + "train_score": 0.23778918581337294, + "val_loss": 2.647889215554764, + "val_score": 0.2324241177872867 + }, + { + "epoch": 4, + "grad_norm": 0.8172276020050049, + "learning_rate": 0.464, + "model_norm": 87.3888168334961, + "step_logs": { + "grad_norm": { + "216": 0.9760596752166748, + "217": 0.9239879846572876, + "218": 0.9516480565071106, + "219": 0.9669117331504822, + "220": 0.9742136597633362, + "221": 0.8865550756454468, + "222": 0.8860629200935364, + "223": 1.0282435417175293, + "224": 0.9823834896087646, + "225": 0.97651606798172, + "226": 0.9466106295585632, + "227": 0.9056035876274109, + "228": 0.9550831317901611, + "229": 0.9927959442138672, + "230": 0.9454711675643921, + "231": 0.815541684627533, + "232": 0.8566503524780273, + "233": 1.0298569202423096, + "234": 1.0618904829025269, + "235": 0.9589795470237732, + "236": 1.2321819067001343, + "237": 1.1048648357391357, + "238": 0.8869365453720093, + "239": 0.8407354950904846, + "240": 0.9086779356002808, + "241": 0.9720743894577026, + "242": 0.9201876521110535, + "243": 0.8125502467155457, + "244": 0.7550165057182312, + "245": 0.8655475378036499, + "246": 0.8935474157333374, + "247": 0.8462132215499878, + "248": 0.7538568377494812, + "249": 0.8339540362358093, + "250": 1.0733461380004883, + "251": 0.981153666973114, + "252": 0.9008215665817261, + "253": 0.9153002500534058, + "254": 0.9088084101676941, + "255": 0.8909235596656799, + "256": 0.8397300243377686, + "257": 0.7997252941131592, + "258": 0.9583136439323425, + "259": 0.9640246629714966, + "260": 0.9869414567947388, + "261": 0.8781303763389587, + "262": 0.7422873973846436, + "263": 0.8049411773681641, + "264": 0.9022356271743774, + "265": 0.844919741153717, + "266": 0.8651341795921326, + "267": 0.8847090601921082, + "268": 0.8013103604316711, + "269": 0.8172276020050049 + }, + "loss": { + "216": 2.617234706878662, + "217": 2.5937962532043457, + "218": 2.587912082672119, + "219": 2.5723719596862793, + "220": 2.59344482421875, + "221": 2.582096815109253, + "222": 2.5561814308166504, + "223": 2.5979394912719727, + "224": 2.612966537475586, + "225": 2.5602614879608154, + "226": 2.6081910133361816, + "227": 2.5647189617156982, + "228": 2.597980260848999, + "229": 2.5668869018554688, + "230": 2.5928525924682617, + "231": 2.562885284423828, + "232": 2.5771901607513428, + "233": 2.5621814727783203, + "234": 2.641939163208008, + "235": 2.5838325023651123, + "236": 2.5744218826293945, + "237": 2.675947904586792, + "238": 2.572277069091797, + "239": 2.5442347526550293, + "240": 2.546400547027588, + "241": 2.5873584747314453, + "242": 2.561640739440918, + "243": 2.5563535690307617, + "244": 2.519526481628418, + "245": 2.550574779510498, + "246": 2.537480354309082, + "247": 2.5504183769226074, + "248": 2.529621124267578, + "249": 2.530531644821167, + "250": 2.574716567993164, + "251": 2.6057841777801514, + "252": 2.5374152660369873, + "253": 2.589334487915039, + "254": 2.5366361141204834, + "255": 2.5849251747131348, + "256": 2.54815936088562, + "257": 2.5613887310028076, + "258": 2.5224509239196777, + "259": 2.5913634300231934, + "260": 2.548593044281006, + "261": 2.5898196697235107, + "262": 2.537853956222534, + "263": 2.525055408477783, + "264": 2.5268354415893555, + "265": 2.5603861808776855, + "266": 2.5134687423706055, + "267": 2.534497022628784, + "268": 2.508770704269409, + "269": 2.5317869186401367 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 0.427867, + 0.431081, + 0.429158, + 0.427918, + 0.427688, + 0.433394, + 0.433136, + 0.42397, + 0.427379, + 0.427095, + 0.429747, + 0.431955, + 0.42905, + 0.426046, + 0.429636, + 0.43765, + 0.435247, + 0.423344, + 0.422194, + 0.428608, + 0.408155, + 0.419592, + 0.43326, + 0.435904, + 0.431536, + 0.427757, + 0.430952, + 0.437769, + 0.440859, + 0.434398, + 0.432433, + 0.435624, + 0.441014, + 0.436188, + 0.420362, + 0.427371, + 0.431951, + 0.431603, + 0.431411, + 0.433143, + 0.436008, + 0.438593, + 0.42786, + 0.428359, + 0.426209, + 0.434019, + 0.441749, + 0.437929, + 0.431732, + 0.435809, + 0.434016, + 0.432978, + 0.437993, + 0.437241 + ], + "train_epoch_time": 4.843236684799194, + "train_loss": 2.519512945473451, + "train_score": 0.26451421259465485, + "val_loss": 2.560137695340968, + "val_score": 0.25797843690170347 + }, + { + "epoch": 5, + "grad_norm": 1.2359113693237305, + "learning_rate": 0.464, + "model_norm": 87.43255615234375, + "step_logs": { + "grad_norm": { + "270": 0.909137487411499, + "271": 0.901824414730072, + "272": 0.8281104564666748, + "273": 0.8244837522506714, + "274": 0.8318466544151306, + "275": 0.9051135182380676, + "276": 0.9758633375167847, + "277": 0.9725322127342224, + "278": 0.8185096383094788, + "279": 0.8279964327812195, + "280": 0.8715904355049133, + "281": 1.2166608572006226, + "282": 1.1724765300750732, + "283": 0.9091575741767883, + "284": 0.7718557715415955, + "285": 0.8962677717208862, + "286": 0.9210909605026245, + "287": 1.0917760133743286, + "288": 0.9456706643104553, + "289": 0.759105384349823, + "290": 0.8831875324249268, + "291": 0.9598322510719299, + "292": 1.4102511405944824, + "293": 0.8130441904067993, + "294": 0.7901896238327026, + "295": 0.884803056716919, + "296": 0.8717489242553711, + "297": 0.8379859924316406, + "298": 0.9207040667533875, + "299": 0.9763308763504028, + "300": 1.0011659860610962, + "301": 0.9384574890136719, + "302": 0.9010646343231201, + "303": 1.242040753364563, + "304": 0.8243430256843567, + "305": 0.6908825635910034, + "306": 0.7374172806739807, + "307": 0.8237206935882568, + "308": 1.0682594776153564, + "309": 0.9662548899650574, + "310": 0.8974555134773254, + "311": 0.9084458947181702, + "312": 1.1049094200134277, + "313": 0.9480563998222351, + "314": 0.8933523297309875, + "315": 1.027705430984497, + "316": 1.4170277118682861, + "317": 0.8679870367050171, + "318": 0.716618001461029, + "319": 0.7424120306968689, + "320": 0.8490312099456787, + "321": 0.9254270792007446, + "322": 1.0150771141052246, + "323": 1.2359113693237305 + }, + "loss": { + "270": 2.502199411392212, + "271": 2.5485618114471436, + "272": 2.528486967086792, + "273": 2.532589912414551, + "274": 2.516814708709717, + "275": 2.5192108154296875, + "276": 2.538707733154297, + "277": 2.549635887145996, + "278": 2.512437105178833, + "279": 2.5092053413391113, + "280": 2.507082462310791, + "281": 2.5263195037841797, + "282": 2.602623462677002, + "283": 2.551633834838867, + "284": 2.5005698204040527, + "285": 2.4863510131835938, + "286": 2.524898052215576, + "287": 2.482865333557129, + "288": 2.566267967224121, + "289": 2.4417240619659424, + "290": 2.487529754638672, + "291": 2.4960594177246094, + "292": 2.5086896419525146, + "293": 2.563426971435547, + "294": 2.510998249053955, + "295": 2.532041549682617, + "296": 2.543931007385254, + "297": 2.4897828102111816, + "298": 2.502875328063965, + "299": 2.4739670753479004, + "300": 2.520045757293701, + "301": 2.5044987201690674, + "302": 2.4901108741760254, + "303": 2.471242904663086, + "304": 2.549704074859619, + "305": 2.478025197982788, + "306": 2.4522199630737305, + "307": 2.4467692375183105, + "308": 2.4881672859191895, + "309": 2.5033488273620605, + "310": 2.4888992309570312, + "311": 2.4655141830444336, + "312": 2.4746758937835693, + "313": 2.495021343231201, + "314": 2.4570131301879883, + "315": 2.461097240447998, + "316": 2.52840518951416, + "317": 2.5090882778167725, + "318": 2.4881362915039062, + "319": 2.4649782180786133, + "320": 2.4455957412719727, + "321": 2.4473414421081543, + "322": 2.476041316986084, + "323": 2.4688591957092285 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 0.430973, + 0.432016, + 0.436532, + 0.4368, + 0.436178, + 0.431449, + 0.426852, + 0.427231, + 0.436967, + 0.436341, + 0.433524, + 0.408473, + 0.413348, + 0.431566, + 0.439696, + 0.431646, + 0.430444, + 0.417499, + 0.429293, + 0.439914, + 0.432534, + 0.427402, + 0.391918, + 0.437807, + 0.438692, + 0.432944, + 0.433927, + 0.435503, + 0.430197, + 0.425927, + 0.424801, + 0.429001, + 0.431369, + 0.405302, + 0.436981, + 0.444152, + 0.441297, + 0.435952, + 0.419376, + 0.427049, + 0.431597, + 0.430564, + 0.416348, + 0.428212, + 0.431484, + 0.421986, + 0.39181, + 0.433782, + 0.442797, + 0.441117, + 0.434301, + 0.429159, + 0.423147, + 0.405758 + ], + "train_epoch_time": 4.8421950340271, + "train_loss": 2.5325661044209724, + "train_score": 0.24928376069198893, + "val_loss": 2.5795151303200443, + "val_score": 0.23620030867314365 + }, + { + "epoch": 6, + "grad_norm": 0.9817253351211548, + "learning_rate": 0.464, + "model_norm": 87.48594665527344, + "step_logs": { + "grad_norm": { + "324": 0.996283769607544, + "325": 0.7722855806350708, + "326": 0.8033398985862732, + "327": 0.8197267651557922, + "328": 0.9329664707183838, + "329": 1.2516851425170898, + "330": 0.9120838046073914, + "331": 0.9575769901275635, + "332": 0.9959551095962524, + "333": 0.9943754076957703, + "334": 0.9758836030960083, + "335": 1.2174527645111084, + "336": 1.0190855264663696, + "337": 0.8289433717727661, + "338": 1.024200677871704, + "339": 0.9762762784957886, + "340": 0.9613446593284607, + "341": 1.000328779220581, + "342": 1.1516013145446777, + "343": 0.977079451084137, + "344": 0.9106581211090088, + "345": 1.1612099409103394, + "346": 0.9213308095932007, + "347": 0.818103015422821, + "348": 1.3374097347259521, + "349": 0.8248967528343201, + "350": 0.7944849133491516, + "351": 0.841280996799469, + "352": 0.8733251690864563, + "353": 0.9115166664123535, + "354": 0.8414322137832642, + "355": 0.8549865484237671, + "356": 1.116495966911316, + "357": 0.8457679748535156, + "358": 1.2006250619888306, + "359": 1.0738205909729004, + "360": 0.8266971707344055, + "361": 0.8697921633720398, + "362": 1.067987084388733, + "363": 1.0071406364440918, + "364": 0.8190472722053528, + "365": 0.7928370237350464, + "366": 0.9520807862281799, + "367": 0.8789686560630798, + "368": 0.8155698776245117, + "369": 0.7787619233131409, + "370": 0.9483415484428406, + "371": 0.9625656604766846, + "372": 1.0349348783493042, + "373": 1.2274625301361084, + "374": 0.8992341756820679, + "375": 0.8969486951828003, + "376": 0.9945662617683411, + "377": 0.9817253351211548 + }, + "loss": { + "324": 2.5065536499023438, + "325": 2.4248814582824707, + "326": 2.4337539672851562, + "327": 2.4043781757354736, + "328": 2.4621071815490723, + "329": 2.438467264175415, + "330": 2.4949569702148438, + "331": 2.46988582611084, + "332": 2.4390950202941895, + "333": 2.4317359924316406, + "334": 2.434683322906494, + "335": 2.4207005500793457, + "336": 2.535224437713623, + "337": 2.3995237350463867, + "338": 2.4224109649658203, + "339": 2.4604482650756836, + "340": 2.4257209300994873, + "341": 2.4305429458618164, + "342": 2.4400181770324707, + "343": 2.4389264583587646, + "344": 2.4062788486480713, + "345": 2.413264274597168, + "346": 2.4867923259735107, + "347": 2.3970446586608887, + "348": 2.4382452964782715, + "349": 2.464383840560913, + "350": 2.44979190826416, + "351": 2.417203426361084, + "352": 2.404001474380493, + "353": 2.3936820030212402, + "354": 2.4074127674102783, + "355": 2.3702239990234375, + "356": 2.388772487640381, + "357": 2.4222207069396973, + "358": 2.428964614868164, + "359": 2.4855432510375977, + "360": 2.354668140411377, + "361": 2.3889927864074707, + "362": 2.376720905303955, + "363": 2.4616425037384033, + "364": 2.355320453643799, + "365": 2.373386859893799, + "366": 2.377262592315674, + "367": 2.412806272506714, + "368": 2.368696689605713, + "369": 2.365194320678711, + "370": 2.3522796630859375, + "371": 2.442009925842285, + "372": 2.398568868637085, + "373": 2.4214911460876465, + "374": 2.4210433959960938, + "375": 2.3750839233398438, + "376": 2.3622050285339355, + "377": 2.3980607986450195 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 0.424959, + 0.438952, + 0.437109, + 0.435747, + 0.428828, + 0.403808, + 0.430684, + 0.427204, + 0.423996, + 0.424002, + 0.425396, + 0.406286, + 0.42373, + 0.435094, + 0.42164, + 0.425738, + 0.426318, + 0.423545, + 0.412043, + 0.425371, + 0.429647, + 0.410754, + 0.429951, + 0.435772, + 0.396516, + 0.436066, + 0.437828, + 0.434486, + 0.432189, + 0.429419, + 0.434363, + 0.433017, + 0.413891, + 0.434248, + 0.407846, + 0.418913, + 0.434727, + 0.432243, + 0.417515, + 0.423513, + 0.43524, + 0.43714, + 0.426289, + 0.431914, + 0.43562, + 0.437947, + 0.426196, + 0.426461, + 0.420442, + 0.40547, + 0.430631, + 0.430193, + 0.422914, + 0.424426 + ], + "train_epoch_time": 4.844985723495483, + "train_loss": 2.356054432936002, + "train_score": 0.3018673781513997, + "val_loss": 2.3983173707048873, + "val_score": 0.295749318996609 + }, + { + "epoch": 7, + "grad_norm": 1.005070447921753, + "learning_rate": 0.464, + "model_norm": 87.54090118408203, + "step_logs": { + "grad_norm": { + "378": 0.8268588781356812, + "379": 0.8470097780227661, + "380": 0.7971072196960449, + "381": 0.834544837474823, + "382": 0.8371056318283081, + "383": 0.8414035439491272, + "384": 1.1593236923217773, + "385": 1.146713376045227, + "386": 0.931350827217102, + "387": 1.0470621585845947, + "388": 0.9749681353569031, + "389": 1.0175281763076782, + "390": 0.828610897064209, + "391": 0.7926346659660339, + "392": 0.9615519642829895, + "393": 0.9542427062988281, + "394": 0.9155589938163757, + "395": 0.8437886834144592, + "396": 0.8221478462219238, + "397": 0.8341313600540161, + "398": 0.8109428286552429, + "399": 1.310091257095337, + "400": 0.8244293332099915, + "401": 0.8038738369941711, + "402": 0.9810850024223328, + "403": 1.0706017017364502, + "404": 0.8593807220458984, + "405": 0.7971512675285339, + "406": 0.8965023756027222, + "407": 0.930963933467865, + "408": 0.9270331859588623, + "409": 0.9097792506217957, + "410": 0.999769926071167, + "411": 0.8395422697067261, + "412": 0.7553023099899292, + "413": 0.7785903811454773, + "414": 0.9699762463569641, + "415": 0.9446796774864197, + "416": 0.8213992118835449, + "417": 0.8536132574081421, + "418": 0.9033850431442261, + "419": 0.8954663276672363, + "420": 0.9131744503974915, + "421": 0.9360767602920532, + "422": 1.1010725498199463, + "423": 0.9090158343315125, + "424": 0.7672095894813538, + "425": 0.7239177227020264, + "426": 0.7278748154640198, + "427": 0.8265972137451172, + "428": 0.8000284433364868, + "429": 0.7516077756881714, + "430": 0.7950315475463867, + "431": 1.005070447921753 + }, + "loss": { + "378": 2.362210273742676, + "379": 2.3281240463256836, + "380": 2.3612046241760254, + "381": 2.317265748977661, + "382": 2.3329477310180664, + "383": 2.329225540161133, + "384": 2.350372791290283, + "385": 2.429685354232788, + "386": 2.353789806365967, + "387": 2.381924867630005, + "388": 2.3688807487487793, + "389": 2.3693394660949707, + "390": 2.3619942665100098, + "391": 2.3304827213287354, + "392": 2.317772388458252, + "393": 2.375673294067383, + "394": 2.3185245990753174, + "395": 2.337268829345703, + "396": 2.327305555343628, + "397": 2.3292040824890137, + "398": 2.3386919498443604, + "399": 2.3263797760009766, + "400": 2.393317699432373, + "401": 2.337904214859009, + "402": 2.3414881229400635, + "403": 2.38059663772583, + "404": 2.3296093940734863, + "405": 2.328822612762451, + "406": 2.341782569885254, + "407": 2.3438730239868164, + "408": 2.3233118057250977, + "409": 2.35960054397583, + "410": 2.328158140182495, + "411": 2.367513418197632, + "412": 2.3184781074523926, + "413": 2.302306652069092, + "414": 2.326796054840088, + "415": 2.3582675457000732, + "416": 2.317800283432007, + "417": 2.3093037605285645, + "418": 2.299227714538574, + "419": 2.3459649085998535, + "420": 2.3045103549957275, + "421": 2.3308467864990234, + "422": 2.32253098487854, + "423": 2.372323513031006, + "424": 2.302820920944214, + "425": 2.297060012817383, + "426": 2.2756214141845703, + "427": 2.288423538208008, + "428": 2.2854299545288086, + "429": 2.292900323867798, + "430": 2.273482322692871, + "431": 2.306735038757324 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 0.434804, + 0.433041, + 0.436735, + 0.433755, + 0.433772, + 0.433436, + 0.409653, + 0.41224, + 0.427454, + 0.419233, + 0.424483, + 0.42129, + 0.434685, + 0.436688, + 0.424696, + 0.426109, + 0.428092, + 0.433373, + 0.434709, + 0.433928, + 0.435584, + 0.396187, + 0.435318, + 0.436038, + 0.423601, + 0.417378, + 0.432211, + 0.436376, + 0.429779, + 0.42734, + 0.427328, + 0.429081, + 0.42197, + 0.434023, + 0.438943, + 0.437288, + 0.424205, + 0.426551, + 0.434647, + 0.432351, + 0.428698, + 0.429909, + 0.428064, + 0.426778, + 0.413878, + 0.429308, + 0.438025, + 0.440675, + 0.440222, + 0.433941, + 0.435692, + 0.438912, + 0.435885, + 0.421206 + ], + "train_epoch_time": 4.845086574554443, + "train_loss": 2.326692772800987, + "train_score": 0.31942925027316404, + "val_loss": 2.37390440777714, + "val_score": 0.30585802925577393 + }, + { + "epoch": 8, + "grad_norm": 0.7360766530036926, + "learning_rate": 0.464, + "model_norm": 87.59782409667969, + "step_logs": { + "grad_norm": { + "432": 0.9492727518081665, + "433": 0.9889562129974365, + "434": 0.9355610609054565, + "435": 0.8517712950706482, + "436": 1.0749973058700562, + "437": 0.8070528507232666, + "438": 0.6129425168037415, + "439": 0.7382088899612427, + "440": 0.8592383861541748, + "441": 0.9270436763763428, + "442": 1.020334243774414, + "443": 1.0437281131744385, + "444": 1.07532799243927, + "445": 0.8778788447380066, + "446": 0.7227190732955933, + "447": 0.699779748916626, + "448": 0.7732878923416138, + "449": 0.8422675132751465, + "450": 0.8751144409179688, + "451": 1.074497938156128, + "452": 0.9190720319747925, + "453": 0.7915751934051514, + "454": 0.9204183220863342, + "455": 0.9571609497070312, + "456": 0.8995140790939331, + "457": 0.9322042465209961, + "458": 1.0969650745391846, + "459": 0.8717686533927917, + "460": 0.8497338891029358, + "461": 1.1798006296157837, + "462": 0.9829171895980835, + "463": 0.6557402014732361, + "464": 0.7098110914230347, + "465": 0.820646345615387, + "466": 0.8885836005210876, + "467": 0.8325942158699036, + "468": 0.8422518372535706, + "469": 0.9479615092277527, + "470": 0.8833321928977966, + "471": 0.8008051514625549, + "472": 0.7530004382133484, + "473": 0.8229758739471436, + "474": 0.9000000953674316, + "475": 0.9692131876945496, + "476": 1.2287533283233643, + "477": 1.1396442651748657, + "478": 0.8824846744537354, + "479": 0.8060889840126038, + "480": 0.8273478746414185, + "481": 0.9048917889595032, + "482": 0.9326873421669006, + "483": 0.8799415230751038, + "484": 0.7800669074058533, + "485": 0.7360766530036926 + }, + "loss": { + "432": 2.342108726501465, + "433": 2.3036346435546875, + "434": 2.3311920166015625, + "435": 2.299793243408203, + "436": 2.322309970855713, + "437": 2.326324939727783, + "438": 2.2880859375, + "439": 2.242396831512451, + "440": 2.3136098384857178, + "441": 2.305769920349121, + "442": 2.302980899810791, + "443": 2.3291239738464355, + "444": 2.318730354309082, + "445": 2.327669143676758, + "446": 2.2609777450561523, + "447": 2.237064838409424, + "448": 2.2484545707702637, + "449": 2.2642552852630615, + "450": 2.2931790351867676, + "451": 2.2730207443237305, + "452": 2.3069095611572266, + "453": 2.260981559753418, + "454": 2.275710105895996, + "455": 2.2993481159210205, + "456": 2.274301528930664, + "457": 2.3036623001098633, + "458": 2.2984368801116943, + "459": 2.306331157684326, + "460": 2.2610087394714355, + "461": 2.2936747074127197, + "462": 2.3422675132751465, + "463": 2.234441041946411, + "464": 2.240450620651245, + "465": 2.2709479331970215, + "466": 2.2742180824279785, + "467": 2.267306327819824, + "468": 2.2503585815429688, + "469": 2.2720706462860107, + "470": 2.283860206604004, + "471": 2.2249021530151367, + "472": 2.2511680126190186, + "473": 2.227466106414795, + "474": 2.250838279724121, + "475": 2.2631845474243164, + "476": 2.273127555847168, + "477": 2.3066482543945312, + "478": 2.2900753021240234, + "479": 2.240640163421631, + "480": 2.2646498680114746, + "481": 2.2605137825012207, + "482": 2.2660460472106934, + "483": 2.233567476272583, + "484": 2.2607524394989014, + "485": 2.243975877761841 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 0.425977, + 0.422395, + 0.426821, + 0.432356, + 0.415977, + 0.435699, + 0.446973, + 0.439235, + 0.432017, + 0.427071, + 0.419956, + 0.41858, + 0.415884, + 0.430901, + 0.440397, + 0.441575, + 0.437035, + 0.432558, + 0.430635, + 0.415086, + 0.42767, + 0.435969, + 0.427112, + 0.424738, + 0.428622, + 0.42666, + 0.413746, + 0.431047, + 0.431994, + 0.406736, + 0.423476, + 0.44417, + 0.440993, + 0.434131, + 0.429412, + 0.433267, + 0.432378, + 0.425002, + 0.429923, + 0.434917, + 0.438383, + 0.433425, + 0.428246, + 0.423243, + 0.402046, + 0.41039, + 0.430069, + 0.43475, + 0.433595, + 0.428029, + 0.426055, + 0.42946, + 0.436728, + 0.439387 + ], + "train_epoch_time": 4.845744371414185, + "train_loss": 2.243636061710812, + "train_score": 0.3346025376699576, + "val_loss": 2.3135698170119943, + "val_score": 0.3151236011568632 + }, + { + "epoch": 9, + "grad_norm": 0.843839168548584, + "learning_rate": 0.464, + "model_norm": 87.65990447998047, + "step_logs": { + "grad_norm": { + "486": 0.8283859491348267, + "487": 0.8237842321395874, + "488": 0.820415735244751, + "489": 0.8672774434089661, + "490": 0.8662886023521423, + "491": 0.8299398422241211, + "492": 0.7683955430984497, + "493": 0.7220995426177979, + "494": 0.6857521533966064, + "495": 0.8653535842895508, + "496": 0.8666710257530212, + "497": 0.8675562739372253, + "498": 1.0014952421188354, + "499": 1.167513370513916, + "500": 1.0229462385177612, + "501": 0.7397806644439697, + "502": 0.7110167741775513, + "503": 0.6809916496276855, + "504": 0.767841100692749, + "505": 0.7951772809028625, + "506": 0.7795646786689758, + "507": 0.8319077491760254, + "508": 0.9412534236907959, + "509": 0.9180862903594971, + "510": 0.9317770004272461, + "511": 0.9476799964904785, + "512": 0.9560616612434387, + "513": 0.849151074886322, + "514": 0.8399502038955688, + "515": 0.8634765148162842, + "516": 0.7798449397087097, + "517": 0.7874664664268494, + "518": 0.8707910776138306, + "519": 0.8813244104385376, + "520": 0.842995285987854, + "521": 0.8618345856666565, + "522": 0.8467002511024475, + "523": 0.9257267713546753, + "524": 1.0691838264465332, + "525": 0.8687216639518738, + "526": 0.6504631042480469, + "527": 0.6196876764297485, + "528": 0.7249985933303833, + "529": 0.71677166223526, + "530": 0.7958369255065918, + "531": 0.9018344879150391, + "532": 0.9132189154624939, + "533": 0.9065372347831726, + "534": 0.9300948977470398, + "535": 1.0274509191513062, + "536": 0.893096387386322, + "537": 0.8255204558372498, + "538": 0.8568991422653198, + "539": 0.843839168548584 + }, + "loss": { + "486": 2.2444119453430176, + "487": 2.234156370162964, + "488": 2.232797622680664, + "489": 2.238800525665283, + "490": 2.227388858795166, + "491": 2.2598471641540527, + "492": 2.2319464683532715, + "493": 2.2023682594299316, + "494": 2.222175359725952, + "495": 2.23241925239563, + "496": 2.2589008808135986, + "497": 2.2227718830108643, + "498": 2.2694079875946045, + "499": 2.227041721343994, + "500": 2.2854886054992676, + "501": 2.2369472980499268, + "502": 2.218459129333496, + "503": 2.21677303314209, + "504": 2.232786178588867, + "505": 2.2118406295776367, + "506": 2.2002336978912354, + "507": 2.218547821044922, + "508": 2.221189260482788, + "509": 2.2538862228393555, + "510": 2.2390835285186768, + "511": 2.2270569801330566, + "512": 2.2375235557556152, + "513": 2.273449659347534, + "514": 2.2211389541625977, + "515": 2.2329139709472656, + "516": 2.198747396469116, + "517": 2.2132506370544434, + "518": 2.2233290672302246, + "519": 2.2301084995269775, + "520": 2.231362819671631, + "521": 2.2087626457214355, + "522": 2.2147560119628906, + "523": 2.2510318756103516, + "524": 2.2430527210235596, + "525": 2.26008939743042, + "526": 2.2032294273376465, + "527": 2.142625570297241, + "528": 2.169355630874634, + "529": 2.1630747318267822, + "530": 2.2113561630249023, + "531": 2.1988039016723633, + "532": 2.225597858428955, + "533": 2.230647087097168, + "534": 2.205254316329956, + "535": 2.188803195953369, + "536": 2.2361598014831543, + "537": 2.183591842651367, + "538": 2.192451000213623, + "539": 2.1907026767730713 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 0.433267, + 0.433455, + 0.43367, + 0.430449, + 0.43036, + 0.433356, + 0.43717, + 0.43984, + 0.442286, + 0.430498, + 0.430769, + 0.430204, + 0.420848, + 0.406305, + 0.419446, + 0.439078, + 0.440701, + 0.442522, + 0.437216, + 0.43514, + 0.436057, + 0.432686, + 0.4247, + 0.426957, + 0.425704, + 0.424303, + 0.423832, + 0.432198, + 0.432154, + 0.43064, + 0.436021, + 0.43568, + 0.429978, + 0.42931, + 0.432075, + 0.43042, + 0.431589, + 0.426344, + 0.414939, + 0.430639, + 0.444209, + 0.445477, + 0.439306, + 0.439767, + 0.435089, + 0.427329, + 0.426889, + 0.427463, + 0.425294, + 0.417306, + 0.428537, + 0.432672, + 0.430547, + 0.431464 + ], + "train_epoch_time": 4.845773220062256, + "train_loss": 2.203148044985712, + "train_score": 0.35310146151006305, + "val_loss": 2.276024468867568, + "val_score": 0.3372829365880837 + }, + { + "epoch": 10, + "grad_norm": 1.0875672101974487, + "learning_rate": 0.464, + "model_norm": 87.72252655029297, + "step_logs": { + "grad_norm": { + "540": 0.837222158908844, + "541": 0.7835797667503357, + "542": 0.7610028982162476, + "543": 0.7669121623039246, + "544": 0.8301094174385071, + "545": 0.9284971952438354, + "546": 0.8918527960777283, + "547": 0.866207480430603, + "548": 0.8836102485656738, + "549": 0.893557608127594, + "550": 0.878462553024292, + "551": 0.8611385822296143, + "552": 0.9313326478004456, + "553": 0.9181557297706604, + "554": 0.8733949065208435, + "555": 0.8756493926048279, + "556": 0.9271297454833984, + "557": 0.955710232257843, + "558": 0.9466307759284973, + "559": 0.8552358746528625, + "560": 0.9055444598197937, + "561": 0.8739283084869385, + "562": 0.8171818256378174, + "563": 0.8210787773132324, + "564": 0.827390193939209, + "565": 0.7717651128768921, + "566": 0.7284547090530396, + "567": 0.7656211256980896, + "568": 0.744737982749939, + "569": 0.8547093868255615, + "570": 0.9206417798995972, + "571": 1.081030011177063, + "572": 1.1768319606781006, + "573": 0.866636335849762, + "574": 0.8268339037895203, + "575": 0.8542226552963257, + "576": 0.9271738529205322, + "577": 0.9265819191932678, + "578": 0.9929571747779846, + "579": 0.9603354334831238, + "580": 0.899196207523346, + "581": 0.8540847897529602, + "582": 0.8603414297103882, + "583": 0.8398193120956421, + "584": 0.8098708391189575, + "585": 0.8223497867584229, + "586": 0.8144658207893372, + "587": 0.7410102486610413, + "588": 0.706733226776123, + "589": 0.7647109627723694, + "590": 0.8074276447296143, + "591": 0.8744938969612122, + "592": 0.9778786301612854, + "593": 1.0875672101974487 + }, + "loss": { + "540": 2.205142021179199, + "541": 2.193326950073242, + "542": 2.2021360397338867, + "543": 2.196610927581787, + "544": 2.167433261871338, + "545": 2.186608076095581, + "546": 2.2147414684295654, + "547": 2.201941967010498, + "548": 2.2095420360565186, + "549": 2.1882946491241455, + "550": 2.214224338531494, + "551": 2.200875759124756, + "552": 2.1780171394348145, + "553": 2.20267391204834, + "554": 2.1953084468841553, + "555": 2.2122104167938232, + "556": 2.195798873901367, + "557": 2.194793701171875, + "558": 2.176562786102295, + "559": 2.2067105770111084, + "560": 2.146296501159668, + "561": 2.2106218338012695, + "562": 2.1835570335388184, + "563": 2.1711323261260986, + "564": 2.19504451751709, + "565": 2.1459617614746094, + "566": 2.1423821449279785, + "567": 2.15317702293396, + "568": 2.170269012451172, + "569": 2.1745219230651855, + "570": 2.202603816986084, + "571": 2.1873385906219482, + "572": 2.2740583419799805, + "573": 2.20631742477417, + "574": 2.1620872020721436, + "575": 2.154696464538574, + "576": 2.1710996627807617, + "577": 2.1926236152648926, + "578": 2.2058892250061035, + "579": 2.2136383056640625, + "580": 2.1933369636535645, + "581": 2.171816825866699, + "582": 2.1926708221435547, + "583": 2.145845413208008, + "584": 2.184480667114258, + "585": 2.1450212001800537, + "586": 2.1509454250335693, + "587": 2.1145853996276855, + "588": 2.1454873085021973, + "589": 2.128196954727173, + "590": 2.1574501991271973, + "591": 2.159390449523926, + "592": 2.1849117279052734, + "593": 2.183688163757324 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 0.432132, + 0.435703, + 0.437318, + 0.436862, + 0.432127, + 0.425115, + 0.428313, + 0.430006, + 0.428843, + 0.427788, + 0.429289, + 0.430359, + 0.424756, + 0.426161, + 0.429385, + 0.429466, + 0.425368, + 0.423146, + 0.423545, + 0.430867, + 0.426221, + 0.429568, + 0.43326, + 0.43282, + 0.432693, + 0.435929, + 0.438786, + 0.436435, + 0.438029, + 0.430451, + 0.425971, + 0.41283, + 0.406557, + 0.430037, + 0.432288, + 0.4302, + 0.424963, + 0.425359, + 0.420405, + 0.423105, + 0.427443, + 0.430457, + 0.4303, + 0.431125, + 0.433783, + 0.432375, + 0.433018, + 0.437635, + 0.440224, + 0.436193, + 0.433602, + 0.428771, + 0.42123, + 0.412201 + ], + "train_epoch_time": 4.845861434936523, + "train_loss": 2.2146974980232534, + "train_score": 0.3436345499078376, + "val_loss": 2.3004243965127027, + "val_score": 0.3174153278944276 + }, + { + "epoch": 11, + "grad_norm": 0.6724064946174622, + "learning_rate": 0.464, + "model_norm": 87.78770446777344, + "step_logs": { + "grad_norm": { + "594": 1.0580754280090332, + "595": 0.9212614893913269, + "596": 0.9642578363418579, + "597": 0.9247661828994751, + "598": 0.8898668885231018, + "599": 0.764549195766449, + "600": 0.7429081797599792, + "601": 0.809561014175415, + "602": 0.8214095830917358, + "603": 0.7443106174468994, + "604": 0.7142700552940369, + "605": 0.7997999787330627, + "606": 0.8533837199211121, + "607": 0.8903974890708923, + "608": 0.8421460390090942, + "609": 0.7767924070358276, + "610": 0.7984451055526733, + "611": 0.7904222011566162, + "612": 0.7703580856323242, + "613": 0.7888203859329224, + "614": 0.8999834060668945, + "615": 0.9519425630569458, + "616": 1.2772712707519531, + "617": 1.1403990983963013, + "618": 1.3531757593154907, + "619": 1.0410200357437134, + "620": 0.9213559031486511, + "621": 0.8604251146316528, + "622": 0.9573296308517456, + "623": 0.9610622525215149, + "624": 0.9523951411247253, + "625": 0.876518964767456, + "626": 0.8423910737037659, + "627": 0.8058450818061829, + "628": 0.7705907225608826, + "629": 0.8506250381469727, + "630": 0.927774965763092, + "631": 0.8788832426071167, + "632": 0.8165997266769409, + "633": 0.7511072754859924, + "634": 0.7412652373313904, + "635": 0.7697358727455139, + "636": 0.7523447871208191, + "637": 0.7207874059677124, + "638": 0.736746609210968, + "639": 0.7831798195838928, + "640": 0.782400906085968, + "641": 0.7954618334770203, + "642": 0.8386595249176025, + "643": 0.8775931596755981, + "644": 0.8895736336708069, + "645": 0.8963454961776733, + "646": 0.7848446369171143, + "647": 0.6724064946174622 + }, + "loss": { + "594": 2.2167015075683594, + "595": 2.191697835922241, + "596": 2.186465263366699, + "597": 2.1545634269714355, + "598": 2.1651735305786133, + "599": 2.123749256134033, + "600": 2.115161180496216, + "601": 2.1567606925964355, + "602": 2.1536784172058105, + "603": 2.1429710388183594, + "604": 2.1172914505004883, + "605": 2.147050380706787, + "606": 2.153956174850464, + "607": 2.1501283645629883, + "608": 2.1439740657806396, + "609": 2.1566271781921387, + "610": 2.159396171569824, + "611": 2.1103720664978027, + "612": 2.151888847351074, + "613": 2.110909938812256, + "614": 2.1499547958374023, + "615": 2.155078172683716, + "616": 2.1719589233398438, + "617": 2.2504055500030518, + "618": 2.256662607192993, + "619": 2.2294726371765137, + "620": 2.166477680206299, + "621": 2.1753811836242676, + "622": 2.1534321308135986, + "623": 2.1601600646972656, + "624": 2.170780658721924, + "625": 2.141709327697754, + "626": 2.1476693153381348, + "627": 2.162611961364746, + "628": 2.1294023990631104, + "629": 2.1327340602874756, + "630": 2.1269547939300537, + "631": 2.13181209564209, + "632": 2.134298324584961, + "633": 2.1343894004821777, + "634": 2.084883689880371, + "635": 2.1140456199645996, + "636": 2.1203877925872803, + "637": 2.126919746398926, + "638": 2.1183371543884277, + "639": 2.0925917625427246, + "640": 2.1059441566467285, + "641": 2.1270558834075928, + "642": 2.1192996501922607, + "643": 2.155471086502075, + "644": 2.113860607147217, + "645": 2.131554126739502, + "646": 2.135742425918579, + "647": 2.0920801162719727 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 0.415335, + 0.42575, + 0.422333, + 0.424875, + 0.427709, + 0.43615, + 0.437515, + 0.433443, + 0.432561, + 0.437746, + 0.439434, + 0.434001, + 0.430251, + 0.427435, + 0.430929, + 0.435717, + 0.434256, + 0.434179, + 0.436098, + 0.434299, + 0.426705, + 0.422758, + 0.395142, + 0.409145, + 0.390491, + 0.416976, + 0.425335, + 0.430046, + 0.422303, + 0.422126, + 0.422995, + 0.428351, + 0.430964, + 0.433781, + 0.435805, + 0.430144, + 0.424175, + 0.42802, + 0.43264, + 0.43719, + 0.437264, + 0.435672, + 0.43694, + 0.439115, + 0.437964, + 0.434456, + 0.434686, + 0.434044, + 0.430828, + 0.428481, + 0.426921, + 0.426688, + 0.4349, + 0.441846 + ], + "train_epoch_time": 4.8454203605651855, + "train_loss": 2.098674478489836, + "train_score": 0.38179698715593074, + "val_loss": 2.205912221039204, + "val_score": 0.35292587512259366 + }, + { + "epoch": 12, + "grad_norm": 0.5794128179550171, + "learning_rate": 0.464, + "model_norm": 87.84599304199219, + "step_logs": { + "grad_norm": { + "648": 0.7280871868133545, + "649": 0.7949674129486084, + "650": 0.8373463749885559, + "651": 0.8505271673202515, + "652": 0.8080264925956726, + "653": 0.7680744528770447, + "654": 0.7920119762420654, + "655": 0.7798787951469421, + "656": 0.8150591850280762, + "657": 0.8534115552902222, + "658": 0.9188917875289917, + "659": 0.9360884428024292, + "660": 0.914743185043335, + "661": 0.9153463840484619, + "662": 0.7891954183578491, + "663": 0.6947863698005676, + "664": 0.7170270681381226, + "665": 0.720542311668396, + "666": 0.6777104735374451, + "667": 0.6016910076141357, + "668": 0.6527369618415833, + "669": 0.6453735828399658, + "670": 0.6522302031517029, + "671": 0.7421714663505554, + "672": 0.7705524563789368, + "673": 0.773090124130249, + "674": 0.6778600215911865, + "675": 0.6144305467605591, + "676": 0.6275186538696289, + "677": 0.6657248139381409, + "678": 0.76212477684021, + "679": 0.7866000533103943, + "680": 0.7449837327003479, + "681": 0.6946861147880554, + "682": 0.713527262210846, + "683": 0.7362378835678101, + "684": 0.733871340751648, + "685": 0.723556637763977, + "686": 0.6378058195114136, + "687": 0.5687558054924011, + "688": 0.5698312520980835, + "689": 0.56536465883255, + "690": 0.5483888387680054, + "691": 0.5366402268409729, + "692": 0.6047871112823486, + "693": 0.6195173859596252, + "694": 0.6775323748588562, + "695": 0.7518928647041321, + "696": 0.7077875733375549, + "697": 0.6100155115127563, + "698": 0.5949641466140747, + "699": 0.6668450236320496, + "700": 0.6424546837806702, + "701": 0.5794128179550171 + }, + "loss": { + "648": 2.094564437866211, + "649": 2.105900764465332, + "650": 2.07905912399292, + "651": 2.12368106842041, + "652": 2.1148014068603516, + "653": 2.090708017349243, + "654": 2.1004831790924072, + "655": 2.1072311401367188, + "656": 2.1105024814605713, + "657": 2.0993008613586426, + "658": 2.109952688217163, + "659": 2.1300911903381348, + "660": 2.124464511871338, + "661": 2.088480234146118, + "662": 2.138444185256958, + "663": 2.117091178894043, + "664": 2.0913760662078857, + "665": 2.10170841217041, + "666": 2.056595802307129, + "667": 2.0336270332336426, + "668": 2.066512107849121, + "669": 2.0774474143981934, + "670": 2.0543394088745117, + "671": 2.084582805633545, + "672": 2.090348243713379, + "673": 2.061830520629883, + "674": 2.0692152976989746, + "675": 2.0532665252685547, + "676": 2.0165762901306152, + "677": 2.047377824783325, + "678": 2.040558338165283, + "679": 2.086064338684082, + "680": 2.0734877586364746, + "681": 2.067492961883545, + "682": 2.0537562370300293, + "683": 2.065237522125244, + "684": 2.0659971237182617, + "685": 2.0526604652404785, + "686": 2.0711114406585693, + "687": 2.083730697631836, + "688": 2.033512592315674, + "689": 2.0273308753967285, + "690": 2.0299129486083984, + "691": 2.0306572914123535, + "692": 2.001108169555664, + "693": 1.996739387512207, + "694": 2.0321741104125977, + "695": 2.06087327003479, + "696": 2.025920867919922, + "697": 2.066075325012207, + "698": 2.0253281593322754, + "699": 2.0574288368225098, + "700": 2.0433220863342285, + "701": 2.0183725357055664 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 0.438266, + 0.431293, + 0.425399, + 0.422627, + 0.422994, + 0.422852, + 0.418869, + 0.41722, + 0.412454, + 0.407264, + 0.400472, + 0.397163, + 0.396115, + 0.393113, + 0.399255, + 0.401752, + 0.397729, + 0.395044, + 0.394286, + 0.395173, + 0.39035, + 0.388138, + 0.385003, + 0.378229, + 0.374251, + 0.37128, + 0.373382, + 0.373394, + 0.36994, + 0.365854, + 0.358769, + 0.355432, + 0.354672, + 0.354207, + 0.350682, + 0.34718, + 0.344676, + 0.342392, + 0.34319, + 0.342919, + 0.339948, + 0.337351, + 0.33517, + 0.332796, + 0.327849, + 0.324655, + 0.32023, + 0.315145, + 0.313848, + 0.314488, + 0.312042, + 0.307366, + 0.305345, + 0.304297 + ], + "train_epoch_time": 4.844924688339233, + "train_loss": 2.0192010106452054, + "train_score": 0.4027382980460245, + "val_loss": 2.138548207200901, + "val_score": 0.3692460177823405 + }, + { + "epoch": 13, + "grad_norm": 0.435800164937973, + "learning_rate": 0.3093333333333334, + "model_norm": 87.88268280029297, + "step_logs": { + "grad_norm": { + "702": 0.6073912978172302, + "703": 0.6126645803451538, + "704": 0.5555345416069031, + "705": 0.5069809556007385, + "706": 0.48485493659973145, + "707": 0.5341897010803223, + "708": 0.5930553674697876, + "709": 0.5822255611419678, + "710": 0.5164859890937805, + "711": 0.482624351978302, + "712": 0.49150392413139343, + "713": 0.5376775860786438, + "714": 0.5280558466911316, + "715": 0.5728492736816406, + "716": 0.5391548275947571, + "717": 0.5202983617782593, + "718": 0.5325655341148376, + "719": 0.5430764555931091, + "720": 0.4934731125831604, + "721": 0.43142956495285034, + "722": 0.461704283952713, + "723": 0.4697701632976532, + "724": 0.490200012922287, + "725": 0.5546342730522156, + "726": 0.5595705509185791, + "727": 0.5540966987609863, + "728": 0.5454463958740234, + "729": 0.5378643274307251, + "730": 0.4859277606010437, + "731": 0.48428964614868164, + "732": 0.4919300377368927, + "733": 0.47724759578704834, + "734": 0.43840646743774414, + "735": 0.41453710198402405, + "736": 0.436782568693161, + "737": 0.44236692786216736, + "738": 0.40313801169395447, + "739": 0.42692598700523376, + "740": 0.4014340937137604, + "741": 0.42544159293174744, + "742": 0.427396297454834, + "743": 0.4532626271247864, + "744": 0.4493344724178314, + "745": 0.4231850504875183, + "746": 0.43626317381858826, + "747": 0.4043489396572113, + "748": 0.421360045671463, + "749": 0.3930183947086334, + "750": 0.39664310216903687, + "751": 0.40400469303131104, + "752": 0.4585326611995697, + "753": 0.4349212646484375, + "754": 0.4583161175251007, + "755": 0.435800164937973 + }, + "loss": { + "702": 2.0058786869049072, + "703": 2.018357276916504, + "704": 1.9854450225830078, + "705": 1.9827589988708496, + "706": 2.0101265907287598, + "707": 2.011603355407715, + "708": 2.0356409549713135, + "709": 2.007253646850586, + "710": 2.01055908203125, + "711": 2.0131640434265137, + "712": 2.0131869316101074, + "713": 2.0186500549316406, + "714": 1.9881646633148193, + "715": 2.021908760070801, + "716": 1.9943209886550903, + "717": 2.017972946166992, + "718": 2.008711338043213, + "719": 2.007040500640869, + "720": 2.002372980117798, + "721": 1.9961776733398438, + "722": 1.988890528678894, + "723": 1.9769232273101807, + "724": 1.9916582107543945, + "725": 1.9797203540802002, + "726": 1.9986188411712646, + "727": 1.9732967615127563, + "728": 1.9990925788879395, + "729": 2.0032875537872314, + "730": 2.014237880706787, + "731": 1.9672949314117432, + "732": 2.0122385025024414, + "733": 1.9859099388122559, + "734": 1.9855337142944336, + "735": 1.976871371269226, + "736": 1.9841015338897705, + "737": 1.9613080024719238, + "738": 1.9689198732376099, + "739": 1.9993987083435059, + "740": 2.0084848403930664, + "741": 1.9688761234283447, + "742": 2.0016400814056396, + "743": 2.0074009895324707, + "744": 1.9846971035003662, + "745": 2.0019288063049316, + "746": 1.977049708366394, + "747": 1.9792331457138062, + "748": 1.9722143411636353, + "749": 1.9319303035736084, + "750": 1.968343734741211, + "751": 1.956522822380066, + "752": 1.9476523399353027, + "753": 1.959883451461792, + "754": 1.953392744064331, + "755": 1.9551382064819336 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 0.300777, + 0.297978, + 0.296606, + 0.294991, + 0.292777, + 0.288966, + 0.284956, + 0.282386, + 0.281079, + 0.278979, + 0.276043, + 0.272408, + 0.269761, + 0.26622, + 0.264053, + 0.261695, + 0.258694, + 0.255744, + 0.2538, + 0.25192, + 0.24869, + 0.245767, + 0.242714, + 0.238937, + 0.236142, + 0.233412, + 0.23083, + 0.228178, + 0.226099, + 0.22326, + 0.220446, + 0.217789, + 0.21541, + 0.212828, + 0.209813, + 0.206928, + 0.204482, + 0.20149, + 0.198894, + 0.195851, + 0.19305, + 0.190035, + 0.187237, + 0.184635, + 0.181705, + 0.17911, + 0.176172, + 0.173504, + 0.17068, + 0.167805, + 0.164647, + 0.161983, + 0.159024, + 0.156335 + ], + "train_epoch_time": 4.8451550006866455, + "train_loss": 1.964778862451036, + "train_score": 0.4169980720690367, + "val_loss": 2.096801211447995, + "val_score": 0.3804714407014792 + }, + { + "epoch": 14, + "grad_norm": 0.34223562479019165, + "learning_rate": 0.1546666666666667, + "model_norm": 87.89469909667969, + "step_logs": { + "grad_norm": { + "756": 0.42412814497947693, + "757": 0.4141750931739807, + "758": 0.4284485876560211, + "759": 0.4527234435081482, + "760": 0.4311065375804901, + "761": 0.3678427040576935, + "762": 0.38308480381965637, + "763": 0.368010014295578, + "764": 0.404380738735199, + "765": 0.41867321729660034, + "766": 0.40771737694740295, + "767": 0.3747258484363556, + "768": 0.41306740045547485, + "769": 0.4015268385410309, + "770": 0.40441352128982544, + "771": 0.3970872759819031, + "772": 0.3616786599159241, + "773": 0.41753914952278137, + "774": 0.35900408029556274, + "775": 0.37756288051605225, + "776": 0.3805305063724518, + "777": 0.37806326150894165, + "778": 0.3603123724460602, + "779": 0.39038917422294617, + "780": 0.378569632768631, + "781": 0.37287214398384094, + "782": 0.39142125844955444, + "783": 0.4298277199268341, + "784": 0.37450164556503296, + "785": 0.3769410252571106, + "786": 0.3681488335132599, + "787": 0.3858988881111145, + "788": 0.38970568776130676, + "789": 0.3641791045665741, + "790": 0.3625100553035736, + "791": 0.3731885254383087, + "792": 0.3449625074863434, + "793": 0.35954350233078003, + "794": 0.36654889583587646, + "795": 0.3550361394882202, + "796": 0.36998769640922546, + "797": 0.35033097863197327, + "798": 0.34985867142677307, + "799": 0.36207813024520874, + "800": 0.340498685836792, + "801": 0.34276333451271057, + "802": 0.3665873408317566, + "803": 0.3412649631500244, + "804": 0.36181315779685974, + "805": 0.3816055357456207, + "806": 0.3566543459892273, + "807": 0.3667950928211212, + "808": 0.38622111082077026, + "809": 0.34223562479019165 + }, + "loss": { + "756": 1.9722751379013062, + "757": 1.9607598781585693, + "758": 1.961469054222107, + "759": 1.9577608108520508, + "760": 1.948209524154663, + "761": 1.965173602104187, + "762": 1.9657812118530273, + "763": 1.9519789218902588, + "764": 1.9616801738739014, + "765": 1.9620543718338013, + "766": 1.9820327758789062, + "767": 1.9642672538757324, + "768": 1.9898093938827515, + "769": 1.9398128986358643, + "770": 1.9555704593658447, + "771": 1.9282821416854858, + "772": 1.9710948467254639, + "773": 1.9526134729385376, + "774": 1.978273630142212, + "775": 1.9627265930175781, + "776": 1.954703688621521, + "777": 1.9501171112060547, + "778": 1.9729516506195068, + "779": 1.964715838432312, + "780": 1.9740839004516602, + "781": 1.9470267295837402, + "782": 1.9209100008010864, + "783": 1.9696826934814453, + "784": 1.9587608575820923, + "785": 1.972074031829834, + "786": 1.923396348953247, + "787": 1.9671581983566284, + "788": 1.9669079780578613, + "789": 1.9630398750305176, + "790": 1.9407529830932617, + "791": 1.9612245559692383, + "792": 1.9570255279541016, + "793": 1.9639378786087036, + "794": 1.963686227798462, + "795": 1.9576197862625122, + "796": 1.936382532119751, + "797": 1.9589039087295532, + "798": 1.957580327987671, + "799": 1.9364898204803467, + "800": 1.9418058395385742, + "801": 1.949873924255371, + "802": 1.9419589042663574, + "803": 1.9290494918823242, + "804": 1.9263761043548584, + "805": 1.9481416940689087, + "806": 1.9577698707580566, + "807": 1.9249539375305176, + "808": 1.9796171188354492, + "809": 1.9654319286346436 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 0.153583, + 0.150801, + 0.147907, + 0.144966, + 0.142238, + 0.139671, + 0.13678, + 0.133992, + 0.131034, + 0.128151, + 0.125362, + 0.122621, + 0.119679, + 0.116862, + 0.114022, + 0.111196, + 0.108448, + 0.105476, + 0.102766, + 0.0998833, + 0.0970327, + 0.0941923, + 0.0913788, + 0.0884854, + 0.0856588, + 0.0828161, + 0.0799419, + 0.0770539, + 0.0742711, + 0.0714207, + 0.0685747, + 0.0657127, + 0.0628594, + 0.0600262, + 0.0571731, + 0.0543148, + 0.0514749, + 0.0486135, + 0.0457554, + 0.0429036, + 0.040042, + 0.0371912, + 0.0343335, + 0.0314726, + 0.0286175, + 0.0257578, + 0.0228954, + 0.0200373, + 0.0171752, + 0.0143133, + 0.0114525, + 0.00859001, + 0.00572716, + 0.00286395 + ], + "train_epoch_time": 4.844804048538208, + "train_loss": 1.9481386121068487, + "train_score": 0.42153985841544495, + "val_loss": 2.0832262726246147, + "val_score": 0.38454811286981017 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:32:08.945666", + "final_model_norm": 87.89469909667969, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:30:27.302467", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 3.4953649044036865, + "learning_rate": 4.64e-11, + "model_norm": 87.41687774658203, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.632101535797119, + "3": 9.477651596069336, + "4": 15.083366394042969, + "5": 6.223578929901123, + "6": 4.399288654327393, + "7": 6.111652851104736, + "8": 4.110215187072754, + "9": 6.012516498565674, + "10": 4.587062835693359, + "11": 4.777608871459961, + "12": 7.2164435386657715, + "13": 9.568192481994629, + "14": 3.8904850482940674, + "15": 5.508795261383057, + "16": 9.25052261352539, + "17": 6.2970147132873535, + "18": 10.49487590789795, + "19": 10.287344932556152, + "20": 13.054203033447266, + "21": 3.6671366691589355, + "22": 15.062812805175781, + "23": 6.225735187530518, + "24": 7.710809707641602, + "25": 7.015965461730957, + "26": 4.360213756561279, + "27": 16.566226959228516, + "28": 7.702916622161865, + "29": 9.260043144226074, + "30": 4.714838027954102, + "31": 3.2972371578216553, + "32": 5.28471040725708, + "33": 2.4057743549346924, + "34": 5.037460803985596, + "35": 7.777568340301514, + "36": 5.691040515899658, + "37": 5.113794326782227, + "38": 5.331128120422363, + "39": 23.418071746826172, + "40": 13.3868408203125, + "41": 3.1640076637268066, + "42": 8.675661087036133, + "43": 4.468127250671387, + "44": 4.63825798034668, + "45": 5.239930152893066, + "46": 8.783048629760742, + "47": 7.4971137046813965, + "48": 4.174231052398682, + "49": 2.8159408569335938, + "50": 4.93419885635376, + "51": 3.471348524093628, + "52": 7.316342353820801, + "53": 3.4953649044036865 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.8128108978271484, + "3": 3.8596410751342773, + "4": 4.148884296417236, + "5": 4.280588150024414, + "6": 3.5731735229492188, + "7": 3.730853796005249, + "8": 3.9655306339263916, + "9": 3.49727725982666, + "10": 3.813404083251953, + "11": 3.6488544940948486, + "12": 6.760714530944824, + "13": 3.6842222213745117, + "14": 4.1691131591796875, + "15": 3.6978936195373535, + "16": 4.105098247528076, + "17": 5.215545654296875, + "18": 4.9595465660095215, + "19": 4.571078300476074, + "20": 4.288591384887695, + "21": 3.841993570327759, + "22": 4.388175964355469, + "23": 3.681670665740967, + "24": 3.673552989959717, + "25": 3.8043699264526367, + "26": 4.5079240798950195, + "27": 5.53628396987915, + "28": 3.921935796737671, + "29": 4.44952917098999, + "30": 3.9125852584838867, + "31": 4.293788433074951, + "32": 3.923111915588379, + "33": 3.471259593963623, + "34": 3.690646171569824, + "35": 4.4711809158325195, + "36": 4.153311729431152, + "37": 3.850543975830078, + "38": 3.814574956893921, + "39": 5.762028694152832, + "40": 5.557949066162109, + "41": 3.583878755569458, + "42": 4.019481658935547, + "43": 4.102827072143555, + "44": 4.205428123474121, + "45": 4.144927024841309, + "46": 4.410046577453613, + "47": 4.4519453048706055, + "48": 3.869866371154785, + "49": 3.4741594791412354, + "50": 3.508986473083496, + "51": 3.837757110595703, + "52": 4.058722019195557, + "53": 3.5802788734436035 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 4.64e-11, + 0.00606328, + 0.0167652, + 0.0210278, + 0.0183967, + 0.0383495, + 0.048384, + 0.0490197, + 0.0641029, + 0.0583378, + 0.0738842, + 0.0773754, + 0.0779342, + 0.0482771, + 0.105127, + 0.0885962, + 0.0582833, + 0.0986183, + 0.0585115, + 0.0579794, + 0.0395945, + 0.145318, + 0.03252, + 0.100512, + 0.0794757, + 0.0927668, + 0.159917, + 0.0347504, + 0.0876193, + 0.074898, + 0.155455, + 0.210878, + 0.144365, + 0.243957, + 0.151349, + 0.101592, + 0.145088, + 0.158525, + 0.152414, + 0.0198606, + 0.0531471, + 0.248452, + 0.0838331, + 0.202471, + 0.199725, + 0.175231, + 0.0901816, + 0.116207, + 0.222408, + 0.299368, + 0.1778, + 0.268447, + 0.114293, + 0.258973 + ], + "train_epoch_time": 4.845700025558472, + "train_loss": 3.511465792197579, + "train_score": 0.14736706425931567, + "val_loss": 3.5557357807794476, + "val_score": 0.14010924961413637 + }, + { + "epoch": 1, + "grad_norm": 1.9664967060089111, + "learning_rate": 0.464, + "model_norm": 87.36225891113281, + "step_logs": { + "grad_norm": { + "54": 5.270796298980713, + "55": 4.202520370483398, + "56": 10.859441757202148, + "57": 4.501511096954346, + "58": 4.667898178100586, + "59": 15.348048210144043, + "60": 8.129271507263184, + "61": 6.9826531410217285, + "62": 2.8545353412628174, + "63": 8.056032180786133, + "64": 5.776970863342285, + "65": 4.928697109222412, + "66": 2.7388803958892822, + "67": 6.421074390411377, + "68": 3.449686050415039, + "69": 4.1241865158081055, + "70": 8.620981216430664, + "71": 2.5956056118011475, + "72": 4.451788425445557, + "73": 4.842113018035889, + "74": 3.030059814453125, + "75": 2.548440933227539, + "76": 3.4492027759552, + "77": 3.1907198429107666, + "78": 3.236433506011963, + "79": 1.7249805927276611, + "80": 2.1449711322784424, + "81": 4.539532661437988, + "82": 2.4484031200408936, + "83": 4.745017051696777, + "84": 2.536980152130127, + "85": 1.7241555452346802, + "86": 2.8588666915893555, + "87": 2.6496448516845703, + "88": 3.7677969932556152, + "89": 2.689509391784668, + "90": 1.9279940128326416, + "91": 2.6054961681365967, + "92": 2.0998308658599854, + "93": 4.391077995300293, + "94": 1.9211870431900024, + "95": 1.905600905418396, + "96": 2.057025194168091, + "97": 2.375565767288208, + "98": 2.6710333824157715, + "99": 1.7314995527267456, + "100": 1.4255518913269043, + "101": 2.0370473861694336, + "102": 4.326293468475342, + "103": 1.6464062929153442, + "104": 2.397109031677246, + "105": 1.6464992761611938, + "106": 2.808077096939087, + "107": 1.9664967060089111 + }, + "loss": { + "54": 3.531726121902466, + "55": 3.464325428009033, + "56": 4.565157890319824, + "57": 4.1141228675842285, + "58": 4.4550676345825195, + "59": 4.6623029708862305, + "60": 4.204016208648682, + "61": 3.8621020317077637, + "62": 3.56404972076416, + "63": 3.872037410736084, + "64": 3.9847793579101562, + "65": 4.252335548400879, + "66": 3.3278677463531494, + "67": 4.12910795211792, + "68": 3.715725898742676, + "69": 3.4431114196777344, + "70": 4.531065940856934, + "71": 3.4832961559295654, + "72": 3.4611239433288574, + "73": 3.645770311355591, + "74": 3.7510828971862793, + "75": 3.6006414890289307, + "76": 3.3778152465820312, + "77": 3.5806689262390137, + "78": 3.6561760902404785, + "79": 3.119947910308838, + "80": 3.093405246734619, + "81": 3.556382179260254, + "82": 3.2736024856567383, + "83": 3.7718029022216797, + "84": 3.239236354827881, + "85": 3.1432065963745117, + "86": 3.0772080421447754, + "87": 3.390064239501953, + "88": 3.5369338989257812, + "89": 3.340794563293457, + "90": 3.168405771255493, + "91": 3.1797964572906494, + "92": 3.1759305000305176, + "93": 3.412914276123047, + "94": 3.1868836879730225, + "95": 2.9689383506774902, + "96": 3.100706100463867, + "97": 3.0078396797180176, + "98": 3.190727710723877, + "99": 3.143548011779785, + "100": 2.801726818084717, + "101": 2.9404473304748535, + "102": 3.4823737144470215, + "103": 3.0103201866149902, + "104": 2.8882369995117188, + "105": 3.028944253921509, + "106": 2.9876348972320557, + "107": 3.1696414947509766 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.16425, + 0.212577, + 0.0663517, + 0.21655, + 0.217362, + 0.0364729, + 0.099851, + 0.118099, + 0.303186, + 0.0949151, + 0.15766, + 0.199541, + 0.30467, + 0.139903, + 0.266204, + 0.216209, + 0.0965579, + 0.320283, + 0.199276, + 0.186196, + 0.295947, + 0.327115, + 0.255348, + 0.27958, + 0.278737, + 0.379934, + 0.344966, + 0.197925, + 0.32565, + 0.194558, + 0.317596, + 0.38051, + 0.287094, + 0.313417, + 0.240267, + 0.308855, + 0.364728, + 0.310305, + 0.350958, + 0.200805, + 0.36573, + 0.361438, + 0.352424, + 0.323282, + 0.305515, + 0.379934, + 0.397166, + 0.349556, + 0.206503, + 0.383818, + 0.317468, + 0.384219, + 0.287784, + 0.361638 + ], + "train_epoch_time": 4.842940807342529, + "train_loss": 3.0945656823633048, + "train_score": 0.2102369530342915, + "val_loss": 3.112576820272803, + "val_score": 0.20478885654193252 + }, + { + "epoch": 2, + "grad_norm": 1.458455204963684, + "learning_rate": 0.464, + "model_norm": 87.34402465820312, + "step_logs": { + "grad_norm": { + "108": 2.2182905673980713, + "109": 2.0108089447021484, + "110": 1.5361435413360596, + "111": 2.0051770210266113, + "112": 2.2770516872406006, + "113": 1.674286127090454, + "114": 3.0552706718444824, + "115": 1.5741430521011353, + "116": 1.4676458835601807, + "117": 1.5990766286849976, + "118": 2.2896153926849365, + "119": 1.7487237453460693, + "120": 1.3608037233352661, + "121": 1.4323080778121948, + "122": 1.7352120876312256, + "123": 1.8004534244537354, + "124": 1.522517204284668, + "125": 1.97489333152771, + "126": 1.500195026397705, + "127": 1.1285768747329712, + "128": 1.5444203615188599, + "129": 1.5232722759246826, + "130": 1.168062448501587, + "131": 1.5509209632873535, + "132": 1.750795602798462, + "133": 1.473191499710083, + "134": 1.5706579685211182, + "135": 1.6227079629898071, + "136": 1.541170358657837, + "137": 1.4366670846939087, + "138": 1.9454410076141357, + "139": 1.7283855676651, + "140": 2.0398221015930176, + "141": 1.3189575672149658, + "142": 1.5378857851028442, + "143": 1.6694495677947998, + "144": 1.3832626342773438, + "145": 1.410828948020935, + "146": 1.6587904691696167, + "147": 2.24880051612854, + "148": 1.2307977676391602, + "149": 1.3819741010665894, + "150": 1.4442795515060425, + "151": 1.5331897735595703, + "152": 1.118809461593628, + "153": 1.5031019449234009, + "154": 2.323068857192993, + "155": 1.6250786781311035, + "156": 1.6264251470565796, + "157": 1.6101235151290894, + "158": 1.5338894128799438, + "159": 1.05475652217865, + "160": 1.373598337173462, + "161": 1.458455204963684 + }, + "loss": { + "108": 3.109159469604492, + "109": 3.070673942565918, + "110": 2.8874285221099854, + "111": 2.941579818725586, + "112": 2.9926042556762695, + "113": 2.9000515937805176, + "114": 3.1140685081481934, + "115": 3.102757215499878, + "116": 2.8098974227905273, + "117": 2.8647236824035645, + "118": 2.926743507385254, + "119": 3.038897752761841, + "120": 2.8261895179748535, + "121": 2.7418599128723145, + "122": 2.8474154472351074, + "123": 2.8810558319091797, + "124": 2.926438093185425, + "125": 2.8318533897399902, + "126": 2.9802098274230957, + "127": 2.703420639038086, + "128": 2.761086940765381, + "129": 2.911237955093384, + "130": 2.716820001602173, + "131": 2.7275338172912598, + "132": 2.866767406463623, + "133": 2.8330001831054688, + "134": 2.7773144245147705, + "135": 2.815718650817871, + "136": 2.7914857864379883, + "137": 2.7595999240875244, + "138": 2.806817054748535, + "139": 2.9285173416137695, + "140": 2.8892822265625, + "141": 2.8088126182556152, + "142": 2.7513771057128906, + "143": 2.82440185546875, + "144": 2.8121166229248047, + "145": 2.7674131393432617, + "146": 2.8214759826660156, + "147": 2.9398136138916016, + "148": 2.803689479827881, + "149": 2.6837542057037354, + "150": 2.8016669750213623, + "151": 2.7459566593170166, + "152": 2.7188401222229004, + "153": 2.6839404106140137, + "154": 2.9162955284118652, + "155": 2.933291435241699, + "156": 2.7737374305725098, + "157": 2.7417349815368652, + "158": 2.8533411026000977, + "159": 2.656414031982422, + "160": 2.652618646621704, + "161": 2.7714900970458984 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.339384, + 0.355422, + 0.390047, + 0.352286, + 0.330965, + 0.379006, + 0.273675, + 0.391469, + 0.39394, + 0.384398, + 0.327787, + 0.376177, + 0.402774, + 0.395369, + 0.372593, + 0.367951, + 0.391968, + 0.351642, + 0.394826, + 0.41828, + 0.386532, + 0.39159, + 0.415581, + 0.385191, + 0.371775, + 0.393978, + 0.384719, + 0.381278, + 0.387505, + 0.395391, + 0.353435, + 0.375205, + 0.347799, + 0.405704, + 0.386851, + 0.377563, + 0.40074, + 0.397647, + 0.378388, + 0.331644, + 0.412315, + 0.398249, + 0.395657, + 0.387117, + 0.419222, + 0.388189, + 0.32463, + 0.383829, + 0.379937, + 0.380524, + 0.389489, + 0.422909, + 0.398277, + 0.393869 + ], + "train_epoch_time": 4.842704772949219, + "train_loss": 2.790942992370474, + "train_score": 0.22029120336304095, + "val_loss": 2.8064462073771743, + "val_score": 0.2155568311600406 + }, + { + "epoch": 3, + "grad_norm": 1.433650016784668, + "learning_rate": 0.464, + "model_norm": 87.3509292602539, + "step_logs": { + "grad_norm": { + "162": 1.4366334676742554, + "163": 1.213464617729187, + "164": 1.1782711744308472, + "165": 1.8073817491531372, + "166": 1.3293166160583496, + "167": 1.121290922164917, + "168": 1.6984219551086426, + "169": 1.644121527671814, + "170": 1.2490148544311523, + "171": 1.6104655265808105, + "172": 1.358503818511963, + "173": 1.0819065570831299, + "174": 1.1684364080429077, + "175": 1.2244898080825806, + "176": 1.1080474853515625, + "177": 1.0779063701629639, + "178": 1.114522933959961, + "179": 1.3154613971710205, + "180": 1.5206512212753296, + "181": 1.2605290412902832, + "182": 1.306101679801941, + "183": 1.2241108417510986, + "184": 0.9293332099914551, + "185": 0.9950765371322632, + "186": 1.1068443059921265, + "187": 1.3171552419662476, + "188": 1.0813138484954834, + "189": 1.0367534160614014, + "190": 1.4814493656158447, + "191": 1.3181469440460205, + "192": 0.8359196186065674, + "193": 0.7603257298469543, + "194": 1.0790683031082153, + "195": 1.3632005453109741, + "196": 1.3106358051300049, + "197": 1.3817201852798462, + "198": 1.1835811138153076, + "199": 0.970302164554596, + "200": 1.0245310068130493, + "201": 1.3890938758850098, + "202": 1.2016632556915283, + "203": 0.9569770097732544, + "204": 0.9658185243606567, + "205": 1.333294153213501, + "206": 1.220920443534851, + "207": 1.174423098564148, + "208": 1.1236298084259033, + "209": 0.8445212244987488, + "210": 0.9995211362838745, + "211": 1.2012298107147217, + "212": 1.110680341720581, + "213": 0.979672908782959, + "214": 0.9985975623130798, + "215": 1.433650016784668 + }, + "loss": { + "162": 2.793078899383545, + "163": 2.6818556785583496, + "164": 2.660399913787842, + "165": 2.7539844512939453, + "166": 2.8909783363342285, + "167": 2.609816551208496, + "168": 2.7761762142181396, + "169": 2.8091959953308105, + "170": 2.758303642272949, + "171": 2.7061643600463867, + "172": 2.8290352821350098, + "173": 2.649831533432007, + "174": 2.6693527698516846, + "175": 2.66512393951416, + "176": 2.661381244659424, + "177": 2.620033025741577, + "178": 2.662315845489502, + "179": 2.6457929611206055, + "180": 2.728728771209717, + "181": 2.7737159729003906, + "182": 2.667226791381836, + "183": 2.727423667907715, + "184": 2.612975597381592, + "185": 2.5970468521118164, + "186": 2.6217851638793945, + "187": 2.660849094390869, + "188": 2.6600699424743652, + "189": 2.6397581100463867, + "190": 2.6401283740997314, + "191": 2.779290199279785, + "192": 2.604036331176758, + "193": 2.541745185852051, + "194": 2.58569073677063, + "195": 2.6986641883850098, + "196": 2.70920467376709, + "197": 2.6580862998962402, + "198": 2.7164857387542725, + "199": 2.5856382846832275, + "200": 2.6239733695983887, + "201": 2.6230173110961914, + "202": 2.7349560260772705, + "203": 2.567758083343506, + "204": 2.5872833728790283, + "205": 2.6019580364227295, + "206": 2.7150871753692627, + "207": 2.612663745880127, + "208": 2.682765007019043, + "209": 2.571889877319336, + "210": 2.574979305267334, + "211": 2.653383255004883, + "212": 2.615011692047119, + "213": 2.5925087928771973, + "214": 2.577688694000244, + "215": 2.6449642181396484 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 0.396096, + 0.411573, + 0.413891, + 0.363869, + 0.406373, + 0.417354, + 0.373873, + 0.37932, + 0.410179, + 0.379597, + 0.403006, + 0.420868, + 0.414783, + 0.41043, + 0.41914, + 0.420716, + 0.41868, + 0.40287, + 0.387765, + 0.409568, + 0.404047, + 0.411544, + 0.430953, + 0.426292, + 0.418618, + 0.403035, + 0.421062, + 0.423951, + 0.388982, + 0.405227, + 0.436807, + 0.440744, + 0.420109, + 0.400084, + 0.404499, + 0.397726, + 0.414419, + 0.427856, + 0.424595, + 0.396355, + 0.413366, + 0.428541, + 0.428185, + 0.400517, + 0.411576, + 0.413372, + 0.418326, + 0.435952, + 0.425684, + 0.412018, + 0.418227, + 0.4273, + 0.425785, + 0.393126 + ], + "train_epoch_time": 4.842948913574219, + "train_loss": 2.715890288866062, + "train_score": 0.21206397948682393, + "val_loss": 2.7603072714997214, + "val_score": 0.20775330104318743 + }, + { + "epoch": 4, + "grad_norm": 1.0544430017471313, + "learning_rate": 0.464, + "model_norm": 87.37629699707031, + "step_logs": { + "grad_norm": { + "216": 1.1416711807250977, + "217": 0.9183707237243652, + "218": 1.1421544551849365, + "219": 1.3010965585708618, + "220": 1.0878509283065796, + "221": 0.929415762424469, + "222": 0.8989261388778687, + "223": 0.921040415763855, + "224": 1.1952918767929077, + "225": 1.0492099523544312, + "226": 0.9612029194831848, + "227": 1.0214899778366089, + "228": 0.9509407877922058, + "229": 1.140182375907898, + "230": 1.0640504360198975, + "231": 1.034746766090393, + "232": 0.9620423913002014, + "233": 0.9955230951309204, + "234": 0.9503745436668396, + "235": 0.909529447555542, + "236": 0.9207335114479065, + "237": 1.0392158031463623, + "238": 0.923897922039032, + "239": 0.9228456020355225, + "240": 0.9023650884628296, + "241": 0.9074633717536926, + "242": 0.9456944465637207, + "243": 1.037077784538269, + "244": 0.9104472398757935, + "245": 0.8049510717391968, + "246": 0.7960094213485718, + "247": 0.8483934998512268, + "248": 0.8847818374633789, + "249": 0.874873697757721, + "250": 0.9503462910652161, + "251": 0.9728066921234131, + "252": 0.9684033393859863, + "253": 1.0017497539520264, + "254": 0.96277916431427, + "255": 0.9461209177970886, + "256": 0.8669742345809937, + "257": 0.8068379759788513, + "258": 0.8170136213302612, + "259": 0.9688451886177063, + "260": 1.0162771940231323, + "261": 0.997868001461029, + "262": 1.1000066995620728, + "263": 1.0799529552459717, + "264": 1.0200011730194092, + "265": 1.0192959308624268, + "266": 0.9461516737937927, + "267": 0.8776339888572693, + "268": 1.0877230167388916, + "269": 1.0544430017471313 + }, + "loss": { + "216": 2.714728355407715, + "217": 2.5691323280334473, + "218": 2.6134748458862305, + "219": 2.6674747467041016, + "220": 2.640254259109497, + "221": 2.5924925804138184, + "222": 2.5615062713623047, + "223": 2.5858359336853027, + "224": 2.601901054382324, + "225": 2.6623408794403076, + "226": 2.5938425064086914, + "227": 2.5729002952575684, + "228": 2.59918212890625, + "229": 2.5741467475891113, + "230": 2.672935724258423, + "231": 2.5458154678344727, + "232": 2.639704942703247, + "233": 2.570923328399658, + "234": 2.591862201690674, + "235": 2.5478522777557373, + "236": 2.595989227294922, + "237": 2.5573132038116455, + "238": 2.6101834774017334, + "239": 2.557218074798584, + "240": 2.5839805603027344, + "241": 2.5425610542297363, + "242": 2.609896183013916, + "243": 2.5647411346435547, + "244": 2.6072728633880615, + "245": 2.540611743927002, + "246": 2.5323143005371094, + "247": 2.5325565338134766, + "248": 2.553086042404175, + "249": 2.541210651397705, + "250": 2.5510361194610596, + "251": 2.551211357116699, + "252": 2.569657325744629, + "253": 2.563237190246582, + "254": 2.584824323654175, + "255": 2.564854383468628, + "256": 2.567793130874634, + "257": 2.524254560470581, + "258": 2.556800127029419, + "259": 2.531148672103882, + "260": 2.598818302154541, + "261": 2.566972494125366, + "262": 2.5986692905426025, + "263": 2.5702319145202637, + "264": 2.5863614082336426, + "265": 2.5713467597961426, + "266": 2.5680689811706543, + "267": 2.538209915161133, + "268": 2.5335302352905273, + "269": 2.6261777877807617 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 0.417495, + 0.431162, + 0.415844, + 0.404451, + 0.420295, + 0.430706, + 0.432357, + 0.431182, + 0.411569, + 0.423385, + 0.428583, + 0.424098, + 0.429345, + 0.415336, + 0.422482, + 0.422751, + 0.429096, + 0.425909, + 0.429293, + 0.431497, + 0.431322, + 0.422596, + 0.431279, + 0.430721, + 0.432389, + 0.431571, + 0.429829, + 0.42286, + 0.432127, + 0.43808, + 0.438542, + 0.435298, + 0.433185, + 0.433694, + 0.428781, + 0.427233, + 0.42778, + 0.425365, + 0.428361, + 0.429244, + 0.434493, + 0.437806, + 0.437501, + 0.427242, + 0.42483, + 0.42569, + 0.418763, + 0.419805, + 0.424393, + 0.424232, + 0.429283, + 0.433482, + 0.418643, + 0.422501 + ], + "train_epoch_time": 4.842125177383423, + "train_loss": 2.540635450806474, + "train_score": 0.25658738332636216, + "val_loss": 2.577581350893706, + "val_score": 0.2525159657069928 + }, + { + "epoch": 5, + "grad_norm": 0.7355007529258728, + "learning_rate": 0.464, + "model_norm": 87.41879272460938, + "step_logs": { + "grad_norm": { + "270": 0.8205832839012146, + "271": 0.8407583832740784, + "272": 0.9962707757949829, + "273": 0.8767731785774231, + "274": 0.7861824631690979, + "275": 0.8610600233078003, + "276": 1.0702558755874634, + "277": 0.9638050198554993, + "278": 0.784705400466919, + "279": 0.8742997646331787, + "280": 0.9360121488571167, + "281": 0.8274256587028503, + "282": 0.746822714805603, + "283": 0.8617690205574036, + "284": 1.086946964263916, + "285": 0.935854971408844, + "286": 0.6854451298713684, + "287": 0.6958264112472534, + "288": 0.8681105971336365, + "289": 0.948701024055481, + "290": 0.8501421213150024, + "291": 0.8043948411941528, + "292": 0.8479794263839722, + "293": 0.9081390500068665, + "294": 0.8667063117027283, + "295": 0.8414411544799805, + "296": 0.8328216075897217, + "297": 0.7659209966659546, + "298": 0.8017351627349854, + "299": 0.9452565312385559, + "300": 1.324703574180603, + "301": 1.0370687246322632, + "302": 0.7906301617622375, + "303": 0.8647352457046509, + "304": 0.8563400506973267, + "305": 0.8151233792304993, + "306": 0.9363498091697693, + "307": 0.9981499314308167, + "308": 0.9101542830467224, + "309": 0.918056845664978, + "310": 1.1482096910476685, + "311": 0.9801045060157776, + "312": 0.8164383172988892, + "313": 0.8930280208587646, + "314": 1.1796107292175293, + "315": 0.9580897092819214, + "316": 0.8253657817840576, + "317": 0.8682921528816223, + "318": 0.9067890048027039, + "319": 0.9489942193031311, + "320": 1.52891206741333, + "321": 0.8044270277023315, + "322": 0.7654194235801697, + "323": 0.7355007529258728 + }, + "loss": { + "270": 2.530991554260254, + "271": 2.53185772895813, + "272": 2.5386762619018555, + "273": 2.5862128734588623, + "274": 2.52093243598938, + "275": 2.532766580581665, + "276": 2.539736747741699, + "277": 2.6006762981414795, + "278": 2.509798049926758, + "279": 2.5284221172332764, + "280": 2.5356507301330566, + "281": 2.5504307746887207, + "282": 2.518136739730835, + "283": 2.5362398624420166, + "284": 2.533015727996826, + "285": 2.5774025917053223, + "286": 2.4998836517333984, + "287": 2.472355365753174, + "288": 2.501762866973877, + "289": 2.5282649993896484, + "290": 2.5422515869140625, + "291": 2.4945034980773926, + "292": 2.4808168411254883, + "293": 2.5010881423950195, + "294": 2.515791416168213, + "295": 2.5069451332092285, + "296": 2.506500720977783, + "297": 2.4782958030700684, + "298": 2.5075578689575195, + "299": 2.4994449615478516, + "300": 2.5394954681396484, + "301": 2.586496353149414, + "302": 2.483872175216675, + "303": 2.490853786468506, + "304": 2.4748849868774414, + "305": 2.4569995403289795, + "306": 2.4507343769073486, + "307": 2.500828742980957, + "308": 2.5189921855926514, + "309": 2.464787006378174, + "310": 2.4796557426452637, + "311": 2.5486512184143066, + "312": 2.4313340187072754, + "313": 2.4666738510131836, + "314": 2.4663496017456055, + "315": 2.5455098152160645, + "316": 2.4493112564086914, + "317": 2.4726908206939697, + "318": 2.452273368835449, + "319": 2.4863457679748535, + "320": 2.495359182357788, + "321": 2.5245702266693115, + "322": 2.4951186180114746, + "323": 2.480879306793213 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 0.437026, + 0.435774, + 0.425413, + 0.434067, + 0.439027, + 0.434492, + 0.420048, + 0.428492, + 0.439012, + 0.433589, + 0.429566, + 0.436797, + 0.441322, + 0.434484, + 0.418693, + 0.430093, + 0.444614, + 0.443835, + 0.433691, + 0.428602, + 0.43529, + 0.437662, + 0.434764, + 0.431026, + 0.43394, + 0.435467, + 0.436009, + 0.439845, + 0.437955, + 0.428465, + 0.399891, + 0.423176, + 0.438404, + 0.433788, + 0.434155, + 0.436608, + 0.42844, + 0.424743, + 0.431109, + 0.429896, + 0.41305, + 0.426689, + 0.436252, + 0.431625, + 0.410296, + 0.428178, + 0.435875, + 0.433346, + 0.43051, + 0.428031, + 0.381162, + 0.437956, + 0.440029, + 0.441657 + ], + "train_epoch_time": 4.842427968978882, + "train_loss": 2.459283773663054, + "train_score": 0.2681234307405425, + "val_loss": 2.5116698925311747, + "val_score": 0.2564850029044803 + }, + { + "epoch": 6, + "grad_norm": 0.9734485745429993, + "learning_rate": 0.464, + "model_norm": 87.46509552001953, + "step_logs": { + "grad_norm": { + "324": 0.7187559604644775, + "325": 0.8011908531188965, + "326": 0.9322064518928528, + "327": 0.9514461755752563, + "328": 0.8893263339996338, + "329": 0.9108681082725525, + "330": 0.925350546836853, + "331": 0.8453577756881714, + "332": 1.0446093082427979, + "333": 0.8957911729812622, + "334": 0.7204957008361816, + "335": 0.7012603282928467, + "336": 0.8112923502922058, + "337": 0.947418212890625, + "338": 1.1726282835006714, + "339": 1.1130179166793823, + "340": 0.866492748260498, + "341": 0.8699214458465576, + "342": 1.004923701286316, + "343": 0.9799953103065491, + "344": 0.8774306178092957, + "345": 1.2152811288833618, + "346": 0.771652340888977, + "347": 0.7924943566322327, + "348": 0.9351041316986084, + "349": 1.0074272155761719, + "350": 1.3154418468475342, + "351": 0.9169334173202515, + "352": 0.8625748157501221, + "353": 0.8255141973495483, + "354": 0.8864129781723022, + "355": 0.9081965088844299, + "356": 0.9370619654655457, + "357": 0.8638302683830261, + "358": 0.8353853225708008, + "359": 0.8667851686477661, + "360": 0.9097339510917664, + "361": 0.9536703824996948, + "362": 0.9217488765716553, + "363": 0.8648698329925537, + "364": 0.7994353175163269, + "365": 0.7991637587547302, + "366": 0.8861297965049744, + "367": 0.8647029995918274, + "368": 0.8331355452537537, + "369": 0.830400824546814, + "370": 0.8206387758255005, + "371": 0.9508046507835388, + "372": 1.0702282190322876, + "373": 0.9545499682426453, + "374": 0.937156617641449, + "375": 1.494293451309204, + "376": 0.825318455696106, + "377": 0.9734485745429993 + }, + "loss": { + "324": 2.457473039627075, + "325": 2.473825454711914, + "326": 2.4800539016723633, + "327": 2.5082457065582275, + "328": 2.4482905864715576, + "329": 2.454723358154297, + "330": 2.449164867401123, + "331": 2.4679617881774902, + "332": 2.4207777976989746, + "333": 2.496211528778076, + "334": 2.4067816734313965, + "335": 2.3856005668640137, + "336": 2.3871731758117676, + "337": 2.4347410202026367, + "338": 2.452162027359009, + "339": 2.510870933532715, + "340": 2.450366973876953, + "341": 2.4260406494140625, + "342": 2.4311933517456055, + "343": 2.4496262073516846, + "344": 2.428589344024658, + "345": 2.4351205825805664, + "346": 2.487351894378662, + "347": 2.4238204956054688, + "348": 2.4073033332824707, + "349": 2.454934597015381, + "350": 2.4357969760894775, + "351": 2.526148796081543, + "352": 2.3934249877929688, + "353": 2.381464958190918, + "354": 2.4217166900634766, + "355": 2.4232378005981445, + "356": 2.3913872241973877, + "357": 2.4414122104644775, + "358": 2.3776772022247314, + "359": 2.4333183765411377, + "360": 2.39973521232605, + "361": 2.430523157119751, + "362": 2.400848865509033, + "363": 2.4175333976745605, + "364": 2.353456974029541, + "365": 2.40130352973938, + "366": 2.3738744258880615, + "367": 2.410468578338623, + "368": 2.371448040008545, + "369": 2.4066576957702637, + "370": 2.370410919189453, + "371": 2.394441843032837, + "372": 2.408982276916504, + "373": 2.4111380577087402, + "374": 2.356574058532715, + "375": 2.4638688564300537, + "376": 2.455554723739624, + "377": 2.461418867111206 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 0.442423, + 0.437654, + 0.429116, + 0.42815, + 0.43165, + 0.430261, + 0.429188, + 0.434791, + 0.42007, + 0.431797, + 0.441888, + 0.442822, + 0.436104, + 0.427441, + 0.410585, + 0.416344, + 0.433205, + 0.432687, + 0.423215, + 0.425315, + 0.432212, + 0.406765, + 0.439586, + 0.437688, + 0.427937, + 0.423391, + 0.398347, + 0.43074, + 0.432787, + 0.435113, + 0.431518, + 0.430041, + 0.427576, + 0.433277, + 0.434419, + 0.432984, + 0.429625, + 0.426936, + 0.428795, + 0.432924, + 0.4365, + 0.437033, + 0.43093, + 0.43285, + 0.434495, + 0.435079, + 0.435308, + 0.42663, + 0.417902, + 0.426599, + 0.427074, + 0.383391, + 0.435945, + 0.425955 + ], + "train_epoch_time": 4.842965602874756, + "train_loss": 2.447208639538955, + "train_score": 0.26918489961234193, + "val_loss": 2.4942915494351654, + "val_score": 0.26127475561695995 + }, + { + "epoch": 7, + "grad_norm": 0.9931488037109375, + "learning_rate": 0.464, + "model_norm": 87.51716613769531, + "step_logs": { + "grad_norm": { + "378": 0.9711732268333435, + "379": 0.8608596920967102, + "380": 0.8385066390037537, + "381": 0.9775044322013855, + "382": 1.2387624979019165, + "383": 1.1272770166397095, + "384": 1.0324372053146362, + "385": 0.8504167795181274, + "386": 0.8226385712623596, + "387": 0.9326133131980896, + "388": 0.9151419401168823, + "389": 1.0062415599822998, + "390": 0.9342063069343567, + "391": 0.7492815256118774, + "392": 0.7576513290405273, + "393": 0.8583515882492065, + "394": 0.8657914400100708, + "395": 1.0114110708236694, + "396": 0.877298891544342, + "397": 0.6910508871078491, + "398": 0.7735267877578735, + "399": 0.7676224708557129, + "400": 0.8051762580871582, + "401": 0.9203561544418335, + "402": 0.9358859658241272, + "403": 1.0726943016052246, + "404": 0.9116992354393005, + "405": 1.0098536014556885, + "406": 0.8614804148674011, + "407": 0.8529787659645081, + "408": 0.8717355132102966, + "409": 0.8469391465187073, + "410": 0.9287498593330383, + "411": 0.8325091600418091, + "412": 0.887321949005127, + "413": 0.8735884428024292, + "414": 0.8441396355628967, + "415": 0.9033401012420654, + "416": 0.9610435962677002, + "417": 0.9324353337287903, + "418": 0.87081378698349, + "419": 0.8831162452697754, + "420": 0.8046960830688477, + "421": 0.8732675313949585, + "422": 0.9297661185264587, + "423": 0.9147859811782837, + "424": 1.2592324018478394, + "425": 1.0980116128921509, + "426": 1.0170390605926514, + "427": 0.942375898361206, + "428": 0.8641000986099243, + "429": 0.9221069812774658, + "430": 1.0613394975662231, + "431": 0.9931488037109375 + }, + "loss": { + "378": 2.4451165199279785, + "379": 2.3860435485839844, + "380": 2.3800244331359863, + "381": 2.38053035736084, + "382": 2.448452949523926, + "383": 2.4239790439605713, + "384": 2.434828519821167, + "385": 2.3803062438964844, + "386": 2.3709335327148438, + "387": 2.389134407043457, + "388": 2.4044575691223145, + "389": 2.393167495727539, + "390": 2.401137590408325, + "391": 2.3586223125457764, + "392": 2.3603355884552, + "393": 2.3524577617645264, + "394": 2.395683765411377, + "395": 2.3487906455993652, + "396": 2.4343361854553223, + "397": 2.320807933807373, + "398": 2.373628854751587, + "399": 2.3598885536193848, + "400": 2.339709520339966, + "401": 2.364112377166748, + "402": 2.373948335647583, + "403": 2.350099802017212, + "404": 2.371169090270996, + "405": 2.351757049560547, + "406": 2.369741916656494, + "407": 2.328277111053467, + "408": 2.352144718170166, + "409": 2.330667495727539, + "410": 2.3731205463409424, + "411": 2.354344367980957, + "412": 2.3447418212890625, + "413": 2.3495028018951416, + "414": 2.3375487327575684, + "415": 2.3459177017211914, + "416": 2.3344593048095703, + "417": 2.350660800933838, + "418": 2.330465793609619, + "419": 2.3261756896972656, + "420": 2.329648971557617, + "421": 2.300516128540039, + "422": 2.3484814167022705, + "423": 2.339043617248535, + "424": 2.3472890853881836, + "425": 2.4060328006744385, + "426": 2.3729686737060547, + "427": 2.3613250255584717, + "428": 2.3165369033813477, + "429": 2.332780361175537, + "430": 2.3335185050964355, + "431": 2.3769116401672363 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 0.425887, + 0.432813, + 0.434239, + 0.424472, + 0.405098, + 0.413686, + 0.421219, + 0.433447, + 0.435182, + 0.427863, + 0.429309, + 0.422526, + 0.427916, + 0.439717, + 0.439218, + 0.432569, + 0.432597, + 0.421419, + 0.432291, + 0.442859, + 0.438363, + 0.438593, + 0.435974, + 0.42839, + 0.427414, + 0.416669, + 0.429103, + 0.421587, + 0.432571, + 0.432635, + 0.431646, + 0.433077, + 0.427915, + 0.434337, + 0.430465, + 0.431484, + 0.433352, + 0.429351, + 0.424991, + 0.427331, + 0.431431, + 0.430514, + 0.435891, + 0.430864, + 0.427493, + 0.428439, + 0.401133, + 0.415677, + 0.421386, + 0.426764, + 0.431717, + 0.427822, + 0.417269, + 0.423252 + ], + "train_epoch_time": 4.842416763305664, + "train_loss": 2.3158105449320767, + "train_score": 0.3202194673515632, + "val_loss": 2.36853690645588, + "val_score": 0.308472660324193 + }, + { + "epoch": 8, + "grad_norm": 0.7776458859443665, + "learning_rate": 0.464, + "model_norm": 87.57430267333984, + "step_logs": { + "grad_norm": { + "432": 0.8584507703781128, + "433": 0.913176417350769, + "434": 1.148016333580017, + "435": 0.9464519023895264, + "436": 0.7887730002403259, + "437": 0.8173912167549133, + "438": 0.8222514390945435, + "439": 0.8680413365364075, + "440": 0.9511731266975403, + "441": 0.9565020203590393, + "442": 1.0319210290908813, + "443": 0.8324898481369019, + "444": 0.6966243386268616, + "445": 0.7177284955978394, + "446": 0.8587246537208557, + "447": 1.071374773979187, + "448": 1.1695053577423096, + "449": 0.9351375699043274, + "450": 0.9293837547302246, + "451": 0.8108306527137756, + "452": 0.7694725394248962, + "453": 0.7713205218315125, + "454": 0.8990504145622253, + "455": 0.9682612419128418, + "456": 0.8445351719856262, + "457": 0.7650795578956604, + "458": 0.8014143705368042, + "459": 0.8509171009063721, + "460": 0.9707989692687988, + "461": 0.861588716506958, + "462": 0.7892059087753296, + "463": 0.7764274477958679, + "464": 0.7194980978965759, + "465": 0.7126712799072266, + "466": 0.8123999238014221, + "467": 0.9879125356674194, + "468": 0.9892673492431641, + "469": 1.1081101894378662, + "470": 1.7786822319030762, + "471": 1.500467300415039, + "472": 1.124647855758667, + "473": 1.1007084846496582, + "474": 0.7789115905761719, + "475": 1.0154168605804443, + "476": 0.960903525352478, + "477": 0.8702136874198914, + "478": 0.9485010504722595, + "479": 0.8980382680892944, + "480": 0.8163084387779236, + "481": 0.8291359543800354, + "482": 0.7678986191749573, + "483": 0.6684704422950745, + "484": 0.6550145745277405, + "485": 0.7776458859443665 + }, + "loss": { + "432": 2.3186731338500977, + "433": 2.3324971199035645, + "434": 2.342672348022461, + "435": 2.375446319580078, + "436": 2.3103208541870117, + "437": 2.289398670196533, + "438": 2.2947425842285156, + "439": 2.277359962463379, + "440": 2.334648847579956, + "441": 2.3235483169555664, + "442": 2.3202476501464844, + "443": 2.3141045570373535, + "444": 2.2952592372894287, + "445": 2.2474730014801025, + "446": 2.271878719329834, + "447": 2.31207275390625, + "448": 2.340684175491333, + "449": 2.36981201171875, + "450": 2.3036441802978516, + "451": 2.2990965843200684, + "452": 2.3098270893096924, + "453": 2.274724006652832, + "454": 2.2898430824279785, + "455": 2.3211851119995117, + "456": 2.2781529426574707, + "457": 2.2576098442077637, + "458": 2.262582540512085, + "459": 2.285156488418579, + "460": 2.2788801193237305, + "461": 2.314042329788208, + "462": 2.2632551193237305, + "463": 2.260465621948242, + "464": 2.2466373443603516, + "465": 2.2557120323181152, + "466": 2.2535109519958496, + "467": 2.2910313606262207, + "468": 2.3230910301208496, + "469": 2.324202060699463, + "470": 2.337447166442871, + "471": 2.4766297340393066, + "472": 2.410165309906006, + "473": 2.3862979412078857, + "474": 2.286574363708496, + "475": 2.2892537117004395, + "476": 2.3257291316986084, + "477": 2.331841468811035, + "478": 2.2833876609802246, + "479": 2.284867525100708, + "480": 2.2700722217559814, + "481": 2.2664663791656494, + "482": 2.269594192504883, + "483": 2.269068479537964, + "484": 2.2483325004577637, + "485": 2.264895439147949 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 0.432136, + 0.428462, + 0.410431, + 0.426672, + 0.436715, + 0.434577, + 0.434313, + 0.430922, + 0.425725, + 0.425162, + 0.41935, + 0.433855, + 0.442304, + 0.440572, + 0.431506, + 0.416077, + 0.408607, + 0.427409, + 0.426867, + 0.435132, + 0.437955, + 0.437456, + 0.428878, + 0.424246, + 0.43258, + 0.437673, + 0.435331, + 0.432227, + 0.423379, + 0.431859, + 0.436153, + 0.436964, + 0.440454, + 0.440965, + 0.434479, + 0.422267, + 0.422689, + 0.413338, + 0.353118, + 0.383186, + 0.413639, + 0.415105, + 0.437094, + 0.420103, + 0.424867, + 0.43149, + 0.425139, + 0.42888, + 0.434416, + 0.433495, + 0.437622, + 0.443727, + 0.444329, + 0.436934 + ], + "train_epoch_time": 4.84152889251709, + "train_loss": 2.261845543736878, + "train_score": 0.34534388442805714, + "val_loss": 2.3263614760748146, + "val_score": 0.32416493275409175 + }, + { + "epoch": 9, + "grad_norm": 0.7436298131942749, + "learning_rate": 0.464, + "model_norm": 87.63212585449219, + "step_logs": { + "grad_norm": { + "486": 0.824658989906311, + "487": 0.7818910479545593, + "488": 0.8712555170059204, + "489": 1.0522475242614746, + "490": 0.9231565594673157, + "491": 0.8687759041786194, + "492": 0.9941099286079407, + "493": 1.0223734378814697, + "494": 0.9244717955589294, + "495": 1.058251976966858, + "496": 1.0190001726150513, + "497": 0.8731686472892761, + "498": 0.8970601558685303, + "499": 0.8075329661369324, + "500": 0.7713789939880371, + "501": 0.7968210577964783, + "502": 0.7654058933258057, + "503": 0.7904702425003052, + "504": 0.8240793943405151, + "505": 0.8736516833305359, + "506": 0.8800214529037476, + "507": 0.828270435333252, + "508": 0.7866398096084595, + "509": 0.7940694689750671, + "510": 0.8814888000488281, + "511": 0.9283365607261658, + "512": 0.8684666156768799, + "513": 0.7865447998046875, + "514": 0.7620531916618347, + "515": 0.7946272492408752, + "516": 0.8509487509727478, + "517": 1.0813733339309692, + "518": 1.0491727590560913, + "519": 0.7492812871932983, + "520": 0.7069373726844788, + "521": 0.781988263130188, + "522": 0.8679794669151306, + "523": 0.9445253610610962, + "524": 0.9281182289123535, + "525": 0.7825267314910889, + "526": 0.71170574426651, + "527": 0.7191166877746582, + "528": 0.8151636719703674, + "529": 0.8615965247154236, + "530": 0.8744245171546936, + "531": 0.8416059017181396, + "532": 0.897475004196167, + "533": 0.8868516087532043, + "534": 0.7889246344566345, + "535": 0.8108001947402954, + "536": 0.8731734752655029, + "537": 0.9442831873893738, + "538": 0.8500063419342041, + "539": 0.7436298131942749 + }, + "loss": { + "486": 2.2751922607421875, + "487": 2.2577123641967773, + "488": 2.268313407897949, + "489": 2.294137477874756, + "490": 2.2868940830230713, + "491": 2.2579355239868164, + "492": 2.2897439002990723, + "493": 2.3227405548095703, + "494": 2.272470474243164, + "495": 2.2700045108795166, + "496": 2.3085427284240723, + "497": 2.265313148498535, + "498": 2.2471089363098145, + "499": 2.2609357833862305, + "500": 2.253617525100708, + "501": 2.242844343185425, + "502": 2.2181596755981445, + "503": 2.243516206741333, + "504": 2.2470316886901855, + "505": 2.2407383918762207, + "506": 2.276848554611206, + "507": 2.2264628410339355, + "508": 2.244060516357422, + "509": 2.234928607940674, + "510": 2.2360825538635254, + "511": 2.2703027725219727, + "512": 2.2440080642700195, + "513": 2.2522902488708496, + "514": 2.2549145221710205, + "515": 2.2166194915771484, + "516": 2.2307488918304443, + "517": 2.222540855407715, + "518": 2.305981159210205, + "519": 2.2309727668762207, + "520": 2.2235610485076904, + "521": 2.1981966495513916, + "522": 2.212188243865967, + "523": 2.2313284873962402, + "524": 2.2742764949798584, + "525": 2.25014328956604, + "526": 2.212052345275879, + "527": 2.200212001800537, + "528": 2.240995407104492, + "529": 2.2436797618865967, + "530": 2.2521815299987793, + "531": 2.2249138355255127, + "532": 2.215155601501465, + "533": 2.2271718978881836, + "534": 2.231881618499756, + "535": 2.214655876159668, + "536": 2.2488224506378174, + "537": 2.2581586837768555, + "538": 2.2572288513183594, + "539": 2.2004570960998535 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 0.43391, + 0.436574, + 0.430571, + 0.417277, + 0.427077, + 0.430606, + 0.421768, + 0.420137, + 0.426764, + 0.416347, + 0.420156, + 0.430394, + 0.428407, + 0.434899, + 0.437218, + 0.435404, + 0.43721, + 0.435838, + 0.433598, + 0.430017, + 0.430063, + 0.433044, + 0.436101, + 0.435495, + 0.429384, + 0.426444, + 0.430436, + 0.436203, + 0.43784, + 0.435236, + 0.431504, + 0.413523, + 0.417737, + 0.438405, + 0.441004, + 0.435869, + 0.430024, + 0.424614, + 0.426521, + 0.436445, + 0.440594, + 0.440007, + 0.434135, + 0.430922, + 0.430122, + 0.432087, + 0.427903, + 0.428864, + 0.435805, + 0.434105, + 0.430165, + 0.425061, + 0.431925, + 0.438438 + ], + "train_epoch_time": 4.841368198394775, + "train_loss": 2.2177160761106647, + "train_score": 0.3403089131652197, + "val_loss": 2.2879242376947238, + "val_score": 0.32227235279729255 + }, + { + "epoch": 10, + "grad_norm": 0.9012609124183655, + "learning_rate": 0.464, + "model_norm": 87.6935043334961, + "step_logs": { + "grad_norm": { + "540": 0.779034435749054, + "541": 0.7930557131767273, + "542": 0.8253334164619446, + "543": 0.8134466409683228, + "544": 1.0176053047180176, + "545": 1.0052220821380615, + "546": 0.8841612935066223, + "547": 0.8768597841262817, + "548": 0.9078478217124939, + "549": 0.8520333766937256, + "550": 0.7919204831123352, + "551": 0.816322386264801, + "552": 0.8913934230804443, + "553": 0.9353684782981873, + "554": 0.8901406526565552, + "555": 0.8221054673194885, + "556": 0.8405773639678955, + "557": 0.7872676849365234, + "558": 0.7312039732933044, + "559": 0.7674193382263184, + "560": 0.7997778058052063, + "561": 0.859046459197998, + "562": 0.7318307161331177, + "563": 0.6345550417900085, + "564": 0.6962268352508545, + "565": 0.7579600811004639, + "566": 0.8382430076599121, + "567": 0.8840183615684509, + "568": 0.9224840998649597, + "569": 0.8858940601348877, + "570": 0.8380024433135986, + "571": 0.8245536088943481, + "572": 0.7209427356719971, + "573": 0.7269752025604248, + "574": 0.8401072025299072, + "575": 0.8850416541099548, + "576": 0.7944228053092957, + "577": 0.8254134058952332, + "578": 0.8889157772064209, + "579": 0.9426918625831604, + "580": 0.902326762676239, + "581": 0.8255059123039246, + "582": 0.7830792665481567, + "583": 0.7566782236099243, + "584": 0.8512044548988342, + "585": 0.9580790400505066, + "586": 0.870844841003418, + "587": 0.7903308272361755, + "588": 0.826323390007019, + "589": 0.8290290832519531, + "590": 0.9161742329597473, + "591": 0.8972206115722656, + "592": 0.9315125346183777, + "593": 0.9012609124183655 + }, + "loss": { + "540": 2.220630645751953, + "541": 2.203184127807617, + "542": 2.2084784507751465, + "543": 2.2190728187561035, + "544": 2.2008109092712402, + "545": 2.273651599884033, + "546": 2.23386549949646, + "547": 2.2281219959259033, + "548": 2.2046709060668945, + "549": 2.226468086242676, + "550": 2.203355073928833, + "551": 2.2306888103485107, + "552": 2.1902008056640625, + "553": 2.241281270980835, + "554": 2.234280586242676, + "555": 2.20111083984375, + "556": 2.2291407585144043, + "557": 2.213456392288208, + "558": 2.163346290588379, + "559": 2.178955078125, + "560": 2.203125238418579, + "561": 2.1998467445373535, + "562": 2.2070279121398926, + "563": 2.1700072288513184, + "564": 2.1725716590881348, + "565": 2.180734872817993, + "566": 2.1932973861694336, + "567": 2.220599412918091, + "568": 2.222261905670166, + "569": 2.229309558868408, + "570": 2.2023744583129883, + "571": 2.1994171142578125, + "572": 2.19333553314209, + "573": 2.154064655303955, + "574": 2.1894516944885254, + "575": 2.2035470008850098, + "576": 2.2258341312408447, + "577": 2.195603847503662, + "578": 2.2182822227478027, + "579": 2.222468137741089, + "580": 2.2159554958343506, + "581": 2.220186471939087, + "582": 2.186936616897583, + "583": 2.149834394454956, + "584": 2.2068819999694824, + "585": 2.204127788543701, + "586": 2.253419876098633, + "587": 2.150207042694092, + "588": 2.1659839153289795, + "589": 2.204482078552246, + "590": 2.189301013946533, + "591": 2.208019256591797, + "592": 2.1686458587646484, + "593": 2.2045767307281494 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 0.436334, + 0.435179, + 0.433015, + 0.433978, + 0.418335, + 0.42063, + 0.429157, + 0.429606, + 0.426969, + 0.431369, + 0.435258, + 0.433926, + 0.427978, + 0.425468, + 0.428727, + 0.433144, + 0.432216, + 0.435696, + 0.438838, + 0.436621, + 0.434718, + 0.430496, + 0.43927, + 0.44485, + 0.441164, + 0.437274, + 0.431899, + 0.428975, + 0.426141, + 0.428965, + 0.43204, + 0.43295, + 0.43982, + 0.439011, + 0.431714, + 0.428649, + 0.435362, + 0.43284, + 0.428582, + 0.42461, + 0.427554, + 0.433155, + 0.435659, + 0.436999, + 0.431159, + 0.423119, + 0.430396, + 0.434703, + 0.432377, + 0.432702, + 0.426099, + 0.427814, + 0.424587, + 0.427461 + ], + "train_epoch_time": 4.840528964996338, + "train_loss": 2.1896640532670095, + "train_score": 0.3543243364504454, + "val_loss": 2.26704915355459, + "val_score": 0.33408976829558373 + }, + { + "epoch": 11, + "grad_norm": 1.0583347082138062, + "learning_rate": 0.464, + "model_norm": 87.7582778930664, + "step_logs": { + "grad_norm": { + "594": 0.9341737031936646, + "595": 0.9632622003555298, + "596": 1.0863580703735352, + "597": 0.6909806132316589, + "598": 0.6440359950065613, + "599": 0.7869378328323364, + "600": 0.8006590008735657, + "601": 0.8540022373199463, + "602": 0.9666603803634644, + "603": 0.9440022706985474, + "604": 0.947803258895874, + "605": 1.02663254737854, + "606": 0.9005405902862549, + "607": 0.7349113821983337, + "608": 0.7511271834373474, + "609": 0.7522056698799133, + "610": 0.7426261901855469, + "611": 0.871614396572113, + "612": 0.8691319227218628, + "613": 0.8341525793075562, + "614": 0.7974764704704285, + "615": 0.803653359413147, + "616": 0.8657097220420837, + "617": 0.8455263376235962, + "618": 0.8263103365898132, + "619": 0.9229281544685364, + "620": 0.9237298369407654, + "621": 0.843511700630188, + "622": 0.7880929708480835, + "623": 0.8206955790519714, + "624": 0.8767843246459961, + "625": 0.9239310622215271, + "626": 1.0672721862792969, + "627": 1.097472906112671, + "628": 1.1559778451919556, + "629": 0.9792971014976501, + "630": 0.861211359500885, + "631": 0.9408342838287354, + "632": 0.870436429977417, + "633": 0.7878254055976868, + "634": 0.872065007686615, + "635": 0.9755440354347229, + "636": 1.0096006393432617, + "637": 0.8474461436271667, + "638": 0.885677695274353, + "639": 0.8158318996429443, + "640": 0.7676178812980652, + "641": 0.7629214525222778, + "642": 0.793358564376831, + "643": 0.7681413888931274, + "644": 0.8716553449630737, + "645": 0.9326581954956055, + "646": 1.056408405303955, + "647": 1.0583347082138062 + }, + "loss": { + "594": 2.216533660888672, + "595": 2.190443515777588, + "596": 2.199070453643799, + "597": 2.197890043258667, + "598": 2.1279449462890625, + "599": 2.1496286392211914, + "600": 2.1944611072540283, + "601": 2.1710777282714844, + "602": 2.2079710960388184, + "603": 2.204212188720703, + "604": 2.178293466567993, + "605": 2.2084767818450928, + "606": 2.221353054046631, + "607": 2.1685538291931152, + "608": 2.1588807106018066, + "609": 2.1699604988098145, + "610": 2.1372690200805664, + "611": 2.1742005348205566, + "612": 2.1818742752075195, + "613": 2.171539068222046, + "614": 2.1581578254699707, + "615": 2.121051549911499, + "616": 2.1752583980560303, + "617": 2.1521987915039062, + "618": 2.1538634300231934, + "619": 2.1955318450927734, + "620": 2.180851459503174, + "621": 2.1731820106506348, + "622": 2.160604476928711, + "623": 2.165349006652832, + "624": 2.1680428981781006, + "625": 2.1506142616271973, + "626": 2.1739940643310547, + "627": 2.222702980041504, + "628": 2.184702157974243, + "629": 2.184560775756836, + "630": 2.1318483352661133, + "631": 2.163470506668091, + "632": 2.174398422241211, + "633": 2.155637502670288, + "634": 2.1470487117767334, + "635": 2.175360679626465, + "636": 2.1443963050842285, + "637": 2.1520023345947266, + "638": 2.1553986072540283, + "639": 2.1636781692504883, + "640": 2.1627466678619385, + "641": 2.1270852088928223, + "642": 2.1557087898254395, + "643": 2.103940010070801, + "644": 2.1279478073120117, + "645": 2.1773481369018555, + "646": 2.149991035461426, + "647": 2.1986966133117676 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 0.425165, + 0.422481, + 0.412625, + 0.441737, + 0.443925, + 0.434931, + 0.434549, + 0.430453, + 0.422516, + 0.424211, + 0.423482, + 0.417747, + 0.427769, + 0.438654, + 0.437476, + 0.437532, + 0.437792, + 0.429206, + 0.429502, + 0.431894, + 0.434308, + 0.433384, + 0.429657, + 0.4308, + 0.432213, + 0.425685, + 0.425387, + 0.431244, + 0.43499, + 0.432769, + 0.428731, + 0.424874, + 0.413711, + 0.412182, + 0.406339, + 0.421111, + 0.429346, + 0.423775, + 0.429296, + 0.434946, + 0.428766, + 0.421245, + 0.417914, + 0.430657, + 0.427873, + 0.433092, + 0.436415, + 0.436302, + 0.434563, + 0.435655, + 0.428505, + 0.424642, + 0.414129, + 0.414958 + ], + "train_epoch_time": 4.841063022613525, + "train_loss": 2.1805734154825744, + "train_score": 0.357954851130673, + "val_loss": 2.2797273450822972, + "val_score": 0.3361482849487892 + }, + { + "epoch": 12, + "grad_norm": 0.5975656509399414, + "learning_rate": 0.464, + "model_norm": 87.81676483154297, + "step_logs": { + "grad_norm": { + "648": 1.1492356061935425, + "649": 1.0196889638900757, + "650": 0.8637744784355164, + "651": 0.8311303853988647, + "652": 0.7575616240501404, + "653": 0.711988091468811, + "654": 0.7553699612617493, + "655": 0.7828322649002075, + "656": 0.8395745754241943, + "657": 0.9479196667671204, + "658": 0.8363834023475647, + "659": 0.7534735202789307, + "660": 0.6846778988838196, + "661": 0.6148439645767212, + "662": 0.5814868211746216, + "663": 0.5759593844413757, + "664": 0.572715163230896, + "665": 0.6244068741798401, + "666": 0.6298314332962036, + "667": 0.5988413691520691, + "668": 0.571150004863739, + "669": 0.5969657897949219, + "670": 0.5965138673782349, + "671": 0.675101637840271, + "672": 0.7497316598892212, + "673": 0.687667965888977, + "674": 0.671347975730896, + "675": 0.6587310433387756, + "676": 0.6218955516815186, + "677": 0.6500345468521118, + "678": 0.7552685141563416, + "679": 0.8805782198905945, + "680": 0.9224926829338074, + "681": 0.7951744198799133, + "682": 0.7153221368789673, + "683": 0.7064132690429688, + "684": 0.6845642924308777, + "685": 0.6810371279716492, + "686": 0.6073359847068787, + "687": 0.555525004863739, + "688": 0.5570313930511475, + "689": 0.5753442049026489, + "690": 0.5745162963867188, + "691": 0.5818527936935425, + "692": 0.6150049567222595, + "693": 0.6485708951950073, + "694": 0.6962539553642273, + "695": 0.6749153733253479, + "696": 0.610314667224884, + "697": 0.5902243256568909, + "698": 0.618800938129425, + "699": 0.6315382122993469, + "700": 0.5988143086433411, + "701": 0.5975656509399414 + }, + "loss": { + "648": 2.201709508895874, + "649": 2.218527317047119, + "650": 2.1624703407287598, + "651": 2.16119384765625, + "652": 2.113049268722534, + "653": 2.1079556941986084, + "654": 2.107941150665283, + "655": 2.132331609725952, + "656": 2.1268858909606934, + "657": 2.1456098556518555, + "658": 2.158702850341797, + "659": 2.108194351196289, + "660": 2.1002771854400635, + "661": 2.093564033508301, + "662": 2.075716018676758, + "663": 2.065596103668213, + "664": 2.084507942199707, + "665": 2.0992631912231445, + "666": 2.0900769233703613, + "667": 2.1014933586120605, + "668": 2.06412410736084, + "669": 2.06667160987854, + "670": 2.093775987625122, + "671": 2.0799057483673096, + "672": 2.0605130195617676, + "673": 2.0615901947021484, + "674": 2.071089029312134, + "675": 2.075998544692993, + "676": 2.07181453704834, + "677": 2.0697274208068848, + "678": 2.1032066345214844, + "679": 2.063380479812622, + "680": 2.0770063400268555, + "681": 2.104832887649536, + "682": 2.113340377807617, + "683": 2.077195882797241, + "684": 2.0580098628997803, + "685": 2.0724377632141113, + "686": 2.067328691482544, + "687": 2.057271957397461, + "688": 2.0643768310546875, + "689": 2.065443515777588, + "690": 2.012594223022461, + "691": 2.035238742828369, + "692": 2.057743787765503, + "693": 2.062107563018799, + "694": 2.066854953765869, + "695": 2.0930724143981934, + "696": 2.0564115047454834, + "697": 2.0373926162719727, + "698": 2.0564770698547363, + "699": 2.0385019779205322, + "700": 2.0522985458374023, + "701": 2.055959701538086 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 0.407314, + 0.416165, + 0.424696, + 0.424511, + 0.426342, + 0.426612, + 0.421336, + 0.417327, + 0.411043, + 0.40139, + 0.406671, + 0.408694, + 0.409973, + 0.410932, + 0.409754, + 0.407268, + 0.404853, + 0.399886, + 0.396909, + 0.39575, + 0.394052, + 0.390263, + 0.387775, + 0.381483, + 0.37504, + 0.375496, + 0.373692, + 0.371648, + 0.370529, + 0.36668, + 0.359635, + 0.350499, + 0.345956, + 0.350055, + 0.351037, + 0.348525, + 0.346646, + 0.344234, + 0.344253, + 0.343227, + 0.340491, + 0.337203, + 0.334282, + 0.331444, + 0.327794, + 0.324036, + 0.319787, + 0.317986, + 0.31714, + 0.31494, + 0.311487, + 0.308336, + 0.306622, + 0.303957 + ], + "train_epoch_time": 4.840806722640991, + "train_loss": 2.0371761134570434, + "train_score": 0.3957204986507958, + "val_loss": 2.1470213062317036, + "val_score": 0.3678333100987904 + }, + { + "epoch": 13, + "grad_norm": 0.39106497168540955, + "learning_rate": 0.3093333333333334, + "model_norm": 87.85234069824219, + "step_logs": { + "grad_norm": { + "702": 0.5970368385314941, + "703": 0.6340122222900391, + "704": 0.6445062160491943, + "705": 0.6437075138092041, + "706": 0.6375112533569336, + "707": 0.5934171080589294, + "708": 0.5600729584693909, + "709": 0.5130089521408081, + "710": 0.5271528363227844, + "711": 0.5570048689842224, + "712": 0.5029557943344116, + "713": 0.48491641879081726, + "714": 0.4905167818069458, + "715": 0.5370640158653259, + "716": 0.5350328087806702, + "717": 0.5381401181221008, + "718": 0.519120454788208, + "719": 0.5183349251747131, + "720": 0.5376592874526978, + "721": 0.542233407497406, + "722": 0.5692716240882874, + "723": 0.5099456906318665, + "724": 0.4888961911201477, + "725": 0.4807478189468384, + "726": 0.48631423711776733, + "727": 0.5174635648727417, + "728": 0.5809503793716431, + "729": 0.6511600017547607, + "730": 0.6437714099884033, + "731": 0.5654249787330627, + "732": 0.5159339904785156, + "733": 0.4789518415927887, + "734": 0.436004638671875, + "735": 0.42933663725852966, + "736": 0.411679744720459, + "737": 0.4152348041534424, + "738": 0.4593541920185089, + "739": 0.45132145285606384, + "740": 0.42840060591697693, + "741": 0.4757876694202423, + "742": 0.44800931215286255, + "743": 0.4203818142414093, + "744": 0.4935603141784668, + "745": 0.47921067476272583, + "746": 0.4374750554561615, + "747": 0.4020148515701294, + "748": 0.38653868436813354, + "749": 0.4267551600933075, + "750": 0.404039591550827, + "751": 0.4053076207637787, + "752": 0.41479888558387756, + "753": 0.42037707567214966, + "754": 0.3903408348560333, + "755": 0.39106497168540955 + }, + "loss": { + "702": 2.047452926635742, + "703": 2.0433740615844727, + "704": 2.03933048248291, + "705": 2.0338144302368164, + "706": 2.038938522338867, + "707": 2.0089528560638428, + "708": 2.026998996734619, + "709": 2.0096888542175293, + "710": 2.027291774749756, + "711": 2.0200047492980957, + "712": 1.9866796731948853, + "713": 2.01995849609375, + "714": 2.03725266456604, + "715": 2.01417875289917, + "716": 2.0321667194366455, + "717": 2.0208499431610107, + "718": 2.0232436656951904, + "719": 2.0460236072540283, + "720": 2.026439666748047, + "721": 1.9988811016082764, + "722": 2.0238771438598633, + "723": 2.021852731704712, + "724": 2.0371651649475098, + "725": 1.994450330734253, + "726": 2.005601406097412, + "727": 2.0040032863616943, + "728": 2.0220701694488525, + "729": 2.0117228031158447, + "730": 2.02345609664917, + "731": 2.0317282676696777, + "732": 2.012515068054199, + "733": 1.9958631992340088, + "734": 2.0060486793518066, + "735": 2.0161843299865723, + "736": 1.993544578552246, + "737": 1.9995037317276, + "738": 1.9948532581329346, + "739": 2.022557020187378, + "740": 1.9958159923553467, + "741": 1.9894587993621826, + "742": 2.0125489234924316, + "743": 1.9957914352416992, + "744": 2.0075745582580566, + "745": 1.9775092601776123, + "746": 2.0013368129730225, + "747": 1.9752076864242554, + "748": 1.9865528345108032, + "749": 1.9957020282745361, + "750": 2.006993293762207, + "751": 1.9674067497253418, + "752": 2.0028648376464844, + "753": 1.984318733215332, + "754": 1.9704029560089111, + "755": 1.9741883277893066 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 0.301222, + 0.297501, + 0.294499, + 0.291801, + 0.289288, + 0.287577, + 0.28569, + 0.283906, + 0.280905, + 0.277513, + 0.275763, + 0.273406, + 0.27057, + 0.266899, + 0.264224, + 0.261382, + 0.258962, + 0.256257, + 0.253124, + 0.250222, + 0.247064, + 0.245255, + 0.242812, + 0.24007, + 0.237227, + 0.234012, + 0.230349, + 0.226463, + 0.223882, + 0.222314, + 0.220155, + 0.217783, + 0.215457, + 0.212726, + 0.210058, + 0.207218, + 0.203997, + 0.201297, + 0.198662, + 0.195432, + 0.192892, + 0.190285, + 0.186894, + 0.184182, + 0.181716, + 0.179122, + 0.176402, + 0.173334, + 0.170659, + 0.167804, + 0.164946, + 0.162081, + 0.159406, + 0.156576 + ], + "train_epoch_time": 4.84127140045166, + "train_loss": 1.9806178782557484, + "train_score": 0.4138080613770478, + "val_loss": 2.09881235482909, + "val_score": 0.38297843717543323 + }, + { + "epoch": 14, + "grad_norm": 0.33874234557151794, + "learning_rate": 0.1546666666666667, + "model_norm": 87.8645248413086, + "step_logs": { + "grad_norm": { + "756": 0.37414446473121643, + "757": 0.3537454307079315, + "758": 0.3970438241958618, + "759": 0.38566383719444275, + "760": 0.40795400738716125, + "761": 0.39117708802223206, + "762": 0.3797253370285034, + "763": 0.42360520362854004, + "764": 0.41331180930137634, + "765": 0.36566537618637085, + "766": 0.38493847846984863, + "767": 0.37495824694633484, + "768": 0.3491826057434082, + "769": 0.35057926177978516, + "770": 0.36475133895874023, + "771": 0.3751797378063202, + "772": 0.3661869466304779, + "773": 0.3664822578430176, + "774": 0.3681018352508545, + "775": 0.39027464389801025, + "776": 0.3661656975746155, + "777": 0.3489469289779663, + "778": 0.3828277289867401, + "779": 0.4010617733001709, + "780": 0.3795221149921417, + "781": 0.3674989938735962, + "782": 0.3609263002872467, + "783": 0.365070641040802, + "784": 0.37838661670684814, + "785": 0.3415158987045288, + "786": 0.3888396918773651, + "787": 0.33122503757476807, + "788": 0.3711661398410797, + "789": 0.3774604797363281, + "790": 0.35745200514793396, + "791": 0.41088002920150757, + "792": 0.3404195308685303, + "793": 0.3303207457065582, + "794": 0.37241995334625244, + "795": 0.3590289354324341, + "796": 0.35071736574172974, + "797": 0.3599846363067627, + "798": 0.33293843269348145, + "799": 0.3308292031288147, + "800": 0.3526424467563629, + "801": 0.34812334179878235, + "802": 0.37489041686058044, + "803": 0.33675456047058105, + "804": 0.36126208305358887, + "805": 0.34167638421058655, + "806": 0.33782336115837097, + "807": 0.35379254817962646, + "808": 0.33602678775787354, + "809": 0.33874234557151794 + }, + "loss": { + "756": 1.9569520950317383, + "757": 1.9686423540115356, + "758": 1.9982889890670776, + "759": 1.9629350900650024, + "760": 1.954088807106018, + "761": 1.9893782138824463, + "762": 1.9996222257614136, + "763": 1.9854071140289307, + "764": 1.9949249029159546, + "765": 1.9680728912353516, + "766": 1.9819000959396362, + "767": 1.9962029457092285, + "768": 1.9658184051513672, + "769": 1.9386069774627686, + "770": 2.0059897899627686, + "771": 1.9722788333892822, + "772": 1.9910156726837158, + "773": 1.9721592664718628, + "774": 1.9776965379714966, + "775": 1.984902262687683, + "776": 1.976353645324707, + "777": 1.9893345832824707, + "778": 1.9513378143310547, + "779": 1.9796507358551025, + "780": 1.9403984546661377, + "781": 1.9532275199890137, + "782": 1.9857637882232666, + "783": 1.9697333574295044, + "784": 2.002728223800659, + "785": 1.9567406177520752, + "786": 1.9737015962600708, + "787": 1.9744305610656738, + "788": 2.006575584411621, + "789": 1.9719974994659424, + "790": 1.9621679782867432, + "791": 1.9796128273010254, + "792": 1.9757723808288574, + "793": 1.971252679824829, + "794": 1.960754156112671, + "795": 1.98601496219635, + "796": 1.9481157064437866, + "797": 1.9799139499664307, + "798": 1.9645318984985352, + "799": 1.9503973722457886, + "800": 1.9789137840270996, + "801": 1.9745160341262817, + "802": 1.949885606765747, + "803": 1.945481300354004, + "804": 1.959337830543518, + "805": 1.9622700214385986, + "806": 1.973149299621582, + "807": 1.9708741903305054, + "808": 1.9373981952667236, + "809": 1.9534707069396973 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 0.153816, + 0.151074, + 0.148068, + 0.14527, + 0.142342, + 0.139592, + 0.136803, + 0.133803, + 0.131014, + 0.128327, + 0.125434, + 0.122629, + 0.119849, + 0.116997, + 0.114134, + 0.11126, + 0.108442, + 0.105594, + 0.102748, + 0.0998628, + 0.0970621, + 0.0942459, + 0.0913399, + 0.088471, + 0.0856528, + 0.0828239, + 0.0799871, + 0.0771315, + 0.0742714, + 0.0714525, + 0.0685602, + 0.0657562, + 0.0628763, + 0.0600177, + 0.0571773, + 0.0542938, + 0.0514777, + 0.0486258, + 0.045753, + 0.0429031, + 0.0400481, + 0.0371892, + 0.0343371, + 0.0314783, + 0.0286162, + 0.0257574, + 0.0228947, + 0.0200377, + 0.0171754, + 0.0143149, + 0.011453, + 0.00859025, + 0.00572744, + 0.00286396 + ], + "train_epoch_time": 4.8408122062683105, + "train_loss": 1.9635653591908546, + "train_score": 0.4184025287970238, + "val_loss": 2.086306357356355, + "val_score": 0.38613124337179927 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:33:50.676876", + "final_model_norm": 87.8645248413086, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:32:09.107956", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 0.464, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 10.867344856262207, + "learning_rate": 4.64e-11, + "model_norm": 87.38897705078125, + "step_logs": { + "grad_norm": { + "0": 22.7664794921875, + "1": 23.4499454498291, + "2": 6.445989608764648, + "3": 8.906142234802246, + "4": 17.763187408447266, + "5": 6.188925266265869, + "6": 4.13126277923584, + "7": 5.662048816680908, + "8": 4.080653667449951, + "9": 5.714040756225586, + "10": 3.9929022789001465, + "11": 7.0095601081848145, + "12": 8.02617073059082, + "13": 12.596332550048828, + "14": 6.581796646118164, + "15": 4.435499668121338, + "16": 17.17401695251465, + "17": 5.262341022491455, + "18": 4.078556060791016, + "19": 44.627567291259766, + "20": 4.673335552215576, + "21": 7.911524772644043, + "22": 5.466825485229492, + "23": 8.20952320098877, + "24": 21.045005798339844, + "25": 6.467375755310059, + "26": 6.189671993255615, + "27": 12.949029922485352, + "28": 8.57295036315918, + "29": 5.5378007888793945, + "30": 14.023530960083008, + "31": 6.726585865020752, + "32": 7.064029693603516, + "33": 7.079834938049316, + "34": 5.3993988037109375, + "35": 11.558791160583496, + "36": 8.70158863067627, + "37": 4.486657619476318, + "38": 17.66220474243164, + "39": 5.690054416656494, + "40": 7.140784740447998, + "41": 5.444159507751465, + "42": 10.740113258361816, + "43": 4.331324100494385, + "44": 10.871084213256836, + "45": 6.3053879737854, + "46": 5.462993144989014, + "47": 4.7902679443359375, + "48": 3.2392709255218506, + "49": 2.816694974899292, + "50": 11.274415016174316, + "51": 7.449441909790039, + "52": 4.374978542327881, + "53": 10.867344856262207 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.8003854751586914, + "3": 3.804929733276367, + "4": 4.3205342292785645, + "5": 4.274775505065918, + "6": 3.550309181213379, + "7": 3.7156550884246826, + "8": 3.9138545989990234, + "9": 3.5210957527160645, + "10": 3.758052349090576, + "11": 3.648374557495117, + "12": 6.392759323120117, + "13": 4.713771820068359, + "14": 4.208435535430908, + "15": 3.9465599060058594, + "16": 5.199435710906982, + "17": 4.328524589538574, + "18": 3.651263475418091, + "19": 5.360636234283447, + "20": 3.920323371887207, + "21": 4.008439064025879, + "22": 4.396872043609619, + "23": 3.7561728954315186, + "24": 5.591197967529297, + "25": 3.7877416610717773, + "26": 4.384615898132324, + "27": 4.973392009735107, + "28": 5.224878311157227, + "29": 5.193818092346191, + "30": 4.656402111053467, + "31": 4.017825603485107, + "32": 3.922196388244629, + "33": 4.70242977142334, + "34": 3.6501121520996094, + "35": 4.688882827758789, + "36": 4.468406677246094, + "37": 4.085740089416504, + "38": 6.512267112731934, + "39": 4.839545249938965, + "40": 4.314859390258789, + "41": 4.304404258728027, + "42": 5.375008583068848, + "43": 4.207009315490723, + "44": 6.017759799957275, + "45": 4.3909454345703125, + "46": 3.86588716506958, + "47": 4.739500045776367, + "48": 3.9381890296936035, + "49": 3.6182003021240234, + "50": 4.826262474060059, + "51": 4.624513626098633, + "52": 5.69394588470459, + "53": 5.001783847808838 + }, + "lr": { + "0": 4.64e-11, + "1": 0.009280000045472001, + "2": 0.018560000044544, + "3": 0.027840000043616, + "4": 0.037120000042688, + "5": 0.04640000004176, + "6": 0.055680000040832, + "7": 0.064960000039904, + "8": 0.074240000038976, + "9": 0.083520000038048, + "10": 0.09280000003712001, + "11": 0.102080000036192, + "12": 0.111360000035264, + "13": 0.120640000034336, + "14": 0.129920000033408, + "15": 0.13920000003248, + "16": 0.148480000031552, + "17": 0.157760000030624, + "18": 0.16704000002969602, + "19": 0.17632000002876802, + "20": 0.18560000002784002, + "21": 0.194880000026912, + "22": 0.204160000025984, + "23": 0.21344000002505603, + "24": 0.222720000024128, + "25": 0.23200000002319995, + "26": 0.24128000002227198, + "27": 0.25056000002134404, + "28": 0.259840000020416, + "29": 0.269120000019488, + "30": 0.27840000001856, + "31": 0.28768000001763205, + "32": 0.29696000001670403, + "33": 0.306240000015776, + "34": 0.315520000014848, + "35": 0.32480000001392006, + "36": 0.33408000001299204, + "37": 0.343360000012064, + "38": 0.35264000001113605, + "39": 0.361920000010208, + "40": 0.37120000000928005, + "41": 0.380480000008352, + "42": 0.389760000007424, + "43": 0.39904000000649603, + "44": 0.408320000005568, + "45": 0.41760000000464, + "46": 0.42688000000371207, + "47": 0.43616000000278404, + "48": 0.445440000001856, + "49": 0.454720000000928, + "50": 0.464, + "51": 0.464, + "52": 0.464, + "53": 0.464 + } + }, + "step_size_list": [ + 4.64e-11, + 0.00593737, + 0.0168503, + 0.0215783, + 0.0157592, + 0.0384145, + 0.0491077, + 0.0507405, + 0.0641144, + 0.0602063, + 0.077537, + 0.0604964, + 0.0713351, + 0.03981, + 0.0778582, + 0.103344, + 0.0284914, + 0.104849, + 0.120999, + 0.00522371, + 0.122348, + 0.0772862, + 0.12053, + 0.0732249, + 0.0226777, + 0.101712, + 0.117461, + 0.0479651, + 0.091897, + 0.149968, + 0.040471, + 0.109807, + 0.102788, + 0.116347, + 0.139609, + 0.0577171, + 0.0872155, + 0.186017, + 0.0373316, + 0.163718, + 0.116242, + 0.164714, + 0.0752112, + 0.211163, + 0.0815104, + 0.144469, + 0.161224, + 0.212155, + 0.279551, + 0.303442, + 0.0652571, + 0.122622, + 0.260692, + 0.0716287 + ], + "train_epoch_time": 4.84460973739624, + "train_loss": 3.801257535682689, + "train_score": 0.13558890781494945, + "val_loss": 3.8098775253777664, + "val_score": 0.1370012920123154 + }, + { + "epoch": 1, + "grad_norm": 2.0714566707611084, + "learning_rate": 0.464, + "model_norm": 87.30049133300781, + "step_logs": { + "grad_norm": { + "54": 3.2989003658294678, + "55": 4.222681999206543, + "56": 7.397814750671387, + "57": 3.8926427364349365, + "58": 14.16972827911377, + "59": 6.420956134796143, + "60": 4.9557952880859375, + "61": 13.485730171203613, + "62": 4.930192470550537, + "63": 3.3796579837799072, + "64": 4.4158830642700195, + "65": 5.228013515472412, + "66": 8.382903099060059, + "67": 7.993526458740234, + "68": 6.3797197341918945, + "69": 11.84712028503418, + "70": 3.5176312923431396, + "71": 5.429211616516113, + "72": 3.579590082168579, + "73": 5.0470170974731445, + "74": 7.79220724105835, + "75": 5.338777542114258, + "76": 4.107653617858887, + "77": 12.636404991149902, + "78": 3.68166184425354, + "79": 2.7825984954833984, + "80": 3.896320343017578, + "81": 7.72820520401001, + "82": 2.3804285526275635, + "83": 13.873343467712402, + "84": 6.812201023101807, + "85": 3.397881507873535, + "86": 4.052323818206787, + "87": 3.012146472930908, + "88": 2.5745558738708496, + "89": 2.8760454654693604, + "90": 3.4528188705444336, + "91": 4.478063106536865, + "92": 2.996354103088379, + "93": 5.239041328430176, + "94": 3.2973387241363525, + "95": 3.3594624996185303, + "96": 3.310357093811035, + "97": 4.669572830200195, + "98": 3.144587278366089, + "99": 2.272308588027954, + "100": 5.3997416496276855, + "101": 2.291703939437866, + "102": 1.769398808479309, + "103": 2.8895630836486816, + "104": 2.5099306106567383, + "105": 5.2935075759887695, + "106": 2.0428225994110107, + "107": 2.0714566707611084 + }, + "loss": { + "54": 3.7783703804016113, + "55": 3.9213552474975586, + "56": 4.5158867835998535, + "57": 3.687530517578125, + "58": 6.772468566894531, + "59": 5.479622840881348, + "60": 4.148347854614258, + "61": 6.270169258117676, + "62": 4.35581111907959, + "63": 3.503696918487549, + "64": 3.747910976409912, + "65": 4.157194137573242, + "66": 4.164535999298096, + "67": 4.440500259399414, + "68": 3.896484851837158, + "69": 5.742057800292969, + "70": 3.98677396774292, + "71": 4.361114501953125, + "72": 4.1221923828125, + "73": 4.0096588134765625, + "74": 5.064618110656738, + "75": 5.140599250793457, + "76": 3.7918624877929688, + "77": 6.432853698730469, + "78": 4.206114768981934, + "79": 3.278982639312744, + "80": 3.826313018798828, + "81": 4.816197872161865, + "82": 3.3911356925964355, + "83": 4.876935005187988, + "84": 3.8991923332214355, + "85": 4.186374187469482, + "86": 3.430126428604126, + "87": 3.3390538692474365, + "88": 3.622817039489746, + "89": 3.3547282218933105, + "90": 3.3335962295532227, + "91": 3.782844066619873, + "92": 3.63641357421875, + "93": 4.060509204864502, + "94": 3.517932653427124, + "95": 3.6322638988494873, + "96": 3.4169673919677734, + "97": 3.781601905822754, + "98": 3.8285770416259766, + "99": 3.433558702468872, + "100": 3.5310988426208496, + "101": 3.4940805435180664, + "102": 2.991687297821045, + "103": 3.139760732650757, + "104": 3.3575704097747803, + "105": 3.537698745727539, + "106": 3.453799247741699, + "107": 3.001152753829956 + }, + "lr": { + "54": 0.464, + "55": 0.464, + "56": 0.464, + "57": 0.464, + "58": 0.464, + "59": 0.464, + "60": 0.464, + "61": 0.464, + "62": 0.464, + "63": 0.464, + "64": 0.464, + "65": 0.464, + "66": 0.464, + "67": 0.464, + "68": 0.464, + "69": 0.464, + "70": 0.464, + "71": 0.464, + "72": 0.464, + "73": 0.464, + "74": 0.464, + "75": 0.464, + "76": 0.464, + "77": 0.464, + "78": 0.464, + "79": 0.464, + "80": 0.464, + "81": 0.464, + "82": 0.464, + "83": 0.464, + "84": 0.464, + "85": 0.464, + "86": 0.464, + "87": 0.464, + "88": 0.464, + "89": 0.464, + "90": 0.464, + "91": 0.464, + "92": 0.464, + "93": 0.464, + "94": 0.464, + "95": 0.464, + "96": 0.464, + "97": 0.464, + "98": 0.464, + "99": 0.464, + "100": 0.464, + "101": 0.464, + "102": 0.464, + "103": 0.464, + "104": 0.464, + "105": 0.464, + "106": 0.464, + "107": 0.464 + } + }, + "step_size_list": [ + 0.27814, + 0.225797, + 0.121734, + 0.237544, + 0.058898, + 0.169, + 0.195489, + 0.0600328, + 0.202211, + 0.264188, + 0.210233, + 0.183739, + 0.0944086, + 0.106953, + 0.135539, + 0.0695566, + 0.269758, + 0.180681, + 0.269587, + 0.187563, + 0.122706, + 0.202944, + 0.228308, + 0.0686513, + 0.2655, + 0.299774, + 0.241606, + 0.11968, + 0.334375, + 0.0456875, + 0.123367, + 0.282956, + 0.219835, + 0.284593, + 0.325735, + 0.295159, + 0.253593, + 0.208086, + 0.295016, + 0.180669, + 0.270237, + 0.269633, + 0.266049, + 0.198484, + 0.290144, + 0.343989, + 0.159139, + 0.344031, + 0.373355, + 0.286959, + 0.323278, + 0.163518, + 0.36241, + 0.348426 + ], + "train_epoch_time": 4.842065095901489, + "train_loss": 3.0175992001762006, + "train_score": 0.20004595585242235, + "val_loss": 3.032195486239533, + "val_score": 0.19873887817156163 + }, + { + "epoch": 2, + "grad_norm": 0.7995576858520508, + "learning_rate": 0.464, + "model_norm": 87.32730865478516, + "step_logs": { + "grad_norm": { + "108": 1.7453795671463013, + "109": 2.3346517086029053, + "110": 1.7789217233657837, + "111": 1.5735795497894287, + "112": 1.9343189001083374, + "113": 2.1023476123809814, + "114": 1.7054225206375122, + "115": 1.8063548803329468, + "116": 1.4714947938919067, + "117": 1.2690017223358154, + "118": 1.2429780960083008, + "119": 1.3882229328155518, + "120": 1.2908350229263306, + "121": 1.7685282230377197, + "122": 1.3473387956619263, + "123": 1.6238688230514526, + "124": 1.3509222269058228, + "125": 1.2665413618087769, + "126": 1.188738465309143, + "127": 1.5948922634124756, + "128": 1.218187928199768, + "129": 0.8909094929695129, + "130": 1.313584327697754, + "131": 1.4468961954116821, + "132": 1.267411708831787, + "133": 1.5071102380752563, + "134": 1.4385671615600586, + "135": 1.036034345626831, + "136": 1.005440592765808, + "137": 1.2462759017944336, + "138": 1.0342200994491577, + "139": 0.9531611800193787, + "140": 1.0670197010040283, + "141": 1.552345633506775, + "142": 1.0417265892028809, + "143": 0.9641990661621094, + "144": 1.0335783958435059, + "145": 1.43638014793396, + "146": 1.1249327659606934, + "147": 0.7396571636199951, + "148": 0.7599216103553772, + "149": 1.0708014965057373, + "150": 1.2021658420562744, + "151": 1.2595103979110718, + "152": 1.0128695964813232, + "153": 0.7846184372901917, + "154": 0.9114413857460022, + "155": 1.3085514307022095, + "156": 1.051295518875122, + "157": 1.017424464225769, + "158": 1.0869580507278442, + "159": 1.28909170627594, + "160": 0.999721884727478, + "161": 0.7995576858520508 + }, + "loss": { + "108": 3.0271198749542236, + "109": 2.991933822631836, + "110": 3.1839370727539062, + "111": 2.8543524742126465, + "112": 3.0024099349975586, + "113": 2.972348690032959, + "114": 3.1099231243133545, + "115": 2.882803440093994, + "116": 3.00272274017334, + "117": 2.8202266693115234, + "118": 2.73990797996521, + "119": 2.8246216773986816, + "120": 2.8119752407073975, + "121": 2.8203301429748535, + "122": 2.9565765857696533, + "123": 2.7843191623687744, + "124": 2.9501309394836426, + "125": 2.731534242630005, + "126": 2.781210422515869, + "127": 2.719125747680664, + "128": 2.923259735107422, + "129": 2.7054128646850586, + "130": 2.6743741035461426, + "131": 2.852283477783203, + "132": 2.741102457046509, + "133": 2.7418837547302246, + "134": 2.8744490146636963, + "135": 2.71822190284729, + "136": 2.6969385147094727, + "137": 2.6778035163879395, + "138": 2.777622699737549, + "139": 2.650146484375, + "140": 2.680689811706543, + "141": 2.7095980644226074, + "142": 2.790527105331421, + "143": 2.6293399333953857, + "144": 2.6897921562194824, + "145": 2.6595771312713623, + "146": 2.801690101623535, + "147": 2.603154182434082, + "148": 2.5941762924194336, + "149": 2.620685338973999, + "150": 2.717017889022827, + "151": 2.687593936920166, + "152": 2.7148540019989014, + "153": 2.5804102420806885, + "154": 2.61409068107605, + "155": 2.657287120819092, + "156": 2.745023727416992, + "157": 2.630979537963867, + "158": 2.68674373626709, + "159": 2.6528375148773193, + "160": 2.747378349304199, + "161": 2.5929226875305176 + }, + "lr": { + "108": 0.464, + "109": 0.464, + "110": 0.464, + "111": 0.464, + "112": 0.464, + "113": 0.464, + "114": 0.464, + "115": 0.464, + "116": 0.464, + "117": 0.464, + "118": 0.464, + "119": 0.464, + "120": 0.464, + "121": 0.464, + "122": 0.464, + "123": 0.464, + "124": 0.464, + "125": 0.464, + "126": 0.464, + "127": 0.464, + "128": 0.464, + "129": 0.464, + "130": 0.464, + "131": 0.464, + "132": 0.464, + "133": 0.464, + "134": 0.464, + "135": 0.464, + "136": 0.464, + "137": 0.464, + "138": 0.464, + "139": 0.464, + "140": 0.464, + "141": 0.464, + "142": 0.464, + "143": 0.464, + "144": 0.464, + "145": 0.464, + "146": 0.464, + "147": 0.464, + "148": 0.464, + "149": 0.464, + "150": 0.464, + "151": 0.464, + "152": 0.464, + "153": 0.464, + "154": 0.464, + "155": 0.464, + "156": 0.464, + "157": 0.464, + "158": 0.464, + "159": 0.464, + "160": 0.464, + "161": 0.464 + } + }, + "step_size_list": [ + 0.376173, + 0.326152, + 0.377055, + 0.386261, + 0.359936, + 0.344986, + 0.381274, + 0.367498, + 0.397499, + 0.409723, + 0.410321, + 0.400591, + 0.407922, + 0.36905, + 0.406146, + 0.380415, + 0.405765, + 0.408363, + 0.415073, + 0.381256, + 0.415111, + 0.434431, + 0.403588, + 0.396485, + 0.408467, + 0.3892, + 0.397591, + 0.42506, + 0.426878, + 0.408967, + 0.425946, + 0.429815, + 0.422381, + 0.384638, + 0.425602, + 0.428823, + 0.424853, + 0.393228, + 0.419989, + 0.442428, + 0.441214, + 0.421241, + 0.413031, + 0.408113, + 0.4266, + 0.439665, + 0.43214, + 0.403655, + 0.424361, + 0.425189, + 0.421045, + 0.405125, + 0.427887, + 0.438895 + ], + "train_epoch_time": 4.841308832168579, + "train_loss": 2.5992508830777883, + "train_score": 0.2544487535953522, + "val_loss": 2.622389969404201, + "val_score": 0.24980266955087707 + }, + { + "epoch": 3, + "grad_norm": 0.9289785027503967, + "learning_rate": 0.464, + "model_norm": 87.36541748046875, + "step_logs": { + "grad_norm": { + "162": 0.7847865223884583, + "163": 0.88823401927948, + "164": 1.0743495225906372, + "165": 1.1417078971862793, + "166": 1.0487600564956665, + "167": 1.1039804220199585, + "168": 1.0553600788116455, + "169": 1.234736442565918, + "170": 1.0465025901794434, + "171": 0.7626662850379944, + "172": 0.8245648741722107, + "173": 1.2805877923965454, + "174": 1.0924804210662842, + "175": 0.6467738151550293, + "176": 0.5612294673919678, + "177": 0.8982486128807068, + "178": 1.001540184020996, + "179": 0.9854945540428162, + "180": 0.9586362242698669, + "181": 0.9121066331863403, + "182": 0.9199315905570984, + "183": 0.9640321135520935, + "184": 0.8909121751785278, + "185": 0.7867289185523987, + "186": 0.8402086496353149, + "187": 0.886196494102478, + "188": 0.8527680039405823, + "189": 0.8706535696983337, + "190": 0.8681197762489319, + "191": 0.9133608937263489, + "192": 1.1165709495544434, + "193": 1.1397994756698608, + "194": 1.094490885734558, + "195": 0.888022243976593, + "196": 0.9068925380706787, + "197": 1.2189306020736694, + "198": 1.0212775468826294, + "199": 0.8594958186149597, + "200": 0.9148173928260803, + "201": 1.2075608968734741, + "202": 1.0625265836715698, + "203": 0.7372179627418518, + "204": 0.707021951675415, + "205": 0.881438672542572, + "206": 0.9729852676391602, + "207": 0.9525948762893677, + "208": 0.851600170135498, + "209": 0.7714646458625793, + "210": 0.9278709292411804, + "211": 1.1792516708374023, + "212": 1.0500853061676025, + "213": 0.8017606139183044, + "214": 0.8323712944984436, + "215": 0.9289785027503967 + }, + "loss": { + "162": 2.5928449630737305, + "163": 2.6062660217285156, + "164": 2.6345601081848145, + "165": 2.6397294998168945, + "166": 2.6643354892730713, + "167": 2.648961067199707, + "168": 2.6670284271240234, + "169": 2.615281581878662, + "170": 2.736809730529785, + "171": 2.5692615509033203, + "172": 2.597013235092163, + "173": 2.5934057235717773, + "174": 2.754359006881714, + "175": 2.560157299041748, + "176": 2.519620656967163, + "177": 2.563488006591797, + "178": 2.651240348815918, + "179": 2.5946297645568848, + "180": 2.62282133102417, + "181": 2.5967390537261963, + "182": 2.612783432006836, + "183": 2.586153984069824, + "184": 2.632042407989502, + "185": 2.5606541633605957, + "186": 2.597174644470215, + "187": 2.5834105014801025, + "188": 2.614773988723755, + "189": 2.5667665004730225, + "190": 2.6083247661590576, + "191": 2.5700886249542236, + "192": 2.647434711456299, + "193": 2.6329309940338135, + "194": 2.6459603309631348, + "195": 2.605299949645996, + "196": 2.5579757690429688, + "197": 2.619462251663208, + "198": 2.6579835414886475, + "199": 2.5811972618103027, + "200": 2.58602237701416, + "201": 2.5991976261138916, + "202": 2.685232639312744, + "203": 2.5744965076446533, + "204": 2.529045820236206, + "205": 2.5680088996887207, + "206": 2.608891010284424, + "207": 2.5749635696411133, + "208": 2.5732386112213135, + "209": 2.5692808628082275, + "210": 2.5827243328094482, + "211": 2.58756160736084, + "212": 2.6649670600891113, + "213": 2.5771422386169434, + "214": 2.578545093536377, + "215": 2.5754103660583496 + }, + "lr": { + "162": 0.464, + "163": 0.464, + "164": 0.464, + "165": 0.464, + "166": 0.464, + "167": 0.464, + "168": 0.464, + "169": 0.464, + "170": 0.464, + "171": 0.464, + "172": 0.464, + "173": 0.464, + "174": 0.464, + "175": 0.464, + "176": 0.464, + "177": 0.464, + "178": 0.464, + "179": 0.464, + "180": 0.464, + "181": 0.464, + "182": 0.464, + "183": 0.464, + "184": 0.464, + "185": 0.464, + "186": 0.464, + "187": 0.464, + "188": 0.464, + "189": 0.464, + "190": 0.464, + "191": 0.464, + "192": 0.464, + "193": 0.464, + "194": 0.464, + "195": 0.464, + "196": 0.464, + "197": 0.464, + "198": 0.464, + "199": 0.464, + "200": 0.464, + "201": 0.464, + "202": 0.464, + "203": 0.464, + "204": 0.464, + "205": 0.464, + "206": 0.464, + "207": 0.464, + "208": 0.464, + "209": 0.464, + "210": 0.464, + "211": 0.464, + "212": 0.464, + "213": 0.464, + "214": 0.464, + "215": 0.464 + } + }, + "step_size_list": [ + 0.439765, + 0.433552, + 0.42119, + 0.416307, + 0.423445, + 0.419249, + 0.423016, + 0.408723, + 0.424583, + 0.440846, + 0.437431, + 0.404639, + 0.421615, + 0.447053, + 0.450922, + 0.432424, + 0.426558, + 0.426926, + 0.429118, + 0.431898, + 0.43157, + 0.428293, + 0.43366, + 0.439362, + 0.436475, + 0.433431, + 0.435876, + 0.434247, + 0.434851, + 0.431505, + 0.418299, + 0.41634, + 0.419897, + 0.433555, + 0.431791, + 0.410041, + 0.425283, + 0.43511, + 0.431596, + 0.410562, + 0.422763, + 0.442336, + 0.443656, + 0.433568, + 0.42797, + 0.428931, + 0.435523, + 0.440336, + 0.430692, + 0.41256, + 0.42336, + 0.438618, + 0.436773, + 0.43053 + ], + "train_epoch_time": 4.841070890426636, + "train_loss": 2.5811669449553087, + "train_score": 0.2214860563255964, + "val_loss": 2.615156807116608, + "val_score": 0.21387055117624088 + }, + { + "epoch": 4, + "grad_norm": 0.889293372631073, + "learning_rate": 0.464, + "model_norm": 87.40538787841797, + "step_logs": { + "grad_norm": { + "216": 0.754064679145813, + "217": 0.8650315999984741, + "218": 0.8457391858100891, + "219": 0.8708078861236572, + "220": 0.8217610120773315, + "221": 0.710178792476654, + "222": 0.7253114581108093, + "223": 0.9253487586975098, + "224": 0.8692904710769653, + "225": 0.715171754360199, + "226": 0.7517708539962769, + "227": 0.8607370257377625, + "228": 0.8758278489112854, + "229": 0.894248902797699, + "230": 0.9296528100967407, + "231": 0.8673824071884155, + "232": 0.8326968550682068, + "233": 0.8765765428543091, + "234": 0.8940305113792419, + "235": 0.924471914768219, + "236": 0.9716203808784485, + "237": 0.9140962362289429, + "238": 0.9629188776016235, + "239": 0.9863651990890503, + "240": 1.0309852361679077, + "241": 0.8867030143737793, + "242": 0.8277438282966614, + "243": 0.8383505940437317, + "244": 0.8075698614120483, + "245": 0.7585670948028564, + "246": 0.7546662092208862, + "247": 0.7675836086273193, + "248": 0.8228592276573181, + "249": 0.8094436526298523, + "250": 0.7157390117645264, + "251": 0.7271272540092468, + "252": 0.772714376449585, + "253": 0.7835621237754822, + "254": 0.8827233910560608, + "255": 0.803135335445404, + "256": 0.6097615361213684, + "257": 0.6437383890151978, + "258": 0.7866104245185852, + "259": 0.8246266841888428, + "260": 0.818315863609314, + "261": 0.7815305590629578, + "262": 0.7399082779884338, + "263": 0.8718881011009216, + "264": 0.8604130148887634, + "265": 0.8218329548835754, + "266": 0.8615092635154724, + "267": 0.8987654447555542, + "268": 0.9400328397750854, + "269": 0.889293372631073 + }, + "loss": { + "216": 2.5780444145202637, + "217": 2.5299715995788574, + "218": 2.5789334774017334, + "219": 2.5701842308044434, + "220": 2.604111671447754, + "221": 2.527941942214966, + "222": 2.5688040256500244, + "223": 2.5431976318359375, + "224": 2.6267378330230713, + "225": 2.5190181732177734, + "226": 2.551858901977539, + "227": 2.545146942138672, + "228": 2.585883140563965, + "229": 2.5538382530212402, + "230": 2.578336238861084, + "231": 2.572866439819336, + "232": 2.5440096855163574, + "233": 2.5524444580078125, + "234": 2.5869879722595215, + "235": 2.55580997467041, + "236": 2.6068756580352783, + "237": 2.57442307472229, + "238": 2.5645904541015625, + "239": 2.582531452178955, + "240": 2.5895957946777344, + "241": 2.5937376022338867, + "242": 2.5293283462524414, + "243": 2.5690677165985107, + "244": 2.528202533721924, + "245": 2.5501508712768555, + "246": 2.509793758392334, + "247": 2.5440261363983154, + "248": 2.550060272216797, + "249": 2.557403564453125, + "250": 2.5279016494750977, + "251": 2.538327693939209, + "252": 2.553427219390869, + "253": 2.545814037322998, + "254": 2.527787685394287, + "255": 2.5648326873779297, + "256": 2.501943826675415, + "257": 2.509753465652466, + "258": 2.517652988433838, + "259": 2.5426149368286133, + "260": 2.5298633575439453, + "261": 2.549063205718994, + "262": 2.505011558532715, + "263": 2.5539746284484863, + "264": 2.563380241394043, + "265": 2.5415501594543457, + "266": 2.5311694145202637, + "267": 2.549185276031494, + "268": 2.542485237121582, + "269": 2.560265064239502 + }, + "lr": { + "216": 0.464, + "217": 0.464, + "218": 0.464, + "219": 0.464, + "220": 0.464, + "221": 0.464, + "222": 0.464, + "223": 0.464, + "224": 0.464, + "225": 0.464, + "226": 0.464, + "227": 0.464, + "228": 0.464, + "229": 0.464, + "230": 0.464, + "231": 0.464, + "232": 0.464, + "233": 0.464, + "234": 0.464, + "235": 0.464, + "236": 0.464, + "237": 0.464, + "238": 0.464, + "239": 0.464, + "240": 0.464, + "241": 0.464, + "242": 0.464, + "243": 0.464, + "244": 0.464, + "245": 0.464, + "246": 0.464, + "247": 0.464, + "248": 0.464, + "249": 0.464, + "250": 0.464, + "251": 0.464, + "252": 0.464, + "253": 0.464, + "254": 0.464, + "255": 0.464, + "256": 0.464, + "257": 0.464, + "258": 0.464, + "259": 0.464, + "260": 0.464, + "261": 0.464, + "262": 0.464, + "263": 0.464, + "264": 0.464, + "265": 0.464, + "266": 0.464, + "267": 0.464, + "268": 0.464, + "269": 0.464 + } + }, + "step_size_list": [ + 0.441413, + 0.434206, + 0.435949, + 0.434274, + 0.437669, + 0.443473, + 0.442954, + 0.430382, + 0.434969, + 0.443126, + 0.441324, + 0.434647, + 0.434123, + 0.432575, + 0.43052, + 0.434522, + 0.436405, + 0.433709, + 0.432965, + 0.430595, + 0.428038, + 0.431508, + 0.428092, + 0.426705, + 0.423656, + 0.433513, + 0.436564, + 0.436308, + 0.437799, + 0.440918, + 0.440794, + 0.44034, + 0.437076, + 0.437968, + 0.443165, + 0.442611, + 0.440123, + 0.439414, + 0.433032, + 0.43842, + 0.448536, + 0.446881, + 0.438971, + 0.436892, + 0.437155, + 0.439564, + 0.441609, + 0.434028, + 0.434863, + 0.437054, + 0.434446, + 0.432225, + 0.429378, + 0.432972 + ], + "train_epoch_time": 4.841597557067871, + "train_loss": 2.5323087305044343, + "train_score": 0.2518965207863401, + "val_loss": 2.5780961787248997, + "val_score": 0.24644804832308492 + }, + { + "epoch": 5, + "grad_norm": 0.9988219738006592, + "learning_rate": 0.464, + "model_norm": 87.45085144042969, + "step_logs": { + "grad_norm": { + "270": 0.8288596272468567, + "271": 0.7934896349906921, + "272": 0.9915941953659058, + "273": 0.9043965935707092, + "274": 0.7631224989891052, + "275": 0.7688599228858948, + "276": 0.8321759700775146, + "277": 0.8154940605163574, + "278": 0.7812998294830322, + "279": 0.7776891589164734, + "280": 0.7468566298484802, + "281": 0.7364938259124756, + "282": 0.7987938523292542, + "283": 0.7861807942390442, + "284": 0.8148996233940125, + "285": 0.9629576206207275, + "286": 0.9761831760406494, + "287": 0.8699951171875, + "288": 0.7334563136100769, + "289": 0.7432525157928467, + "290": 0.7942057847976685, + "291": 0.7675772905349731, + "292": 0.7070748209953308, + "293": 0.7475346922874451, + "294": 0.7945957779884338, + "295": 0.8732880353927612, + "296": 0.9207392930984497, + "297": 0.9889922142028809, + "298": 0.8606439828872681, + "299": 0.7848327159881592, + "300": 0.7060503959655762, + "301": 0.7030293345451355, + "302": 0.7826439738273621, + "303": 1.0410118103027344, + "304": 0.8113794922828674, + "305": 0.7012067437171936, + "306": 0.8564667701721191, + "307": 1.0028573274612427, + "308": 1.307279348373413, + "309": 0.8510302305221558, + "310": 0.825967013835907, + "311": 0.8582208156585693, + "312": 0.9986169934272766, + "313": 0.9275185465812683, + "314": 1.0599502325057983, + "315": 0.8711197376251221, + "316": 0.7934519648551941, + "317": 0.8633790016174316, + "318": 0.8468979597091675, + "319": 0.9627843499183655, + "320": 0.8951519131660461, + "321": 0.772579550743103, + "322": 0.9886743426322937, + "323": 0.9988219738006592 + }, + "loss": { + "270": 2.534425735473633, + "271": 2.538745403289795, + "272": 2.5303900241851807, + "273": 2.5948383808135986, + "274": 2.5028274059295654, + "275": 2.516233205795288, + "276": 2.5069284439086914, + "277": 2.554931163787842, + "278": 2.4920616149902344, + "279": 2.54748272895813, + "280": 2.495222568511963, + "281": 2.5200893878936768, + "282": 2.5054917335510254, + "283": 2.554323196411133, + "284": 2.4950976371765137, + "285": 2.5486481189727783, + "286": 2.5618343353271484, + "287": 2.5696475505828857, + "288": 2.506333351135254, + "289": 2.505603790283203, + "290": 2.510037422180176, + "291": 2.5071001052856445, + "292": 2.482905626296997, + "293": 2.4872779846191406, + "294": 2.506176471710205, + "295": 2.492616653442383, + "296": 2.5271568298339844, + "297": 2.518397808074951, + "298": 2.532978057861328, + "299": 2.4740850925445557, + "300": 2.4835891723632812, + "301": 2.4555060863494873, + "302": 2.495685338973999, + "303": 2.461479902267456, + "304": 2.5400919914245605, + "305": 2.476271152496338, + "306": 2.484402656555176, + "307": 2.465885639190674, + "308": 2.5619664192199707, + "309": 2.5466525554656982, + "310": 2.5172061920166016, + "311": 2.4887819290161133, + "312": 2.4919095039367676, + "313": 2.487433910369873, + "314": 2.4931745529174805, + "315": 2.4929707050323486, + "316": 2.437417507171631, + "317": 2.458775043487549, + "318": 2.454230546951294, + "319": 2.427515983581543, + "320": 2.5097384452819824, + "321": 2.421292304992676, + "322": 2.4354043006896973, + "323": 2.4980649948120117 + }, + "lr": { + "270": 0.464, + "271": 0.464, + "272": 0.464, + "273": 0.464, + "274": 0.464, + "275": 0.464, + "276": 0.464, + "277": 0.464, + "278": 0.464, + "279": 0.464, + "280": 0.464, + "281": 0.464, + "282": 0.464, + "283": 0.464, + "284": 0.464, + "285": 0.464, + "286": 0.464, + "287": 0.464, + "288": 0.464, + "289": 0.464, + "290": 0.464, + "291": 0.464, + "292": 0.464, + "293": 0.464, + "294": 0.464, + "295": 0.464, + "296": 0.464, + "297": 0.464, + "298": 0.464, + "299": 0.464, + "300": 0.464, + "301": 0.464, + "302": 0.464, + "303": 0.464, + "304": 0.464, + "305": 0.464, + "306": 0.464, + "307": 0.464, + "308": 0.464, + "309": 0.464, + "310": 0.464, + "311": 0.464, + "312": 0.464, + "313": 0.464, + "314": 0.464, + "315": 0.464, + "316": 0.464, + "317": 0.464, + "318": 0.464, + "319": 0.464, + "320": 0.464, + "321": 0.464, + "322": 0.464, + "323": 0.464 + } + }, + "step_size_list": [ + 0.436546, + 0.438755, + 0.425629, + 0.43238, + 0.440235, + 0.440017, + 0.436054, + 0.437576, + 0.43905, + 0.439777, + 0.441122, + 0.441932, + 0.438115, + 0.439337, + 0.437016, + 0.427883, + 0.427139, + 0.43432, + 0.44199, + 0.441421, + 0.438439, + 0.44001, + 0.443292, + 0.441013, + 0.438378, + 0.433247, + 0.430496, + 0.425647, + 0.434521, + 0.438663, + 0.443354, + 0.443299, + 0.439003, + 0.420999, + 0.437682, + 0.443567, + 0.434254, + 0.423891, + 0.401816, + 0.43528, + 0.436551, + 0.434189, + 0.42458, + 0.429535, + 0.420082, + 0.433394, + 0.437767, + 0.433509, + 0.434538, + 0.426239, + 0.432001, + 0.438899, + 0.424475, + 0.424654 + ], + "train_epoch_time": 4.841272354125977, + "train_loss": 2.4665567812652807, + "train_score": 0.25491503755023526, + "val_loss": 2.514818378211984, + "val_score": 0.24674852926339677 + }, + { + "epoch": 6, + "grad_norm": 1.0370677709579468, + "learning_rate": 0.464, + "model_norm": 87.49996948242188, + "step_logs": { + "grad_norm": { + "324": 0.9800245761871338, + "325": 0.8283821940422058, + "326": 0.7775773406028748, + "327": 0.9417735934257507, + "328": 0.9661932587623596, + "329": 0.9594317674636841, + "330": 0.7793853878974915, + "331": 0.7245911359786987, + "332": 0.7517696619033813, + "333": 1.074756383895874, + "334": 0.9611711502075195, + "335": 0.6978328227996826, + "336": 0.6949235200881958, + "337": 1.037560224533081, + "338": 0.7125256657600403, + "339": 0.6865198016166687, + "340": 0.8098397254943848, + "341": 1.0357433557510376, + "342": 1.1265802383422852, + "343": 1.0206893682479858, + "344": 1.0810078382492065, + "345": 1.1776796579360962, + "346": 0.9603952169418335, + "347": 0.9163500070571899, + "348": 0.8210528492927551, + "349": 0.8515852689743042, + "350": 0.7847344875335693, + "351": 0.7387920022010803, + "352": 0.7622573375701904, + "353": 0.800679087638855, + "354": 0.88532954454422, + "355": 0.9865078926086426, + "356": 0.8688163161277771, + "357": 0.7257600426673889, + "358": 0.7686408162117004, + "359": 0.9300809502601624, + "360": 0.9252261519432068, + "361": 0.9970901608467102, + "362": 1.9082632064819336, + "363": 0.6677086353302002, + "364": 0.6691631078720093, + "365": 0.8937729597091675, + "366": 1.0749813318252563, + "367": 1.1392680406570435, + "368": 1.0505801439285278, + "369": 0.7789050340652466, + "370": 0.8209730386734009, + "371": 0.954608678817749, + "372": 1.3039089441299438, + "373": 1.0377908945083618, + "374": 1.0113333463668823, + "375": 1.0593420267105103, + "376": 0.9562830924987793, + "377": 1.0370677709579468 + }, + "loss": { + "324": 2.4723334312438965, + "325": 2.4934020042419434, + "326": 2.4199047088623047, + "327": 2.4279255867004395, + "328": 2.481714963912964, + "329": 2.4320318698883057, + "330": 2.4659900665283203, + "331": 2.3892579078674316, + "332": 2.4013938903808594, + "333": 2.403709650039673, + "334": 2.476989269256592, + "335": 2.420353889465332, + "336": 2.3951759338378906, + "337": 2.396409749984741, + "338": 2.4970617294311523, + "339": 2.4162540435791016, + "340": 2.39434814453125, + "341": 2.4294943809509277, + "342": 2.464895725250244, + "343": 2.429109811782837, + "344": 2.44795560836792, + "345": 2.437321186065674, + "346": 2.442086696624756, + "347": 2.3903326988220215, + "348": 2.4357619285583496, + "349": 2.3904590606689453, + "350": 2.4162936210632324, + "351": 2.374342441558838, + "352": 2.397109270095825, + "353": 2.3708555698394775, + "354": 2.4121124744415283, + "355": 2.407874584197998, + "356": 2.4555654525756836, + "357": 2.377549171447754, + "358": 2.380791187286377, + "359": 2.3692097663879395, + "360": 2.393126964569092, + "361": 2.386563301086426, + "362": 2.4485650062561035, + "363": 2.4321084022521973, + "364": 2.3986318111419678, + "365": 2.432197332382202, + "366": 2.448928117752075, + "367": 2.4770030975341797, + "368": 2.4764983654022217, + "369": 2.415029287338257, + "370": 2.39186954498291, + "371": 2.3887853622436523, + "372": 2.417573928833008, + "373": 2.451000928878784, + "374": 2.41477632522583, + "375": 2.3854353427886963, + "376": 2.432030200958252, + "377": 2.398467540740967 + }, + "lr": { + "324": 0.464, + "325": 0.464, + "326": 0.464, + "327": 0.464, + "328": 0.464, + "329": 0.464, + "330": 0.464, + "331": 0.464, + "332": 0.464, + "333": 0.464, + "334": 0.464, + "335": 0.464, + "336": 0.464, + "337": 0.464, + "338": 0.464, + "339": 0.464, + "340": 0.464, + "341": 0.464, + "342": 0.464, + "343": 0.464, + "344": 0.464, + "345": 0.464, + "346": 0.464, + "347": 0.464, + "348": 0.464, + "349": 0.464, + "350": 0.464, + "351": 0.464, + "352": 0.464, + "353": 0.464, + "354": 0.464, + "355": 0.464, + "356": 0.464, + "357": 0.464, + "358": 0.464, + "359": 0.464, + "360": 0.464, + "361": 0.464, + "362": 0.464, + "363": 0.464, + "364": 0.464, + "365": 0.464, + "366": 0.464, + "367": 0.464, + "368": 0.464, + "369": 0.464, + "370": 0.464, + "371": 0.464, + "372": 0.464, + "373": 0.464, + "374": 0.464, + "375": 0.464, + "376": 0.464, + "377": 0.464 + } + }, + "step_size_list": [ + 0.425638, + 0.436152, + 0.438577, + 0.427748, + 0.426757, + 0.426545, + 0.438917, + 0.441492, + 0.439977, + 0.417459, + 0.427048, + 0.443307, + 0.443266, + 0.420206, + 0.443099, + 0.443911, + 0.436276, + 0.420884, + 0.414486, + 0.42201, + 0.417736, + 0.409888, + 0.426618, + 0.429034, + 0.436005, + 0.43349, + 0.438097, + 0.440507, + 0.439296, + 0.43661, + 0.431472, + 0.424222, + 0.433112, + 0.441317, + 0.438741, + 0.427765, + 0.428444, + 0.423108, + 0.344975, + 0.445072, + 0.444738, + 0.431147, + 0.418216, + 0.413707, + 0.420519, + 0.438446, + 0.435528, + 0.426273, + 0.398915, + 0.421074, + 0.422484, + 0.418341, + 0.426771, + 0.420278 + ], + "train_epoch_time": 4.8418288230896, + "train_loss": 2.4159175653197673, + "train_score": 0.2785699874718528, + "val_loss": 2.45024912584526, + "val_score": 0.2707466270975623 + }, + { + "epoch": 7, + "grad_norm": 0.7485067844390869, + "learning_rate": 0.464, + "model_norm": 87.55538940429688, + "step_logs": { + "grad_norm": { + "378": 1.0064396858215332, + "379": 0.8539838194847107, + "380": 0.8043100237846375, + "381": 0.8470326066017151, + "382": 0.9387885928153992, + "383": 0.8522586822509766, + "384": 0.8815553784370422, + "385": 1.0263776779174805, + "386": 0.9056224822998047, + "387": 0.7453776597976685, + "388": 0.7296508550643921, + "389": 0.7386884689331055, + "390": 0.805571436882019, + "391": 0.9203474521636963, + "392": 1.0028228759765625, + "393": 1.0470596551895142, + "394": 0.9864926338195801, + "395": 0.9148935675621033, + "396": 0.8554189801216125, + "397": 0.8848603367805481, + "398": 1.2553482055664062, + "399": 0.6481537818908691, + "400": 0.6834568381309509, + "401": 0.8915372490882874, + "402": 0.9963279366493225, + "403": 1.0326377153396606, + "404": 0.892877995967865, + "405": 0.7649993896484375, + "406": 0.7975972294807434, + "407": 0.9212661385536194, + "408": 0.8549370169639587, + "409": 0.8188075423240662, + "410": 0.8381356596946716, + "411": 0.825498104095459, + "412": 0.8052555918693542, + "413": 0.8285270929336548, + "414": 0.8110234141349792, + "415": 0.8597236275672913, + "416": 0.8731857538223267, + "417": 0.8154451251029968, + "418": 0.7808809280395508, + "419": 0.7859033942222595, + "420": 0.8809411525726318, + "421": 0.9184620380401611, + "422": 0.9998373985290527, + "423": 0.887656033039093, + "424": 0.8951314091682434, + "425": 0.9491535425186157, + "426": 0.8798481822013855, + "427": 0.8304247856140137, + "428": 0.8761394023895264, + "429": 0.8243850469589233, + "430": 0.7738993167877197, + "431": 0.7485067844390869 + }, + "loss": { + "378": 2.4386940002441406, + "379": 2.374080181121826, + "380": 2.3617091178894043, + "381": 2.342466115951538, + "382": 2.3666210174560547, + "383": 2.37374210357666, + "384": 2.3594861030578613, + "385": 2.3664932250976562, + "386": 2.384056568145752, + "387": 2.3169920444488525, + "388": 2.2913923263549805, + "389": 2.3237414360046387, + "390": 2.3744289875030518, + "391": 2.3328261375427246, + "392": 2.36293888092041, + "393": 2.3453316688537598, + "394": 2.382493495941162, + "395": 2.3690600395202637, + "396": 2.3495705127716064, + "397": 2.355351686477661, + "398": 2.3587958812713623, + "399": 2.383967399597168, + "400": 2.3315072059631348, + "401": 2.342471122741699, + "402": 2.3810830116271973, + "403": 2.379814386367798, + "404": 2.3917574882507324, + "405": 2.304711103439331, + "406": 2.324188232421875, + "407": 2.3327572345733643, + "408": 2.3713417053222656, + "409": 2.2914185523986816, + "410": 2.3370871543884277, + "411": 2.310056209564209, + "412": 2.3397903442382812, + "413": 2.302206039428711, + "414": 2.331683874130249, + "415": 2.313957691192627, + "416": 2.3480639457702637, + "417": 2.3266613483428955, + "418": 2.2904434204101562, + "419": 2.3172760009765625, + "420": 2.3151981830596924, + "421": 2.3076066970825195, + "422": 2.3341920375823975, + "423": 2.3350610733032227, + "424": 2.3181991577148438, + "425": 2.3311357498168945, + "426": 2.3104472160339355, + "427": 2.308957099914551, + "428": 2.308932065963745, + "429": 2.3289713859558105, + "430": 2.302090883255005, + "431": 2.2854814529418945 + }, + "lr": { + "378": 0.464, + "379": 0.464, + "380": 0.464, + "381": 0.464, + "382": 0.464, + "383": 0.464, + "384": 0.464, + "385": 0.464, + "386": 0.464, + "387": 0.464, + "388": 0.464, + "389": 0.464, + "390": 0.464, + "391": 0.464, + "392": 0.464, + "393": 0.464, + "394": 0.464, + "395": 0.464, + "396": 0.464, + "397": 0.464, + "398": 0.464, + "399": 0.464, + "400": 0.464, + "401": 0.464, + "402": 0.464, + "403": 0.464, + "404": 0.464, + "405": 0.464, + "406": 0.464, + "407": 0.464, + "408": 0.464, + "409": 0.464, + "410": 0.464, + "411": 0.464, + "412": 0.464, + "413": 0.464, + "414": 0.464, + "415": 0.464, + "416": 0.464, + "417": 0.464, + "418": 0.464, + "419": 0.464, + "420": 0.464, + "421": 0.464, + "422": 0.464, + "423": 0.464, + "424": 0.464, + "425": 0.464, + "426": 0.464, + "427": 0.464, + "428": 0.464, + "429": 0.464, + "430": 0.464, + "431": 0.464 + } + }, + "step_size_list": [ + 0.423218, + 0.433132, + 0.436275, + 0.433216, + 0.4271, + 0.433244, + 0.431061, + 0.420566, + 0.429705, + 0.439548, + 0.440268, + 0.440028, + 0.436333, + 0.42795, + 0.422303, + 0.418603, + 0.423836, + 0.428848, + 0.432734, + 0.430777, + 0.401732, + 0.445775, + 0.443391, + 0.430139, + 0.42308, + 0.420307, + 0.430694, + 0.438186, + 0.436295, + 0.427883, + 0.433034, + 0.434505, + 0.433753, + 0.434279, + 0.435969, + 0.433979, + 0.435498, + 0.431987, + 0.431494, + 0.435148, + 0.437008, + 0.436979, + 0.43052, + 0.427725, + 0.422064, + 0.430313, + 0.429555, + 0.425821, + 0.430533, + 0.433933, + 0.430774, + 0.434579, + 0.437588, + 0.439031 + ], + "train_epoch_time": 4.842438220977783, + "train_loss": 2.279833248595425, + "train_score": 0.32523090032970203, + "val_loss": 2.333723345525503, + "val_score": 0.3123923646869944 + }, + { + "epoch": 8, + "grad_norm": 0.8299069404602051, + "learning_rate": 0.464, + "model_norm": 87.61180114746094, + "step_logs": { + "grad_norm": { + "432": 0.7126715779304504, + "433": 0.8567155599594116, + "434": 0.9202989935874939, + "435": 0.9785818457603455, + "436": 0.9496986269950867, + "437": 0.9030150175094604, + "438": 1.1402196884155273, + "439": 0.7947887778282166, + "440": 0.787836492061615, + "441": 0.9827735424041748, + "442": 0.924113929271698, + "443": 0.9095634818077087, + "444": 1.1255983114242554, + "445": 1.1245964765548706, + "446": 0.9567276239395142, + "447": 1.1173712015151978, + "448": 0.9507923722267151, + "449": 0.801628053188324, + "450": 0.7259226441383362, + "451": 0.812200665473938, + "452": 0.9129747152328491, + "453": 0.8578234910964966, + "454": 0.7506998181343079, + "455": 0.7694582939147949, + "456": 0.8032817840576172, + "457": 0.8745932579040527, + "458": 0.8218798637390137, + "459": 0.7167028784751892, + "460": 0.7230066061019897, + "461": 0.780096173286438, + "462": 0.742834210395813, + "463": 0.8151125311851501, + "464": 0.9034141302108765, + "465": 0.8522658348083496, + "466": 0.8728897571563721, + "467": 0.8497021794319153, + "468": 0.7926927804946899, + "469": 0.8608328700065613, + "470": 0.8690261840820312, + "471": 1.0343111753463745, + "472": 0.9771400690078735, + "473": 0.8763394951820374, + "474": 0.8738444447517395, + "475": 0.902094304561615, + "476": 0.9636691808700562, + "477": 1.0210340023040771, + "478": 0.9713259339332581, + "479": 1.132158875465393, + "480": 0.8729812502861023, + "481": 0.7968984246253967, + "482": 0.7133024334907532, + "483": 0.6922010183334351, + "484": 0.7628591060638428, + "485": 0.8299069404602051 + }, + "loss": { + "432": 2.3060684204101562, + "433": 2.276191234588623, + "434": 2.3359580039978027, + "435": 2.3350090980529785, + "436": 2.3345181941986084, + "437": 2.312978744506836, + "438": 2.2810492515563965, + "439": 2.3284752368927, + "440": 2.285083293914795, + "441": 2.281200885772705, + "442": 2.3516182899475098, + "443": 2.284452199935913, + "444": 2.3611257076263428, + "445": 2.3429629802703857, + "446": 2.3390960693359375, + "447": 2.313493251800537, + "448": 2.366291046142578, + "449": 2.295682430267334, + "450": 2.271575450897217, + "451": 2.2555222511291504, + "452": 2.3030929565429688, + "453": 2.2810072898864746, + "454": 2.2763078212738037, + "455": 2.27951717376709, + "456": 2.2690677642822266, + "457": 2.291797637939453, + "458": 2.2770628929138184, + "459": 2.255760669708252, + "460": 2.235276699066162, + "461": 2.276254653930664, + "462": 2.2803478240966797, + "463": 2.2522382736206055, + "464": 2.285918712615967, + "465": 2.2736093997955322, + "466": 2.265974998474121, + "467": 2.2974672317504883, + "468": 2.276423454284668, + "469": 2.271206855773926, + "470": 2.3198113441467285, + "471": 2.278186798095703, + "472": 2.319728374481201, + "473": 2.2563881874084473, + "474": 2.278583288192749, + "475": 2.288527488708496, + "476": 2.2797842025756836, + "477": 2.272386074066162, + "478": 2.3173093795776367, + "479": 2.2954373359680176, + "480": 2.322462320327759, + "481": 2.232104778289795, + "482": 2.26352596282959, + "483": 2.218979597091675, + "484": 2.2360379695892334, + "485": 2.2544596195220947 + }, + "lr": { + "432": 0.464, + "433": 0.464, + "434": 0.464, + "435": 0.464, + "436": 0.464, + "437": 0.464, + "438": 0.464, + "439": 0.464, + "440": 0.464, + "441": 0.464, + "442": 0.464, + "443": 0.464, + "444": 0.464, + "445": 0.464, + "446": 0.464, + "447": 0.464, + "448": 0.464, + "449": 0.464, + "450": 0.464, + "451": 0.464, + "452": 0.464, + "453": 0.464, + "454": 0.464, + "455": 0.464, + "456": 0.464, + "457": 0.464, + "458": 0.464, + "459": 0.464, + "460": 0.464, + "461": 0.464, + "462": 0.464, + "463": 0.464, + "464": 0.464, + "465": 0.464, + "466": 0.464, + "467": 0.464, + "468": 0.464, + "469": 0.464, + "470": 0.464, + "471": 0.464, + "472": 0.464, + "473": 0.464, + "474": 0.464, + "475": 0.464, + "476": 0.464, + "477": 0.464, + "478": 0.464, + "479": 0.464, + "480": 0.464, + "481": 0.464, + "482": 0.464, + "483": 0.464, + "484": 0.464, + "485": 0.464 + } + }, + "step_size_list": [ + 0.441444, + 0.431705, + 0.427998, + 0.423688, + 0.425832, + 0.428918, + 0.409811, + 0.436526, + 0.436493, + 0.422499, + 0.427945, + 0.428037, + 0.412631, + 0.412359, + 0.425382, + 0.41237, + 0.426223, + 0.435705, + 0.440303, + 0.434517, + 0.428058, + 0.431691, + 0.438797, + 0.437629, + 0.435282, + 0.430653, + 0.434123, + 0.440717, + 0.440121, + 0.436901, + 0.439336, + 0.434278, + 0.428506, + 0.431982, + 0.430423, + 0.43247, + 0.436074, + 0.431349, + 0.431416, + 0.418416, + 0.423554, + 0.430043, + 0.430527, + 0.428639, + 0.423936, + 0.419365, + 0.423955, + 0.410783, + 0.431175, + 0.43527, + 0.441002, + 0.441865, + 0.437579, + 0.43329 + ], + "train_epoch_time": 4.842189073562622, + "train_loss": 2.2583873221316675, + "train_score": 0.3253160867920223, + "val_loss": 2.321609194325251, + "val_score": 0.3118586760574586 + }, + { + "epoch": 9, + "grad_norm": 0.68621426820755, + "learning_rate": 0.464, + "model_norm": 87.66968536376953, + "step_logs": { + "grad_norm": { + "486": 0.7682287096977234, + "487": 0.7649548053741455, + "488": 0.9660560488700867, + "489": 0.9740469455718994, + "490": 0.8074227571487427, + "491": 0.7553938031196594, + "492": 0.8166458010673523, + "493": 0.840368390083313, + "494": 0.8939388990402222, + "495": 0.8838664889335632, + "496": 0.7769646644592285, + "497": 0.7230124473571777, + "498": 0.7600592970848083, + "499": 0.7785629034042358, + "500": 0.7993154525756836, + "501": 0.802345335483551, + "502": 0.8185387253761292, + "503": 0.8377872705459595, + "504": 0.791024386882782, + "505": 0.8677616715431213, + "506": 0.7530810832977295, + "507": 0.7036998867988586, + "508": 0.8211548328399658, + "509": 0.9169280529022217, + "510": 0.886559784412384, + "511": 0.7795705795288086, + "512": 0.7227236032485962, + "513": 0.74482661485672, + "514": 0.8771432638168335, + "515": 0.8894820809364319, + "516": 0.8720706105232239, + "517": 0.9437795281410217, + "518": 0.8314654231071472, + "519": 0.774798572063446, + "520": 0.871120035648346, + "521": 0.8756012916564941, + "522": 0.8168379068374634, + "523": 0.8266562819480896, + "524": 0.8706150650978088, + "525": 0.8751426339149475, + "526": 1.0410782098770142, + "527": 0.9852874875068665, + "528": 0.8789790272712708, + "529": 0.8169657588005066, + "530": 0.7922195196151733, + "531": 0.8600579500198364, + "532": 0.8866491913795471, + "533": 0.8692024350166321, + "534": 0.8261463642120361, + "535": 0.8096601963043213, + "536": 0.8385648131370544, + "537": 0.7517908215522766, + "538": 0.6367648243904114, + "539": 0.68621426820755 + }, + "loss": { + "486": 2.2780823707580566, + "487": 2.2468371391296387, + "488": 2.2590010166168213, + "489": 2.281876802444458, + "490": 2.2365760803222656, + "491": 2.248771905899048, + "492": 2.2549967765808105, + "493": 2.2779555320739746, + "494": 2.2223997116088867, + "495": 2.2605366706848145, + "496": 2.2568793296813965, + "497": 2.226069450378418, + "498": 2.236762046813965, + "499": 2.2230029106140137, + "500": 2.2332019805908203, + "501": 2.2457990646362305, + "502": 2.2245359420776367, + "503": 2.2295455932617188, + "504": 2.2105393409729004, + "505": 2.2362282276153564, + "506": 2.2436485290527344, + "507": 2.1828665733337402, + "508": 2.2447609901428223, + "509": 2.2604458332061768, + "510": 2.238154172897339, + "511": 2.2404041290283203, + "512": 2.2351233959198, + "513": 2.2271037101745605, + "514": 2.2268412113189697, + "515": 2.2441372871398926, + "516": 2.2347819805145264, + "517": 2.246114492416382, + "518": 2.2599329948425293, + "519": 2.2221996784210205, + "520": 2.2236242294311523, + "521": 2.2674546241760254, + "522": 2.2368814945220947, + "523": 2.2382960319519043, + "524": 2.2562508583068848, + "525": 2.2370681762695312, + "526": 2.255112648010254, + "527": 2.2622761726379395, + "528": 2.208037853240967, + "529": 2.2299203872680664, + "530": 2.2126784324645996, + "531": 2.2205216884613037, + "532": 2.234477996826172, + "533": 2.234245777130127, + "534": 2.235602617263794, + "535": 2.2378177642822266, + "536": 2.2401041984558105, + "537": 2.212348461151123, + "538": 2.173534393310547, + "539": 2.2230114936828613 + }, + "lr": { + "486": 0.464, + "487": 0.464, + "488": 0.464, + "489": 0.464, + "490": 0.464, + "491": 0.464, + "492": 0.464, + "493": 0.464, + "494": 0.464, + "495": 0.464, + "496": 0.464, + "497": 0.464, + "498": 0.464, + "499": 0.464, + "500": 0.464, + "501": 0.464, + "502": 0.464, + "503": 0.464, + "504": 0.464, + "505": 0.464, + "506": 0.464, + "507": 0.464, + "508": 0.464, + "509": 0.464, + "510": 0.464, + "511": 0.464, + "512": 0.464, + "513": 0.464, + "514": 0.464, + "515": 0.464, + "516": 0.464, + "517": 0.464, + "518": 0.464, + "519": 0.464, + "520": 0.464, + "521": 0.464, + "522": 0.464, + "523": 0.464, + "524": 0.464, + "525": 0.464, + "526": 0.464, + "527": 0.464, + "528": 0.464, + "529": 0.464, + "530": 0.464, + "531": 0.464, + "532": 0.464, + "533": 0.464, + "534": 0.464, + "535": 0.464, + "536": 0.464, + "537": 0.464, + "538": 0.464, + "539": 0.464 + } + }, + "step_size_list": [ + 0.437693, + 0.437562, + 0.423417, + 0.423179, + 0.43461, + 0.438203, + 0.434207, + 0.432866, + 0.428273, + 0.429559, + 0.436889, + 0.440027, + 0.437769, + 0.436393, + 0.435119, + 0.435067, + 0.433695, + 0.432418, + 0.435407, + 0.430378, + 0.438297, + 0.440801, + 0.433771, + 0.427142, + 0.429044, + 0.436528, + 0.440137, + 0.43865, + 0.429567, + 0.428918, + 0.430047, + 0.424908, + 0.433252, + 0.436635, + 0.429958, + 0.430249, + 0.433969, + 0.433309, + 0.430451, + 0.429858, + 0.417453, + 0.421988, + 0.429161, + 0.433872, + 0.435352, + 0.430713, + 0.428985, + 0.430247, + 0.433309, + 0.434472, + 0.432502, + 0.438038, + 0.444751, + 0.442266 + ], + "train_epoch_time": 4.842597723007202, + "train_loss": 2.2036793600709057, + "train_score": 0.3462719690337927, + "val_loss": 2.2865703012858414, + "val_score": 0.32615617796966867 + }, + { + "epoch": 10, + "grad_norm": 0.9096331000328064, + "learning_rate": 0.464, + "model_norm": 87.72895812988281, + "step_logs": { + "grad_norm": { + "540": 0.8921065330505371, + "541": 0.8062778115272522, + "542": 0.7187851667404175, + "543": 0.8345754146575928, + "544": 0.9551219344139099, + "545": 1.0174036026000977, + "546": 0.868165910243988, + "547": 0.7207449078559875, + "548": 0.7266085147857666, + "549": 0.6963417530059814, + "550": 0.6848155856132507, + "551": 0.797979474067688, + "552": 1.1448265314102173, + "553": 0.9235154986381531, + "554": 0.7863271832466125, + "555": 0.7994105815887451, + "556": 0.8655866384506226, + "557": 0.8106824159622192, + "558": 0.7803632020950317, + "559": 0.8532289266586304, + "560": 0.8216025829315186, + "561": 0.7853909134864807, + "562": 0.7817880511283875, + "563": 0.7839528322219849, + "564": 0.8452964425086975, + "565": 0.8417201638221741, + "566": 0.8234250545501709, + "567": 0.8137179017066956, + "568": 0.8520249128341675, + "569": 0.8314141035079956, + "570": 0.8129189014434814, + "571": 0.7779190540313721, + "572": 0.7898862957954407, + "573": 0.8024963140487671, + "574": 0.7748833298683167, + "575": 0.7861606478691101, + "576": 0.8677897453308105, + "577": 0.8517161011695862, + "578": 0.7643398642539978, + "579": 0.7023939490318298, + "580": 0.7665775418281555, + "581": 0.8610114455223083, + "582": 0.8333024382591248, + "583": 0.7674388289451599, + "584": 0.7553243041038513, + "585": 0.7182978987693787, + "586": 0.7470502853393555, + "587": 0.8406779766082764, + "588": 0.9378892779350281, + "589": 0.9103984832763672, + "590": 0.8171938061714172, + "591": 0.9231045842170715, + "592": 1.0083247423171997, + "593": 0.9096331000328064 + }, + "loss": { + "540": 2.215850830078125, + "541": 2.236928701400757, + "542": 2.1841204166412354, + "543": 2.200261116027832, + "544": 2.235758066177368, + "545": 2.247260093688965, + "546": 2.2625303268432617, + "547": 2.1999077796936035, + "548": 2.1832923889160156, + "549": 2.182635545730591, + "550": 2.1730289459228516, + "551": 2.1816012859344482, + "552": 2.22847843170166, + "553": 2.2593977451324463, + "554": 2.1985721588134766, + "555": 2.213987350463867, + "556": 2.2262539863586426, + "557": 2.20068359375, + "558": 2.1766653060913086, + "559": 2.1952016353607178, + "560": 2.213383913040161, + "561": 2.168879508972168, + "562": 2.222299814224243, + "563": 2.2107787132263184, + "564": 2.206465482711792, + "565": 2.192739963531494, + "566": 2.220984697341919, + "567": 2.1941514015197754, + "568": 2.1865310668945312, + "569": 2.213221549987793, + "570": 2.2188873291015625, + "571": 2.20096492767334, + "572": 2.185303211212158, + "573": 2.1692090034484863, + "574": 2.1808671951293945, + "575": 2.18877911567688, + "576": 2.164050579071045, + "577": 2.194611072540283, + "578": 2.1801581382751465, + "579": 2.1817259788513184, + "580": 2.1732735633850098, + "581": 2.1978366374969482, + "582": 2.1710891723632812, + "583": 2.1611733436584473, + "584": 2.1839187145233154, + "585": 2.165365219116211, + "586": 2.172858238220215, + "587": 2.2187869548797607, + "588": 2.2081055641174316, + "589": 2.2036852836608887, + "590": 2.1941847801208496, + "591": 2.179983139038086, + "592": 2.2141075134277344, + "593": 2.193830728530884 + }, + "lr": { + "540": 0.464, + "541": 0.464, + "542": 0.464, + "543": 0.464, + "544": 0.464, + "545": 0.464, + "546": 0.464, + "547": 0.464, + "548": 0.464, + "549": 0.464, + "550": 0.464, + "551": 0.464, + "552": 0.464, + "553": 0.464, + "554": 0.464, + "555": 0.464, + "556": 0.464, + "557": 0.464, + "558": 0.464, + "559": 0.464, + "560": 0.464, + "561": 0.464, + "562": 0.464, + "563": 0.464, + "564": 0.464, + "565": 0.464, + "566": 0.464, + "567": 0.464, + "568": 0.464, + "569": 0.464, + "570": 0.464, + "571": 0.464, + "572": 0.464, + "573": 0.464, + "574": 0.464, + "575": 0.464, + "576": 0.464, + "577": 0.464, + "578": 0.464, + "579": 0.464, + "580": 0.464, + "581": 0.464, + "582": 0.464, + "583": 0.464, + "584": 0.464, + "585": 0.464, + "586": 0.464, + "587": 0.464, + "588": 0.464, + "589": 0.464, + "590": 0.464, + "591": 0.464, + "592": 0.464, + "593": 0.464 + } + }, + "step_size_list": [ + 0.428311, + 0.434692, + 0.439861, + 0.432254, + 0.423875, + 0.419203, + 0.430712, + 0.439901, + 0.439352, + 0.441257, + 0.441876, + 0.434572, + 0.408291, + 0.426637, + 0.43558, + 0.434878, + 0.430395, + 0.433935, + 0.435719, + 0.430851, + 0.433339, + 0.435279, + 0.43617, + 0.435888, + 0.431576, + 0.431643, + 0.43331, + 0.43364, + 0.430816, + 0.43265, + 0.434012, + 0.436177, + 0.435175, + 0.434101, + 0.436141, + 0.435472, + 0.429338, + 0.430952, + 0.436842, + 0.440871, + 0.436611, + 0.430325, + 0.431949, + 0.436408, + 0.437486, + 0.439694, + 0.437906, + 0.432071, + 0.424745, + 0.426762, + 0.433398, + 0.425421, + 0.419327, + 0.426666 + ], + "train_epoch_time": 4.842779159545898, + "train_loss": 2.188983239402388, + "train_score": 0.3554407281034133, + "val_loss": 2.275259516953601, + "val_score": 0.3314302889405655 + }, + { + "epoch": 11, + "grad_norm": 0.8140822052955627, + "learning_rate": 0.464, + "model_norm": 87.79326629638672, + "step_logs": { + "grad_norm": { + "594": 0.8143988251686096, + "595": 0.7691192626953125, + "596": 0.7511328458786011, + "597": 0.7437258362770081, + "598": 0.8332107663154602, + "599": 0.9590739011764526, + "600": 0.9414939880371094, + "601": 0.9596734046936035, + "602": 0.9427944421768188, + "603": 0.8789117336273193, + "604": 0.7952477335929871, + "605": 0.767628014087677, + "606": 0.7885223627090454, + "607": 0.7696501612663269, + "608": 0.8021920919418335, + "609": 0.7617115378379822, + "610": 0.7050363421440125, + "611": 0.6947162747383118, + "612": 0.7675658464431763, + "613": 0.9178090691566467, + "614": 0.8634825348854065, + "615": 0.7828942537307739, + "616": 0.7477189898490906, + "617": 0.8095752000808716, + "618": 0.7939623594284058, + "619": 0.7833624482154846, + "620": 0.810734212398529, + "621": 0.8283319473266602, + "622": 0.9187922477722168, + "623": 0.874248206615448, + "624": 0.9026694297790527, + "625": 1.0013917684555054, + "626": 0.8714126348495483, + "627": 0.7520427107810974, + "628": 0.795939564704895, + "629": 0.9054486155509949, + "630": 0.9085151553153992, + "631": 0.8748542666435242, + "632": 0.9008886218070984, + "633": 0.9165480732917786, + "634": 1.0872836112976074, + "635": 1.1195107698440552, + "636": 1.0432250499725342, + "637": 0.8284159898757935, + "638": 0.8854562640190125, + "639": 0.9075348973274231, + "640": 0.8460088968276978, + "641": 0.7267642617225647, + "642": 0.7285876870155334, + "643": 0.7849075198173523, + "644": 0.8879084587097168, + "645": 0.9245346784591675, + "646": 0.8939499258995056, + "647": 0.8140822052955627 + }, + "loss": { + "594": 2.1735947132110596, + "595": 2.1577436923980713, + "596": 2.18422794342041, + "597": 2.1580567359924316, + "598": 2.178335189819336, + "599": 2.138554096221924, + "600": 2.2060482501983643, + "601": 2.1847915649414062, + "602": 2.20259952545166, + "603": 2.1774821281433105, + "604": 2.175421714782715, + "605": 2.175705909729004, + "606": 2.178267478942871, + "607": 2.1804356575012207, + "608": 2.171926259994507, + "609": 2.132654905319214, + "610": 2.132364273071289, + "611": 2.1559953689575195, + "612": 2.1362977027893066, + "613": 2.184828996658325, + "614": 2.176003932952881, + "615": 2.1745426654815674, + "616": 2.186737060546875, + "617": 2.146594524383545, + "618": 2.184572219848633, + "619": 2.143556833267212, + "620": 2.152655601501465, + "621": 2.1449923515319824, + "622": 2.170114755630493, + "623": 2.163323402404785, + "624": 2.193357229232788, + "625": 2.1667516231536865, + "626": 2.188030958175659, + "627": 2.1220176219940186, + "628": 2.1484503746032715, + "629": 2.1130330562591553, + "630": 2.171513319015503, + "631": 2.1274163722991943, + "632": 2.1388068199157715, + "633": 2.182231903076172, + "634": 2.1721105575561523, + "635": 2.195622444152832, + "636": 2.188385009765625, + "637": 2.1716933250427246, + "638": 2.168369770050049, + "639": 2.15797758102417, + "640": 2.1595335006713867, + "641": 2.1329140663146973, + "642": 2.1231374740600586, + "643": 2.1575169563293457, + "644": 2.146681070327759, + "645": 2.1851820945739746, + "646": 2.167185068130493, + "647": 2.164590358734131 + }, + "lr": { + "594": 0.464, + "595": 0.464, + "596": 0.464, + "597": 0.464, + "598": 0.464, + "599": 0.464, + "600": 0.464, + "601": 0.464, + "602": 0.464, + "603": 0.464, + "604": 0.464, + "605": 0.464, + "606": 0.464, + "607": 0.464, + "608": 0.464, + "609": 0.464, + "610": 0.464, + "611": 0.464, + "612": 0.464, + "613": 0.464, + "614": 0.464, + "615": 0.464, + "616": 0.464, + "617": 0.464, + "618": 0.464, + "619": 0.464, + "620": 0.464, + "621": 0.464, + "622": 0.464, + "623": 0.464, + "624": 0.464, + "625": 0.464, + "626": 0.464, + "627": 0.464, + "628": 0.464, + "629": 0.464, + "630": 0.464, + "631": 0.464, + "632": 0.464, + "633": 0.464, + "634": 0.464, + "635": 0.464, + "636": 0.464, + "637": 0.464, + "638": 0.464, + "639": 0.464, + "640": 0.464, + "641": 0.464, + "642": 0.464, + "643": 0.464, + "644": 0.464, + "645": 0.464, + "646": 0.464, + "647": 0.464 + } + }, + "step_size_list": [ + 0.433324, + 0.436253, + 0.437766, + 0.437957, + 0.432054, + 0.4219, + 0.424434, + 0.422665, + 0.424277, + 0.428715, + 0.434683, + 0.436569, + 0.435181, + 0.436489, + 0.434157, + 0.436452, + 0.440194, + 0.441092, + 0.436098, + 0.425903, + 0.429831, + 0.43552, + 0.438019, + 0.433306, + 0.434886, + 0.435102, + 0.433305, + 0.431945, + 0.425591, + 0.428849, + 0.427183, + 0.41901, + 0.429424, + 0.43698, + 0.43429, + 0.425683, + 0.426398, + 0.428255, + 0.426457, + 0.425958, + 0.41198, + 0.409738, + 0.416003, + 0.432306, + 0.428089, + 0.426257, + 0.43087, + 0.438791, + 0.438561, + 0.435171, + 0.42757, + 0.425395, + 0.427433, + 0.433227 + ], + "train_epoch_time": 4.844959735870361, + "train_loss": 2.1331555023768027, + "train_score": 0.36913782276074203, + "val_loss": 2.2340438579445454, + "val_score": 0.343391217265693 + }, + { + "epoch": 12, + "grad_norm": 0.6578726768493652, + "learning_rate": 0.464, + "model_norm": 87.8521728515625, + "step_logs": { + "grad_norm": { + "648": 0.7942488789558411, + "649": 0.8315700888633728, + "650": 0.8246243596076965, + "651": 0.8345829248428345, + "652": 0.84620600938797, + "653": 0.8950802087783813, + "654": 0.8761196732521057, + "655": 0.760361909866333, + "656": 0.713737964630127, + "657": 0.6862717866897583, + "658": 0.7494129538536072, + "659": 0.7932485342025757, + "660": 0.8148427605628967, + "661": 0.8828828930854797, + "662": 0.8043906688690186, + "663": 0.7468248009681702, + "664": 0.7115612626075745, + "665": 0.7042707204818726, + "666": 0.7152650952339172, + "667": 0.6333206295967102, + "668": 0.6261699199676514, + "669": 0.7369368672370911, + "670": 0.8285579681396484, + "671": 0.8881261348724365, + "672": 0.8377741575241089, + "673": 0.7106428742408752, + "674": 0.6459449529647827, + "675": 0.6162481904029846, + "676": 0.6551740169525146, + "677": 0.68690425157547, + "678": 0.6702134013175964, + "679": 0.6396130323410034, + "680": 0.6492288708686829, + "681": 0.6239981055259705, + "682": 0.6616118550300598, + "683": 0.6591967940330505, + "684": 0.6587465405464172, + "685": 0.6365826725959778, + "686": 0.5938937067985535, + "687": 0.5714289546012878, + "688": 0.6066100001335144, + "689": 0.5469507575035095, + "690": 0.5249015092849731, + "691": 0.5263426303863525, + "692": 0.5630566477775574, + "693": 0.6263416409492493, + "694": 0.6880822777748108, + "695": 0.6980996131896973, + "696": 0.7213163375854492, + "697": 0.7061492204666138, + "698": 0.6284188032150269, + "699": 0.662869393825531, + "700": 0.6534774303436279, + "701": 0.6578726768493652 + }, + "loss": { + "648": 2.1203856468200684, + "649": 2.1409246921539307, + "650": 2.1532139778137207, + "651": 2.1269664764404297, + "652": 2.1562998294830322, + "653": 2.141098976135254, + "654": 2.134981870651245, + "655": 2.119570255279541, + "656": 2.1185030937194824, + "657": 2.1137454509735107, + "658": 2.108039617538452, + "659": 2.1088359355926514, + "660": 2.1146793365478516, + "661": 2.1406309604644775, + "662": 2.1560144424438477, + "663": 2.1203246116638184, + "664": 2.110269069671631, + "665": 2.1034622192382812, + "666": 2.107478380203247, + "667": 2.075822353363037, + "668": 2.073240280151367, + "669": 2.109856128692627, + "670": 2.1094839572906494, + "671": 2.093839406967163, + "672": 2.1225485801696777, + "673": 2.1203927993774414, + "674": 2.0864572525024414, + "675": 2.0666136741638184, + "676": 2.106548309326172, + "677": 2.082718849182129, + "678": 2.086256980895996, + "679": 2.103593587875366, + "680": 2.080355167388916, + "681": 2.0590333938598633, + "682": 2.083422899246216, + "683": 2.099015712738037, + "684": 2.0764753818511963, + "685": 2.0707638263702393, + "686": 2.0588200092315674, + "687": 2.0492873191833496, + "688": 2.0794143676757812, + "689": 2.0440845489501953, + "690": 2.0369601249694824, + "691": 2.064953327178955, + "692": 2.0237653255462646, + "693": 2.0306992530822754, + "694": 2.0534825325012207, + "695": 2.049145221710205, + "696": 2.0784759521484375, + "697": 2.0491557121276855, + "698": 2.047065496444702, + "699": 2.035550594329834, + "700": 2.069148063659668, + "701": 2.0469658374786377 + }, + "lr": { + "648": 0.464, + "649": 0.4611358024691358, + "650": 0.4582716049382716, + "651": 0.4554074074074074, + "652": 0.45254320987654323, + "653": 0.44967901234567903, + "654": 0.4468148148148149, + "655": 0.44395061728395063, + "656": 0.44108641975308643, + "657": 0.43822222222222224, + "658": 0.43535802469135804, + "659": 0.43249382716049384, + "660": 0.42962962962962964, + "661": 0.42676543209876544, + "662": 0.4239012345679013, + "663": 0.4210370370370371, + "664": 0.41817283950617284, + "665": 0.41530864197530865, + "666": 0.41244444444444445, + "667": 0.4095802469135803, + "668": 0.40671604938271605, + "669": 0.40385185185185185, + "670": 0.4009876543209877, + "671": 0.3981234567901235, + "672": 0.3952592592592593, + "673": 0.39239506172839506, + "674": 0.38953086419753086, + "675": 0.3866666666666667, + "676": 0.3838024691358025, + "677": 0.38093827160493826, + "678": 0.3780740740740741, + "679": 0.3752098765432099, + "680": 0.3723456790123457, + "681": 0.36948148148148147, + "682": 0.36661728395061727, + "683": 0.3637530864197531, + "684": 0.3608888888888889, + "685": 0.3580246913580247, + "686": 0.35516049382716053, + "687": 0.35229629629629633, + "688": 0.34943209876543213, + "689": 0.34656790123456793, + "690": 0.3437037037037037, + "691": 0.3408395061728395, + "692": 0.33797530864197534, + "693": 0.33511111111111114, + "694": 0.33224691358024694, + "695": 0.32938271604938274, + "696": 0.32651851851851854, + "697": 0.32365432098765434, + "698": 0.32079012345679014, + "699": 0.3179259259259259, + "700": 0.31506172839506175, + "701": 0.31219753086419755 + } + }, + "step_size_list": [ + 0.434042, + 0.429174, + 0.427347, + 0.423805, + 0.420915, + 0.414783, + 0.413594, + 0.418605, + 0.418873, + 0.417824, + 0.411494, + 0.406279, + 0.402483, + 0.395996, + 0.39855, + 0.398945, + 0.398197, + 0.395922, + 0.392781, + 0.39399, + 0.391654, + 0.383899, + 0.376426, + 0.370351, + 0.371013, + 0.374878, + 0.374928, + 0.373401, + 0.369359, + 0.365181, + 0.363288, + 0.362002, + 0.358811, + 0.357009, + 0.353021, + 0.350554, + 0.347774, + 0.345907, + 0.344675, + 0.342678, + 0.338952, + 0.337996, + 0.335896, + 0.333221, + 0.329259, + 0.324604, + 0.319991, + 0.316968, + 0.313698, + 0.311392, + 0.311162, + 0.307379, + 0.305141, + 0.302223 + ], + "train_epoch_time": 4.845282554626465, + "train_loss": 2.0502974032671584, + "train_score": 0.3936233411560442, + "val_loss": 2.1602321713991746, + "val_score": 0.3634292483329773 + }, + { + "epoch": 13, + "grad_norm": 0.4436917304992676, + "learning_rate": 0.3093333333333334, + "model_norm": 87.8878402709961, + "step_logs": { + "grad_norm": { + "702": 0.6699285507202148, + "703": 0.5805220007896423, + "704": 0.6074411869049072, + "705": 0.6303211450576782, + "706": 0.5985572338104248, + "707": 0.608280599117279, + "708": 0.6303935050964355, + "709": 0.6220338344573975, + "710": 0.6174399256706238, + "711": 0.5413736701011658, + "712": 0.5112146735191345, + "713": 0.4871719181537628, + "714": 0.4399351477622986, + "715": 0.44969791173934937, + "716": 0.4524420499801636, + "717": 0.46417248249053955, + "718": 0.48964205384254456, + "719": 0.5409447550773621, + "720": 0.5520158410072327, + "721": 0.5363030433654785, + "722": 0.5482470393180847, + "723": 0.5800043940544128, + "724": 0.546682596206665, + "725": 0.5169199705123901, + "726": 0.514824390411377, + "727": 0.4663446843624115, + "728": 0.45961812138557434, + "729": 0.45055440068244934, + "730": 0.46595969796180725, + "731": 0.46011409163475037, + "732": 0.44026613235473633, + "733": 0.43594837188720703, + "734": 0.4653131663799286, + "735": 0.4784716069698334, + "736": 0.42221033573150635, + "737": 0.4199584424495697, + "738": 0.43594419956207275, + "739": 0.4137166738510132, + "740": 0.42047253251075745, + "741": 0.4431529641151428, + "742": 0.4429382085800171, + "743": 0.4476552903652191, + "744": 0.4506986737251282, + "745": 0.41717639565467834, + "746": 0.44431763887405396, + "747": 0.4076555073261261, + "748": 0.43802496790885925, + "749": 0.4421563446521759, + "750": 0.44402697682380676, + "751": 0.42554566264152527, + "752": 0.40179625153541565, + "753": 0.404940128326416, + "754": 0.43356096744537354, + "755": 0.4436917304992676 + }, + "loss": { + "702": 2.0448737144470215, + "703": 2.046647787094116, + "704": 2.041076898574829, + "705": 2.0410609245300293, + "706": 2.0298800468444824, + "707": 2.032756805419922, + "708": 2.0318081378936768, + "709": 2.058128833770752, + "710": 2.0420947074890137, + "711": 2.0357046127319336, + "712": 2.0125179290771484, + "713": 2.0334067344665527, + "714": 2.0085813999176025, + "715": 2.0451345443725586, + "716": 1.9992128610610962, + "717": 1.9843368530273438, + "718": 2.0078001022338867, + "719": 2.046980619430542, + "720": 1.9874895811080933, + "721": 1.9991458654403687, + "722": 2.0271124839782715, + "723": 2.005398988723755, + "724": 2.0288848876953125, + "725": 2.0452847480773926, + "726": 2.0108683109283447, + "727": 2.010511875152588, + "728": 2.0094542503356934, + "729": 2.0190353393554688, + "730": 1.988690733909607, + "731": 2.029412269592285, + "732": 1.9948115348815918, + "733": 2.005605697631836, + "734": 2.004103899002075, + "735": 2.025423049926758, + "736": 1.9900341033935547, + "737": 1.9794466495513916, + "738": 2.007734775543213, + "739": 2.0177109241485596, + "740": 1.9895695447921753, + "741": 2.0379581451416016, + "742": 1.989262342453003, + "743": 1.984004259109497, + "744": 1.9994091987609863, + "745": 1.9834046363830566, + "746": 2.0179667472839355, + "747": 2.0069119930267334, + "748": 1.9827308654785156, + "749": 1.969693660736084, + "750": 2.015787363052368, + "751": 1.982442855834961, + "752": 2.0177993774414062, + "753": 2.00639009475708, + "754": 2.002668857574463, + "755": 2.0029540061950684 + }, + "lr": { + "702": 0.3093333333333334, + "703": 0.30646913580246915, + "704": 0.30360493827160495, + "705": 0.30074074074074075, + "706": 0.29787654320987655, + "707": 0.29501234567901236, + "708": 0.29214814814814816, + "709": 0.28928395061728396, + "710": 0.2864197530864198, + "711": 0.2835555555555556, + "712": 0.28069135802469136, + "713": 0.27782716049382716, + "714": 0.27496296296296296, + "715": 0.27209876543209877, + "716": 0.26923456790123457, + "717": 0.26637037037037037, + "718": 0.2635061728395062, + "719": 0.260641975308642, + "720": 0.25777777777777783, + "721": 0.2549135802469136, + "722": 0.2520493827160494, + "723": 0.24918518518518518, + "724": 0.246320987654321, + "725": 0.2434567901234568, + "726": 0.24059259259259264, + "727": 0.23772839506172844, + "728": 0.2348641975308642, + "729": 0.232, + "730": 0.2291358024691358, + "731": 0.2262716049382716, + "732": 0.22340740740740744, + "733": 0.22054320987654322, + "734": 0.21767901234567902, + "735": 0.2148148148148148, + "736": 0.21195061728395065, + "737": 0.20908641975308642, + "738": 0.20622222222222222, + "739": 0.203358024691358, + "740": 0.20049382716049385, + "741": 0.19762962962962966, + "742": 0.19476543209876543, + "743": 0.19190123456790123, + "744": 0.18903703703703706, + "745": 0.18617283950617286, + "746": 0.18330864197530863, + "747": 0.18044444444444444, + "748": 0.17758024691358026, + "749": 0.17471604938271607, + "750": 0.17185185185185184, + "751": 0.16898765432098764, + "752": 0.16612345679012347, + "753": 0.16325925925925927, + "754": 0.16039506172839507, + "755": 0.15753086419753085 + } + }, + "step_size_list": [ + 0.299177, + 0.298927, + 0.295496, + 0.292188, + 0.290247, + 0.287299, + 0.284033, + 0.281626, + 0.278962, + 0.277883, + 0.275667, + 0.273394, + 0.271368, + 0.268487, + 0.265574, + 0.262573, + 0.259425, + 0.255875, + 0.252782, + 0.250323, + 0.247426, + 0.244084, + 0.241932, + 0.239646, + 0.236837, + 0.234711, + 0.232, + 0.229325, + 0.226305, + 0.223632, + 0.221009, + 0.218263, + 0.215149, + 0.212238, + 0.209957, + 0.207157, + 0.204229, + 0.201619, + 0.198724, + 0.195766, + 0.192913, + 0.190059, + 0.187239, + 0.184665, + 0.18168, + 0.179106, + 0.176067, + 0.173214, + 0.17042, + 0.167693, + 0.165027, + 0.162177, + 0.159197, + 0.156321 + ], + "train_epoch_time": 4.845399618148804, + "train_loss": 1.9869464773700773, + "train_score": 0.41210993530931567, + "val_loss": 2.0999501641116924, + "val_score": 0.38056562175433206 + }, + { + "epoch": 14, + "grad_norm": 0.3497675955295563, + "learning_rate": 0.1546666666666667, + "model_norm": 87.89995574951172, + "step_logs": { + "grad_norm": { + "756": 0.3825933039188385, + "757": 0.4019700288772583, + "758": 0.43566250801086426, + "759": 0.4353422224521637, + "760": 0.39662641286849976, + "761": 0.40551745891571045, + "762": 0.38110724091529846, + "763": 0.3760072886943817, + "764": 0.42468348145484924, + "765": 0.39241504669189453, + "766": 0.3947967290878296, + "767": 0.3975926339626312, + "768": 0.37576502561569214, + "769": 0.40324899554252625, + "770": 0.4067135155200958, + "771": 0.37126943469047546, + "772": 0.360101580619812, + "773": 0.3835076689720154, + "774": 0.3931007981300354, + "775": 0.4015660881996155, + "776": 0.3725912868976593, + "777": 0.36091774702072144, + "778": 0.3680638074874878, + "779": 0.3820505440235138, + "780": 0.367154985666275, + "781": 0.37965452671051025, + "782": 0.37628594040870667, + "783": 0.3540761470794678, + "784": 0.36645638942718506, + "785": 0.3694341480731964, + "786": 0.3758610785007477, + "787": 0.38044625520706177, + "788": 0.3860872685909271, + "789": 0.3585388660430908, + "790": 0.37094998359680176, + "791": 0.3838367462158203, + "792": 0.36435091495513916, + "793": 0.3566299080848694, + "794": 0.3682991862297058, + "795": 0.3411950170993805, + "796": 0.3374997675418854, + "797": 0.35412564873695374, + "798": 0.3519652783870697, + "799": 0.35775846242904663, + "800": 0.37194088101387024, + "801": 0.38294243812561035, + "802": 0.33733099699020386, + "803": 0.3885990381240845, + "804": 0.3471088707447052, + "805": 0.35841116309165955, + "806": 0.3394443392753601, + "807": 0.38250747323036194, + "808": 0.3515566885471344, + "809": 0.3497675955295563 + }, + "loss": { + "756": 2.0030295848846436, + "757": 2.0008554458618164, + "758": 1.9976857900619507, + "759": 2.0156116485595703, + "760": 1.9824223518371582, + "761": 1.9933888912200928, + "762": 1.9906694889068604, + "763": 2.0045247077941895, + "764": 1.9395992755889893, + "765": 1.9741698503494263, + "766": 1.9911028146743774, + "767": 1.9915744066238403, + "768": 2.0097122192382812, + "769": 1.9591145515441895, + "770": 2.0089528560638428, + "771": 1.9596589803695679, + "772": 1.991560697555542, + "773": 1.9729987382888794, + "774": 1.9763469696044922, + "775": 1.968724012374878, + "776": 1.9851090908050537, + "777": 1.9672082662582397, + "778": 1.9833989143371582, + "779": 1.9909121990203857, + "780": 1.9740742444992065, + "781": 1.9451031684875488, + "782": 1.9947526454925537, + "783": 1.9418306350708008, + "784": 1.9938074350357056, + "785": 1.9893717765808105, + "786": 1.9707704782485962, + "787": 1.992068886756897, + "788": 1.9773051738739014, + "789": 1.9256902933120728, + "790": 1.9413520097732544, + "791": 1.994657278060913, + "792": 2.005741596221924, + "793": 1.9567921161651611, + "794": 1.9880865812301636, + "795": 1.9676276445388794, + "796": 1.9862022399902344, + "797": 1.9289112091064453, + "798": 1.963991403579712, + "799": 1.990053653717041, + "800": 1.955742597579956, + "801": 1.9384512901306152, + "802": 1.9901102781295776, + "803": 1.974186658859253, + "804": 1.9994986057281494, + "805": 1.9842350482940674, + "806": 1.9754936695098877, + "807": 1.979050636291504, + "808": 1.9623892307281494, + "809": 1.9532264471054077 + }, + "lr": { + "756": 0.1546666666666667, + "757": 0.15180246913580248, + "758": 0.14893827160493828, + "759": 0.14607407407407405, + "760": 0.1432098765432099, + "761": 0.14034567901234568, + "762": 0.13748148148148148, + "763": 0.13461728395061728, + "764": 0.1317530864197531, + "765": 0.12888888888888891, + "766": 0.1260246913580247, + "767": 0.12316049382716047, + "768": 0.12029629629629632, + "769": 0.1174320987654321, + "770": 0.1145679012345679, + "771": 0.1117037037037037, + "772": 0.10883950617283954, + "773": 0.10597530864197532, + "774": 0.10311111111111111, + "775": 0.1002469135802469, + "776": 0.09738271604938274, + "777": 0.09451851851851853, + "778": 0.09165432098765432, + "779": 0.0887901234567901, + "780": 0.08592592592592595, + "781": 0.08306172839506173, + "782": 0.08019753086419754, + "783": 0.07733333333333332, + "784": 0.07446913580246917, + "785": 0.07160493827160495, + "786": 0.06874074074074074, + "787": 0.06587654320987653, + "788": 0.06301234567901237, + "789": 0.06014814814814816, + "790": 0.05728395061728395, + "791": 0.05441975308641974, + "792": 0.051555555555555584, + "793": 0.04869135802469137, + "794": 0.04582716049382716, + "795": 0.04296296296296295, + "796": 0.04009876543209879, + "797": 0.03723456790123458, + "798": 0.03437037037037037, + "799": 0.03150617283950616, + "800": 0.028641975308642, + "801": 0.025777777777777792, + "802": 0.02291358024691358, + "803": 0.02004938271604937, + "804": 0.01718518518518521, + "805": 0.014320987654321, + "806": 0.01145679012345679, + "807": 0.00859259259259258, + "808": 0.005728395061728421, + "809": 0.0028641975308642104 + } + }, + "step_size_list": [ + 0.153797, + 0.150878, + 0.147892, + 0.145078, + 0.142401, + 0.139538, + 0.136795, + 0.133981, + 0.130951, + 0.128244, + 0.125406, + 0.122561, + 0.11979, + 0.116863, + 0.11403, + 0.111267, + 0.108455, + 0.105558, + 0.102697, + 0.099837, + 0.0970522, + 0.0942237, + 0.0913683, + 0.0885021, + 0.0856746, + 0.0828069, + 0.0799699, + 0.0771407, + 0.0742828, + 0.0714295, + 0.0685718, + 0.0657193, + 0.062863, + 0.0600276, + 0.0571679, + 0.0543106, + 0.0514677, + 0.0486144, + 0.0457556, + 0.0429084, + 0.0400527, + 0.0371896, + 0.0343332, + 0.0314743, + 0.028613, + 0.0257527, + 0.0228986, + 0.020034, + 0.0171763, + 0.0143144, + 0.011453, + 0.00858986, + 0.00572736, + 0.00286394 + ], + "train_epoch_time": 4.84417200088501, + "train_loss": 1.9697177921169287, + "train_score": 0.417135939707544, + "val_loss": 2.0870154908549226, + "val_score": 0.3851490741687582 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:35:32.427588", + "final_model_norm": 87.89995574951172, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:33:50.841040", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 0, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 10.551793098449707, + "learning_rate": 1e-10, + "model_norm": 87.31610870361328, + "step_logs": { + "grad_norm": { + "0": 23.23117446899414, + "1": 22.989797592163086, + "2": 6.277790546417236, + "3": 7.638795375823975, + "4": 11.775469779968262, + "5": 4.806578636169434, + "6": 9.401572227478027, + "7": 3.420166492462158, + "8": 4.973513126373291, + "9": 5.9297661781311035, + "10": 4.668890953063965, + "11": 5.522274494171143, + "12": 19.95566177368164, + "13": 14.449785232543945, + "14": 5.967987060546875, + "15": 21.7133731842041, + "16": 9.069622039794922, + "17": 9.446586608886719, + "18": 17.0661563873291, + "19": 17.564598083496094, + "20": 3.7146430015563965, + "21": 6.430539608001709, + "22": 5.4503865242004395, + "23": 14.734153747558594, + "24": 9.41879940032959, + "25": 6.050946235656738, + "26": 16.967670440673828, + "27": 14.951860427856445, + "28": 7.332396984100342, + "29": 10.911014556884766, + "30": 4.50460958480835, + "31": 12.328603744506836, + "32": 6.336200714111328, + "33": 5.86729621887207, + "34": 5.471032619476318, + "35": 8.557619094848633, + "36": 5.094140529632568, + "37": 5.427487850189209, + "38": 13.277605056762695, + "39": 5.008471965789795, + "40": 10.797125816345215, + "41": 1.5552220344543457, + "42": 3.259514570236206, + "43": 2.6860194206237793, + "44": 2.179973602294922, + "45": 5.520071983337402, + "46": 2.507417678833008, + "47": 3.0781381130218506, + "48": 1.5036157369613647, + "49": 18.435609817504883, + "50": 4.062704563140869, + "51": 0.755198061466217, + "52": 2.889233350753784, + "53": 10.551793098449707 + }, + "loss": { + "0": 4.53324556350708, + "1": 4.532902717590332, + "2": 3.803471088409424, + "3": 4.067673206329346, + "4": 4.131890296936035, + "5": 4.486215114593506, + "6": 3.857367753982544, + "7": 4.027968406677246, + "8": 4.340281963348389, + "9": 3.8410356044769287, + "10": 4.843392848968506, + "11": 3.9144158363342285, + "12": 5.213651657104492, + "13": 7.589272975921631, + "14": 6.168381690979004, + "15": 4.244146347045898, + "16": 6.628342628479004, + "17": 5.097823619842529, + "18": 3.928354263305664, + "19": 4.672650337219238, + "20": 4.380564212799072, + "21": 4.482689380645752, + "22": 4.928394317626953, + "23": 6.558607578277588, + "24": 6.239792823791504, + "25": 5.458795070648193, + "26": 8.716230392456055, + "27": 7.661637306213379, + "28": 6.927092552185059, + "29": 6.035472393035889, + "30": 6.779300689697266, + "31": 6.210404396057129, + "32": 6.136882781982422, + "33": 6.734975814819336, + "34": 4.608994483947754, + "35": 5.8745293617248535, + "36": 7.247208595275879, + "37": 5.258144378662109, + "38": 7.626327991485596, + "39": 5.682893753051758, + "40": 5.561727523803711, + "41": 4.468381881713867, + "42": 3.837730884552002, + "43": 5.029836654663086, + "44": 4.081175327301025, + "45": 3.951021194458008, + "46": 4.729595184326172, + "47": 4.054864883422852, + "48": 3.86513090133667, + "49": 8.084235191345215, + "50": 4.109991550445557, + "51": 3.730285167694092, + "52": 3.627291202545166, + "53": 5.988303184509277 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 1e-10, + 0.00923366, + 0.0331336, + 0.0419477, + 0.0341536, + 0.0795234, + 0.0505291, + 0.116348, + 0.109895, + 0.0986902, + 0.137925, + 0.118473, + 0.0236085, + 0.0568112, + 0.154835, + 0.0169846, + 0.107181, + 0.0855159, + 0.0250951, + 0.0280549, + 0.2454, + 0.142993, + 0.189159, + 0.0534065, + 0.10879, + 0.186788, + 0.0542348, + 0.0608224, + 0.176478, + 0.086306, + 0.316131, + 0.0722022, + 0.20689, + 0.245649, + 0.211966, + 0.13052, + 0.314539, + 0.240819, + 0.0776754, + 0.286607, + 0.0852488, + 0.671069, + 0.388397, + 0.53192, + 0.581874, + 0.20132, + 0.570901, + 0.447994, + 0.749549, + 0.0453699, + 0.332449, + 0.928984, + 0.46497, + 0.0971205 + ], + "train_epoch_time": 4.84472393989563, + "train_loss": 3.850578463060442, + "train_score": 0.08525600790378866, + "val_loss": 3.862292156427517, + "val_score": 0.08233621570225014 + }, + { + "epoch": 1, + "grad_norm": 3.6986000537872314, + "learning_rate": 1.0, + "model_norm": 87.37684631347656, + "step_logs": { + "grad_norm": { + "54": 1.182322382926941, + "55": 16.677125930786133, + "56": 1.5781805515289307, + "57": 3.6878888607025146, + "58": 1.1136995553970337, + "59": 16.71405601501465, + "60": 2.185511350631714, + "61": 3.47221040725708, + "62": 1.9137296676635742, + "63": 4.228087425231934, + "64": 1.695902943611145, + "65": 5.307374477386475, + "66": 8.767341613769531, + "67": 1.54763925075531, + "68": 3.743389844894409, + "69": 2.758080005645752, + "70": 3.3279528617858887, + "71": 7.328800201416016, + "72": 1.728035569190979, + "73": 3.4959888458251953, + "74": 8.857303619384766, + "75": 0.954953670501709, + "76": 1.0097728967666626, + "77": 1.0383492708206177, + "78": 5.7253594398498535, + "79": 1.7573403120040894, + "80": 3.0066046714782715, + "81": 2.5207180976867676, + "82": 2.5117576122283936, + "83": 5.960236549377441, + "84": 1.522796869277954, + "85": 1.59621262550354, + "86": 0.4780735671520233, + "87": 0.3492223918437958, + "88": 1.115275263786316, + "89": 8.024332046508789, + "90": 1.662909984588623, + "91": 5.380059719085693, + "92": 1.5348799228668213, + "93": 1.0621405839920044, + "94": 1.9930458068847656, + "95": 0.8633806705474854, + "96": 1.5750083923339844, + "97": 3.6639809608459473, + "98": 2.683743476867676, + "99": 1.2608025074005127, + "100": 8.151193618774414, + "101": 1.1206763982772827, + "102": 3.7155747413635254, + "103": 1.0862257480621338, + "104": 8.485272407531738, + "105": 1.4894580841064453, + "106": 1.1201480627059937, + "107": 3.6986000537872314 + }, + "loss": { + "54": 3.861490249633789, + "55": 8.558002471923828, + "56": 3.711120128631592, + "57": 3.637338399887085, + "58": 3.6370863914489746, + "59": 8.956304550170898, + "60": 3.992795467376709, + "61": 4.36627721786499, + "62": 3.619739294052124, + "63": 4.753203392028809, + "64": 3.838421583175659, + "65": 3.8743603229522705, + "66": 5.395791053771973, + "67": 3.989959716796875, + "68": 3.738399028778076, + "69": 4.704212188720703, + "70": 4.339101314544678, + "71": 4.819789409637451, + "72": 4.133965492248535, + "73": 3.702777862548828, + "74": 4.930734634399414, + "75": 3.6834280490875244, + "76": 3.4976930618286133, + "77": 3.5633816719055176, + "78": 4.216736316680908, + "79": 3.9362308979034424, + "80": 3.5475428104400635, + "81": 4.4532270431518555, + "82": 3.6925930976867676, + "83": 4.420823097229004, + "84": 3.861217975616455, + "85": 3.8212168216705322, + "86": 3.4784512519836426, + "87": 3.365506172180176, + "88": 3.4324467182159424, + "89": 5.209386825561523, + "90": 3.713132619857788, + "91": 4.07759952545166, + "92": 3.924821615219116, + "93": 3.474189519882202, + "94": 3.6700921058654785, + "95": 3.408604145050049, + "96": 3.536109447479248, + "97": 3.993644952774048, + "98": 4.522773742675781, + "99": 3.428499698638916, + "100": 5.0790486335754395, + "101": 3.4795517921447754, + "102": 3.686734437942505, + "103": 3.5189476013183594, + "104": 4.491830825805664, + "105": 3.652750015258789, + "106": 3.355496883392334, + "107": 3.7562854290008545 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.846738, + 0.0579727, + 0.748746, + 0.348484, + 0.854328, + 0.0602566, + 0.625729, + 0.420061, + 0.66406, + 0.347163, + 0.727461, + 0.21574, + 0.12311, + 0.769141, + 0.347924, + 0.552935, + 0.439325, + 0.152162, + 0.734663, + 0.377305, + 0.111665, + 0.889847, + 0.872784, + 0.868595, + 0.20463, + 0.718244, + 0.439739, + 0.583629, + 0.539297, + 0.199289, + 0.769064, + 0.749969, + 0.968192, + 0.982204, + 0.846605, + 0.139272, + 0.72867, + 0.219815, + 0.769158, + 0.860318, + 0.648861, + 0.901433, + 0.740324, + 0.373028, + 0.556717, + 0.811804, + 0.132612, + 0.84712, + 0.348151, + 0.856423, + 0.110932, + 0.767063, + 0.842484, + 0.354497 + ], + "train_epoch_time": 4.842365980148315, + "train_loss": 3.5994698837805683, + "train_score": 0.12767777074785794, + "val_loss": 3.6159108990779836, + "val_score": 0.1264799799936921 + }, + { + "epoch": 2, + "grad_norm": 0.936374843120575, + "learning_rate": 1.0, + "model_norm": 87.47958374023438, + "step_logs": { + "grad_norm": { + "108": 1.0130778551101685, + "109": 0.7867873311042786, + "110": 1.87189781665802, + "111": 1.7924425601959229, + "112": 0.9850931167602539, + "113": 1.0074862241744995, + "114": 2.594512939453125, + "115": 1.3440529108047485, + "116": 8.823569297790527, + "117": 1.658522605895996, + "118": 0.7072761058807373, + "119": 1.7998427152633667, + "120": 1.2994506359100342, + "121": 1.7640235424041748, + "122": 1.523087978363037, + "123": 2.234794855117798, + "124": 1.9781229496002197, + "125": 1.7689653635025024, + "126": 1.3036879301071167, + "127": 1.8978241682052612, + "128": 0.9382278323173523, + "129": 3.1003024578094482, + "130": 1.2334017753601074, + "131": 6.025845527648926, + "132": 1.1387182474136353, + "133": 0.6895479559898376, + "134": 2.426396131515503, + "135": 1.4569425582885742, + "136": 7.109930992126465, + "137": 0.9290447235107422, + "138": 0.8100121021270752, + "139": 1.3584754467010498, + "140": 0.9901715517044067, + "141": 2.779709577560425, + "142": 1.0672603845596313, + "143": 3.458022117614746, + "144": 2.07422137260437, + "145": 0.9171708822250366, + "146": 3.3331644535064697, + "147": 1.2579619884490967, + "148": 1.2128691673278809, + "149": 1.8638805150985718, + "150": 1.8696259260177612, + "151": 1.37785804271698, + "152": 0.5921233892440796, + "153": 0.9394212365150452, + "154": 4.332217216491699, + "155": 1.2041782140731812, + "156": 1.0545597076416016, + "157": 1.414461612701416, + "158": 0.6525693535804749, + "159": 1.5009300708770752, + "160": 1.1042121648788452, + "161": 0.936374843120575 + }, + "loss": { + "108": 3.5776641368865967, + "109": 3.255084991455078, + "110": 3.3824307918548584, + "111": 3.5914969444274902, + "112": 3.3415751457214355, + "113": 3.4208984375, + "114": 3.4317774772644043, + "115": 3.507892370223999, + "116": 4.4293718338012695, + "117": 3.582699775695801, + "118": 3.2451202869415283, + "119": 3.324535846710205, + "120": 3.5358033180236816, + "121": 3.269261598587036, + "122": 3.635152578353882, + "123": 3.265742063522339, + "124": 3.8618967533111572, + "125": 3.4195780754089355, + "126": 3.3901875019073486, + "127": 3.3553900718688965, + "128": 3.3553123474121094, + "129": 3.3570055961608887, + "130": 3.3605549335479736, + "131": 3.921452045440674, + "132": 3.466475486755371, + "133": 3.0382943153381348, + "134": 3.324812889099121, + "135": 3.254194498062134, + "136": 4.093168258666992, + "137": 3.3552699089050293, + "138": 3.134474754333496, + "139": 3.121914863586426, + "140": 3.2171969413757324, + "141": 3.2393674850463867, + "142": 3.1525063514709473, + "143": 3.4248101711273193, + "144": 3.5027499198913574, + "145": 3.2340564727783203, + "146": 3.378838300704956, + "147": 3.461606502532959, + "148": 3.19130802154541, + "149": 3.1711950302124023, + "150": 3.3687026500701904, + "151": 3.482800006866455, + "152": 2.9598186016082764, + "153": 2.9557132720947266, + "154": 3.7361207008361816, + "155": 3.1387643814086914, + "156": 3.023664951324463, + "157": 3.1793694496154785, + "158": 3.0000743865966797, + "159": 2.9984164237976074, + "160": 3.201073408126831, + "161": 2.98128604888916 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 0.874558, + 0.913169, + 0.658774, + 0.690949, + 0.873208, + 0.870809, + 0.504857, + 0.795236, + 0.10216, + 0.722603, + 0.92844, + 0.672404, + 0.807245, + 0.677546, + 0.758105, + 0.566684, + 0.663741, + 0.686084, + 0.799575, + 0.650741, + 0.884036, + 0.411249, + 0.815432, + 0.177627, + 0.842438, + 0.927431, + 0.530399, + 0.754065, + 0.139372, + 0.886036, + 0.905254, + 0.771864, + 0.867773, + 0.456072, + 0.846986, + 0.364195, + 0.619523, + 0.884914, + 0.378207, + 0.813952, + 0.812692, + 0.646099, + 0.658406, + 0.785822, + 0.944084, + 0.870103, + 0.284762, + 0.812354, + 0.844667, + 0.760666, + 0.933731, + 0.726922, + 0.840019, + 0.871801 + ], + "train_epoch_time": 4.841893672943115, + "train_loss": 3.183151743846439, + "train_score": 0.19856191717056154, + "val_loss": 3.1889810083106793, + "val_score": 0.19537528726591993 + }, + { + "epoch": 3, + "grad_norm": 0.6600807905197144, + "learning_rate": 1.0, + "model_norm": 87.58049774169922, + "step_logs": { + "grad_norm": { + "162": 2.349169969558716, + "163": 0.9088624715805054, + "164": 6.740917682647705, + "165": 1.0101063251495361, + "166": 1.0578159093856812, + "167": 0.6850082874298096, + "168": 1.4120283126831055, + "169": 0.728011965751648, + "170": 0.613506555557251, + "171": 0.7622566223144531, + "172": 1.65699303150177, + "173": 0.810117244720459, + "174": 2.547241449356079, + "175": 1.0112279653549194, + "176": 4.82560920715332, + "177": 0.9750214219093323, + "178": 0.9130164980888367, + "179": 1.0064336061477661, + "180": 1.4664839506149292, + "181": 0.9779031872749329, + "182": 1.3843203783035278, + "183": 1.1455594301223755, + "184": 0.661656379699707, + "185": 0.6008827686309814, + "186": 1.5262645483016968, + "187": 0.733101487159729, + "188": 0.4733677804470062, + "189": 0.690822958946228, + "190": 1.532776951789856, + "191": 0.8726674318313599, + "192": 0.9893300533294678, + "193": 0.7634308934211731, + "194": 0.5948365330696106, + "195": 0.5662253499031067, + "196": 0.6097018718719482, + "197": 1.115746021270752, + "198": 0.9085699319839478, + "199": 0.6395397782325745, + "200": 0.5406814217567444, + "201": 0.5202569365501404, + "202": 0.5106058716773987, + "203": 0.6065346002578735, + "204": 0.5745587944984436, + "205": 0.6677202582359314, + "206": 0.7044236063957214, + "207": 0.8624392747879028, + "208": 0.8338705897331238, + "209": 0.6331600546836853, + "210": 0.6282129883766174, + "211": 0.7349923253059387, + "212": 0.7543413639068604, + "213": 0.6560853719711304, + "214": 0.6241986751556396, + "215": 0.6600807905197144 + }, + "loss": { + "162": 3.18389630317688, + "163": 2.9645843505859375, + "164": 3.8013947010040283, + "165": 3.05190110206604, + "166": 2.959916114807129, + "167": 2.9502933025360107, + "168": 2.9609286785125732, + "169": 3.033219575881958, + "170": 2.8299102783203125, + "171": 2.863287925720215, + "172": 2.9857888221740723, + "173": 2.996337413787842, + "174": 3.021493911743164, + "175": 2.930809259414673, + "176": 3.494567394256592, + "177": 3.101743221282959, + "178": 2.8797178268432617, + "179": 2.9090380668640137, + "180": 2.994272470474243, + "181": 3.0475385189056396, + "182": 2.8672924041748047, + "183": 3.0865540504455566, + "184": 2.8795323371887207, + "185": 2.78538179397583, + "186": 2.892137050628662, + "187": 3.0214877128601074, + "188": 2.7861251831054688, + "189": 2.757634401321411, + "190": 2.958156108856201, + "191": 2.9685380458831787, + "192": 2.796172618865967, + "193": 2.960533857345581, + "194": 2.7535245418548584, + "195": 2.758512496948242, + "196": 2.7589588165283203, + "197": 2.8154330253601074, + "198": 2.928706407546997, + "199": 2.850471019744873, + "200": 2.761180877685547, + "201": 2.740119695663452, + "202": 2.7113795280456543, + "203": 2.7149839401245117, + "204": 2.748411178588867, + "205": 2.731147289276123, + "206": 2.7936136722564697, + "207": 2.7666540145874023, + "208": 2.8514158725738525, + "209": 2.7581472396850586, + "210": 2.7208895683288574, + "211": 2.7478692531585693, + "212": 2.8006601333618164, + "213": 2.7553091049194336, + "214": 2.7745625972747803, + "215": 2.702153444290161 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 0.535721, + 0.877719, + 0.143333, + 0.85678, + 0.841028, + 0.926335, + 0.748117, + 0.919653, + 0.937645, + 0.907883, + 0.685034, + 0.901294, + 0.482227, + 0.85146, + 0.23085, + 0.867117, + 0.873564, + 0.851718, + 0.735773, + 0.864382, + 0.749528, + 0.824685, + 0.929353, + 0.939132, + 0.712897, + 0.918328, + 0.961342, + 0.920361, + 0.715765, + 0.886313, + 0.851049, + 0.910388, + 0.939628, + 0.945079, + 0.936883, + 0.818945, + 0.876476, + 0.933058, + 0.949725, + 0.952935, + 0.954127, + 0.936548, + 0.943346, + 0.924536, + 0.918432, + 0.881506, + 0.891322, + 0.93225, + 0.932381, + 0.910501, + 0.90778, + 0.927547, + 0.934393, + 0.925393 + ], + "train_epoch_time": 4.842341899871826, + "train_loss": 2.7814284674235363, + "train_score": 0.21141947627153082, + "val_loss": 2.808646624178618, + "val_score": 0.20431346881677034 + }, + { + "epoch": 4, + "grad_norm": 0.6852704882621765, + "learning_rate": 1.0, + "model_norm": 87.67063903808594, + "step_logs": { + "grad_norm": { + "216": 0.6668159365653992, + "217": 0.526739776134491, + "218": 0.5584539175033569, + "219": 0.7145861983299255, + "220": 0.8104191422462463, + "221": 0.4488040804862976, + "222": 0.48834919929504395, + "223": 0.7027773261070251, + "224": 0.7009124755859375, + "225": 0.606742262840271, + "226": 0.7202913761138916, + "227": 0.7018812894821167, + "228": 0.689257800579071, + "229": 0.6530488133430481, + "230": 0.6661697030067444, + "231": 0.7815045118331909, + "232": 0.6919880509376526, + "233": 0.5697280168533325, + "234": 0.6913193464279175, + "235": 0.7303526401519775, + "236": 0.6265333890914917, + "237": 0.603069007396698, + "238": 0.7266407012939453, + "239": 0.6770203113555908, + "240": 0.6387922763824463, + "241": 0.6717566251754761, + "242": 0.6977189779281616, + "243": 0.651513934135437, + "244": 0.6178772449493408, + "245": 0.6837935447692871, + "246": 0.6591500639915466, + "247": 0.5391072630882263, + "248": 0.6287344098091125, + "249": 0.7252734899520874, + "250": 0.691916823387146, + "251": 0.5559298992156982, + "252": 0.5315951704978943, + "253": 0.5950450897216797, + "254": 0.6224647760391235, + "255": 0.5911554098129272, + "256": 0.6148871779441833, + "257": 0.6896852254867554, + "258": 0.7248898148536682, + "259": 0.6099466681480408, + "260": 0.6298787593841553, + "261": 0.6692044138908386, + "262": 0.6525567173957825, + "263": 0.5846357941627502, + "264": 0.6014005541801453, + "265": 0.6742257475852966, + "266": 0.6430836915969849, + "267": 0.6430049538612366, + "268": 0.690177321434021, + "269": 0.6852704882621765 + }, + "loss": { + "216": 2.7904422283172607, + "217": 2.7123732566833496, + "218": 2.7137954235076904, + "219": 2.693697452545166, + "220": 2.818570852279663, + "221": 2.714264154434204, + "222": 2.667057991027832, + "223": 2.7210018634796143, + "224": 2.7749829292297363, + "225": 2.70729398727417, + "226": 2.74062442779541, + "227": 2.730461835861206, + "228": 2.759340524673462, + "229": 2.7050209045410156, + "230": 2.7378931045532227, + "231": 2.7252767086029053, + "232": 2.7979393005371094, + "233": 2.6583619117736816, + "234": 2.753272533416748, + "235": 2.7250847816467285, + "236": 2.7271876335144043, + "237": 2.686765670776367, + "238": 2.7256574630737305, + "239": 2.713956356048584, + "240": 2.708928108215332, + "241": 2.6793227195739746, + "242": 2.731151580810547, + "243": 2.685105323791504, + "244": 2.705883502960205, + "245": 2.667982578277588, + "246": 2.73250412940979, + "247": 2.6658506393432617, + "248": 2.6772403717041016, + "249": 2.6917781829833984, + "250": 2.736544132232666, + "251": 2.6529805660247803, + "252": 2.6589348316192627, + "253": 2.639176845550537, + "254": 2.6941981315612793, + "255": 2.6540732383728027, + "256": 2.691887855529785, + "257": 2.6696085929870605, + "258": 2.697352409362793, + "259": 2.6578001976013184, + "260": 2.666073799133301, + "261": 2.670083999633789, + "262": 2.7231509685516357, + "263": 2.6504769325256348, + "264": 2.6443305015563965, + "265": 2.6431427001953125, + "266": 2.6884877681732178, + "267": 2.6233601570129395, + "268": 2.683504581451416, + "269": 2.661273717880249 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 0.926207, + 0.951343, + 0.945662, + 0.913423, + 0.895649, + 0.964223, + 0.957204, + 0.916795, + 0.918679, + 0.936339, + 0.913531, + 0.917253, + 0.920738, + 0.92693, + 0.925031, + 0.899238, + 0.921174, + 0.942462, + 0.920139, + 0.910854, + 0.932863, + 0.936608, + 0.911695, + 0.922131, + 0.929959, + 0.92233, + 0.918171, + 0.926748, + 0.934104, + 0.919433, + 0.926353, + 0.948307, + 0.931248, + 0.910988, + 0.919563, + 0.944959, + 0.949541, + 0.937136, + 0.932917, + 0.938231, + 0.934381, + 0.918199, + 0.911242, + 0.934589, + 0.930746, + 0.922627, + 0.927483, + 0.939427, + 0.935989, + 0.920817, + 0.928581, + 0.926954, + 0.918481, + 0.918925 + ], + "train_epoch_time": 4.841772556304932, + "train_loss": 2.6756673847756733, + "train_score": 0.23037123384229422, + "val_loss": 2.6977650867400844, + "val_score": 0.22342763360414383 + }, + { + "epoch": 5, + "grad_norm": 0.6575700044631958, + "learning_rate": 1.0, + "model_norm": 87.76293182373047, + "step_logs": { + "grad_norm": { + "270": 0.6253135800361633, + "271": 0.5745961666107178, + "272": 0.5815318822860718, + "273": 0.6147587299346924, + "274": 0.6545942425727844, + "275": 0.5514642596244812, + "276": 0.5357682108879089, + "277": 0.6064366102218628, + "278": 0.6165779829025269, + "279": 0.5857059955596924, + "280": 0.6177246570587158, + "281": 0.7129228115081787, + "282": 0.7288950085639954, + "283": 0.6149364113807678, + "284": 0.616297721862793, + "285": 0.6054685115814209, + "286": 0.6007561683654785, + "287": 0.6003897786140442, + "288": 0.670651376247406, + "289": 0.7251616716384888, + "290": 0.6183573603630066, + "291": 0.5742987990379333, + "292": 0.6593526601791382, + "293": 0.6468791365623474, + "294": 0.5772930979728699, + "295": 0.6269423961639404, + "296": 0.6214361786842346, + "297": 0.6635381579399109, + "298": 0.8135142922401428, + "299": 0.6042878031730652, + "300": 0.5405102968215942, + "301": 0.6937423348426819, + "302": 0.7899835109710693, + "303": 0.6307160258293152, + "304": 0.526786744594574, + "305": 0.5233730673789978, + "306": 0.5989813804626465, + "307": 0.7113780975341797, + "308": 0.6406801342964172, + "309": 0.5731056928634644, + "310": 0.6452125310897827, + "311": 0.9156473278999329, + "312": 0.7744340896606445, + "313": 0.48323923349380493, + "314": 0.38585710525512695, + "315": 0.4710135757923126, + "316": 0.6377565860748291, + "317": 0.6965519189834595, + "318": 0.6850444078445435, + "319": 0.6682303547859192, + "320": 0.6728943586349487, + "321": 0.7384703755378723, + "322": 0.7777429819107056, + "323": 0.6575700044631958 + }, + "loss": { + "270": 2.669743537902832, + "271": 2.6243810653686523, + "272": 2.6640257835388184, + "273": 2.620114803314209, + "274": 2.6805260181427, + "275": 2.625105142593384, + "276": 2.636050224304199, + "277": 2.6242499351501465, + "278": 2.648617744445801, + "279": 2.6396660804748535, + "280": 2.6394271850585938, + "281": 2.634902000427246, + "282": 2.6951990127563477, + "283": 2.6385412216186523, + "284": 2.6460447311401367, + "285": 2.624196767807007, + "286": 2.632338523864746, + "287": 2.600693702697754, + "288": 2.6380844116210938, + "289": 2.6424827575683594, + "290": 2.668303966522217, + "291": 2.608004570007324, + "292": 2.623488426208496, + "293": 2.63919734954834, + "294": 2.6157071590423584, + "295": 2.616673469543457, + "296": 2.6420910358428955, + "297": 2.591675281524658, + "298": 2.6333885192871094, + "299": 2.6473581790924072, + "300": 2.6024539470672607, + "301": 2.6129446029663086, + "302": 2.6780552864074707, + "303": 2.6315975189208984, + "304": 2.5968313217163086, + "305": 2.5613808631896973, + "306": 2.573054313659668, + "307": 2.5965569019317627, + "308": 2.6412408351898193, + "309": 2.5610084533691406, + "310": 2.596482276916504, + "311": 2.628560781478882, + "312": 2.704296112060547, + "313": 2.5825698375701904, + "314": 2.530683994293213, + "315": 2.5261120796203613, + "316": 2.5743026733398438, + "317": 2.5945258140563965, + "318": 2.591357469558716, + "319": 2.5900416374206543, + "320": 2.5771846771240234, + "321": 2.5963213443756104, + "322": 2.6339271068573, + "323": 2.5780746936798096 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 0.931766, + 0.94082, + 0.940317, + 0.932731, + 0.925988, + 0.945248, + 0.948365, + 0.934518, + 0.933038, + 0.938985, + 0.932588, + 0.912036, + 0.910281, + 0.933133, + 0.933034, + 0.934712, + 0.935845, + 0.935189, + 0.92145, + 0.909503, + 0.933141, + 0.940528, + 0.923483, + 0.926547, + 0.94011, + 0.930141, + 0.931895, + 0.921708, + 0.88837, + 0.935482, + 0.946853, + 0.915671, + 0.895643, + 0.929729, + 0.949279, + 0.949243, + 0.934825, + 0.911205, + 0.927898, + 0.939739, + 0.925784, + 0.862455, + 0.900181, + 0.956745, + 0.971425, + 0.957935, + 0.926785, + 0.914493, + 0.91697, + 0.920639, + 0.919248, + 0.90496, + 0.897001, + 0.922628 + ], + "train_epoch_time": 4.8422532081604, + "train_loss": 2.583251646635693, + "train_score": 0.2353636119039021, + "val_loss": 2.6131799234725306, + "val_score": 0.23102934851848983 + }, + { + "epoch": 6, + "grad_norm": 0.5000882744789124, + "learning_rate": 1.0, + "model_norm": 87.84900665283203, + "step_logs": { + "grad_norm": { + "324": 0.5971531867980957, + "325": 0.6022903323173523, + "326": 0.6419209241867065, + "327": 0.6169332265853882, + "328": 0.6087033152580261, + "329": 0.7084470987319946, + "330": 0.7457408905029297, + "331": 0.5256245136260986, + "332": 0.42459234595298767, + "333": 0.5654624700546265, + "334": 0.6345568895339966, + "335": 0.58400958776474, + "336": 0.6193705201148987, + "337": 0.6653412580490112, + "338": 0.6821584701538086, + "339": 0.6362888216972351, + "340": 0.5809125304222107, + "341": 0.5886433124542236, + "342": 0.7237919569015503, + "343": 0.7224463224411011, + "344": 0.6026124358177185, + "345": 0.5036521553993225, + "346": 0.5330109000205994, + "347": 0.618767261505127, + "348": 0.6761471629142761, + "349": 0.6541870832443237, + "350": 0.6549535989761353, + "351": 0.711733341217041, + "352": 0.6920038461685181, + "353": 0.5846808552742004, + "354": 0.5397324562072754, + "355": 0.4805957078933716, + "356": 0.4639611542224884, + "357": 0.5837191343307495, + "358": 0.68544602394104, + "359": 0.6101338267326355, + "360": 0.5560598373413086, + "361": 0.6159241199493408, + "362": 0.626372218132019, + "363": 0.5856086611747742, + "364": 0.576715350151062, + "365": 0.5504580140113831, + "366": 0.49138495326042175, + "367": 0.49599599838256836, + "368": 0.5381412506103516, + "369": 0.5965025424957275, + "370": 0.5716441869735718, + "371": 0.534985363483429, + "372": 0.6050951480865479, + "373": 0.7219687700271606, + "374": 0.8310479521751404, + "375": 0.7454206347465515, + "376": 0.5600138306617737, + "377": 0.5000882744789124 + }, + "loss": { + "324": 2.5617666244506836, + "325": 2.5578224658966064, + "326": 2.5838518142700195, + "327": 2.5559661388397217, + "328": 2.586496114730835, + "329": 2.5217881202697754, + "330": 2.606095314025879, + "331": 2.5538928508758545, + "332": 2.493557929992676, + "333": 2.5040602684020996, + "334": 2.55924654006958, + "335": 2.508248805999756, + "336": 2.578103542327881, + "337": 2.54435396194458, + "338": 2.5729055404663086, + "339": 2.5638070106506348, + "340": 2.539839267730713, + "341": 2.520904064178467, + "342": 2.5694704055786133, + "343": 2.542829751968384, + "344": 2.5430386066436768, + "345": 2.5030465126037598, + "346": 2.5153956413269043, + "347": 2.52713680267334, + "348": 2.5745956897735596, + "349": 2.550501585006714, + "350": 2.5476882457733154, + "351": 2.5550897121429443, + "352": 2.567355155944824, + "353": 2.5307085514068604, + "354": 2.511206865310669, + "355": 2.4994359016418457, + "356": 2.472632884979248, + "357": 2.4980854988098145, + "358": 2.571336269378662, + "359": 2.5367343425750732, + "360": 2.4759137630462646, + "361": 2.5135397911071777, + "362": 2.518010377883911, + "363": 2.5342636108398438, + "364": 2.465496063232422, + "365": 2.5191147327423096, + "366": 2.491001605987549, + "367": 2.482466697692871, + "368": 2.4892263412475586, + "369": 2.4983930587768555, + "370": 2.4938693046569824, + "371": 2.5119211673736572, + "372": 2.4812660217285156, + "373": 2.529157876968384, + "374": 2.5594825744628906, + "375": 2.5710878372192383, + "376": 2.519064426422119, + "377": 2.4687352180480957 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 0.93493, + 0.933785, + 0.926151, + 0.930705, + 0.933162, + 0.909494, + 0.903589, + 0.948685, + 0.965112, + 0.939986, + 0.927069, + 0.936339, + 0.930752, + 0.91997, + 0.917069, + 0.926821, + 0.937705, + 0.935694, + 0.907489, + 0.906925, + 0.933359, + 0.951772, + 0.946546, + 0.929582, + 0.918454, + 0.922597, + 0.92235, + 0.909812, + 0.914694, + 0.936732, + 0.945178, + 0.955836, + 0.958287, + 0.936156, + 0.916288, + 0.931641, + 0.941228, + 0.929831, + 0.927724, + 0.936628, + 0.936811, + 0.943271, + 0.953774, + 0.952789, + 0.945028, + 0.933525, + 0.938512, + 0.9461, + 0.931289, + 0.906581, + 0.881121, + 0.90248, + 0.941399, + 0.951791 + ], + "train_epoch_time": 4.841815710067749, + "train_loss": 2.483801632187459, + "train_score": 0.2878642843905275, + "val_loss": 2.508345295996945, + "val_score": 0.28070734039239853 + }, + { + "epoch": 7, + "grad_norm": 0.534000039100647, + "learning_rate": 1.0, + "model_norm": 87.9289321899414, + "step_logs": { + "grad_norm": { + "378": 0.6672523021697998, + "379": 0.6729017496109009, + "380": 0.5894371271133423, + "381": 0.6475037336349487, + "382": 0.683190643787384, + "383": 0.6214357614517212, + "384": 0.6065525412559509, + "385": 0.6320510506629944, + "386": 0.6907786726951599, + "387": 0.7481032609939575, + "388": 0.5592978000640869, + "389": 0.4814859628677368, + "390": 0.6105024814605713, + "391": 0.621249794960022, + "392": 0.5701602101325989, + "393": 0.6558672189712524, + "394": 0.8090927004814148, + "395": 0.702446460723877, + "396": 0.5572105050086975, + "397": 0.5946030616760254, + "398": 0.8846548199653625, + "399": 0.8287051916122437, + "400": 0.5312759280204773, + "401": 0.3815591037273407, + "402": 0.34675461053848267, + "403": 0.4594365358352661, + "404": 0.5876360535621643, + "405": 0.6224352717399597, + "406": 0.6259280443191528, + "407": 0.6102624535560608, + "408": 0.6231469511985779, + "409": 0.620484471321106, + "410": 0.6014580726623535, + "411": 0.6007525324821472, + "412": 0.5963590741157532, + "413": 0.5844481587409973, + "414": 0.6238325238227844, + "415": 0.6103806495666504, + "416": 0.5578613877296448, + "417": 0.5720828771591187, + "418": 0.5470981597900391, + "419": 0.5328080654144287, + "420": 0.5312775373458862, + "421": 0.5537199974060059, + "422": 0.59703129529953, + "423": 0.5705642104148865, + "424": 0.5319480299949646, + "425": 0.5432087779045105, + "426": 0.5285447239875793, + "427": 0.538119375705719, + "428": 0.5494590401649475, + "429": 0.5210990309715271, + "430": 0.5022335648536682, + "431": 0.534000039100647 + }, + "loss": { + "378": 2.492499351501465, + "379": 2.518148899078369, + "380": 2.4952762126922607, + "381": 2.4923958778381348, + "382": 2.4948248863220215, + "383": 2.5114943981170654, + "384": 2.4629573822021484, + "385": 2.4895424842834473, + "386": 2.4806649684906006, + "387": 2.518702507019043, + "388": 2.4814648628234863, + "389": 2.4595227241516113, + "390": 2.467555046081543, + "391": 2.4997243881225586, + "392": 2.4422643184661865, + "393": 2.477545738220215, + "394": 2.5091967582702637, + "395": 2.506126642227173, + "396": 2.4630508422851562, + "397": 2.462289571762085, + "398": 2.4936091899871826, + "399": 2.5650851726531982, + "400": 2.4836599826812744, + "401": 2.443777084350586, + "402": 2.4244513511657715, + "403": 2.430521011352539, + "404": 2.4571049213409424, + "405": 2.4842782020568848, + "406": 2.4711008071899414, + "407": 2.4833016395568848, + "408": 2.4345924854278564, + "409": 2.4896140098571777, + "410": 2.4557390213012695, + "411": 2.485828161239624, + "412": 2.4550909996032715, + "413": 2.4628920555114746, + "414": 2.4521701335906982, + "415": 2.491764783859253, + "416": 2.429023265838623, + "417": 2.449615001678467, + "418": 2.419907569885254, + "419": 2.4501054286956787, + "420": 2.416001319885254, + "421": 2.4260528087615967, + "422": 2.434298038482666, + "423": 2.457561492919922, + "424": 2.424787998199463, + "425": 2.4322166442871094, + "426": 2.427424669265747, + "427": 2.4174022674560547, + "428": 2.4062047004699707, + "429": 2.4244260787963867, + "430": 2.4041099548339844, + "431": 2.4113759994506836 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 0.91801, + 0.91751, + 0.934913, + 0.922417, + 0.914458, + 0.928606, + 0.930503, + 0.925726, + 0.91226, + 0.900009, + 0.940707, + 0.954992, + 0.92978, + 0.928334, + 0.937599, + 0.920122, + 0.884606, + 0.910378, + 0.940709, + 0.933016, + 0.864361, + 0.881939, + 0.946233, + 0.971074, + 0.975803, + 0.958384, + 0.934345, + 0.927665, + 0.926549, + 0.930246, + 0.926141, + 0.928228, + 0.931398, + 0.932321, + 0.932462, + 0.935152, + 0.926482, + 0.930441, + 0.939796, + 0.937381, + 0.941757, + 0.945239, + 0.94481, + 0.940565, + 0.931781, + 0.937881, + 0.944868, + 0.942809, + 0.945589, + 0.943491, + 0.940969, + 0.946968, + 0.950155, + 0.944174 + ], + "train_epoch_time": 4.841690540313721, + "train_loss": 2.423061289575213, + "train_score": 0.27203080164992827, + "val_loss": 2.4458374610314, + "val_score": 0.2699438507671882 + }, + { + "epoch": 8, + "grad_norm": 0.5307457447052002, + "learning_rate": 1.0, + "model_norm": 88.01598358154297, + "step_logs": { + "grad_norm": { + "432": 0.6045862436294556, + "433": 0.5658586621284485, + "434": 0.560275137424469, + "435": 0.5634326934814453, + "436": 0.5883510708808899, + "437": 0.573127031326294, + "438": 0.7011948227882385, + "439": 1.0743197202682495, + "440": 0.6005929112434387, + "441": 0.47543981671333313, + "442": 0.4811388850212097, + "443": 0.6034538149833679, + "444": 0.7212246656417847, + "445": 0.6774283647537231, + "446": 0.5909043550491333, + "447": 0.5820765495300293, + "448": 0.6224314570426941, + "449": 0.5919395685195923, + "450": 0.6028775572776794, + "451": 0.5705131888389587, + "452": 0.555813729763031, + "453": 0.5373350381851196, + "454": 0.5404369235038757, + "455": 0.544927179813385, + "456": 0.5368862152099609, + "457": 0.5788453817367554, + "458": 0.5847495198249817, + "459": 0.5616357326507568, + "460": 0.5424827337265015, + "461": 0.5723328590393066, + "462": 0.6037271618843079, + "463": 0.6554265022277832, + "464": 0.6995989680290222, + "465": 0.6474835872650146, + "466": 0.52692711353302, + "467": 0.49869897961616516, + "468": 0.5641749501228333, + "469": 0.6213555932044983, + "470": 0.6797971725463867, + "471": 0.8086366057395935, + "472": 0.664639413356781, + "473": 0.6444742679595947, + "474": 0.477022647857666, + "475": 0.43518105149269104, + "476": 0.4455399215221405, + "477": 0.4941543936729431, + "478": 0.6534720659255981, + "479": 0.6960119605064392, + "480": 0.7720495462417603, + "481": 0.901447594165802, + "482": 0.5485942363739014, + "483": 0.5247522592544556, + "484": 0.5808753967285156, + "485": 0.5307457447052002 + }, + "loss": { + "432": 2.4294066429138184, + "433": 2.4400105476379395, + "434": 2.4061295986175537, + "435": 2.4413838386535645, + "436": 2.415982246398926, + "437": 2.4369966983795166, + "438": 2.4323067665100098, + "439": 2.520164966583252, + "440": 2.5271079540252686, + "441": 2.4397060871124268, + "442": 2.40366268157959, + "443": 2.4279680252075195, + "444": 2.46299409866333, + "445": 2.4677391052246094, + "446": 2.428255081176758, + "447": 2.4093849658966064, + "448": 2.4300594329833984, + "449": 2.4308786392211914, + "450": 2.4155869483947754, + "451": 2.430352210998535, + "452": 2.392518997192383, + "453": 2.4181995391845703, + "454": 2.4137353897094727, + "455": 2.416621685028076, + "456": 2.3817925453186035, + "457": 2.4191548824310303, + "458": 2.4184141159057617, + "459": 2.4197685718536377, + "460": 2.3851068019866943, + "461": 2.4107916355133057, + "462": 2.3969180583953857, + "463": 2.418745994567871, + "464": 2.4208900928497314, + "465": 2.4515137672424316, + "466": 2.401920795440674, + "467": 2.394526481628418, + "468": 2.3786001205444336, + "469": 2.4157373905181885, + "470": 2.402095317840576, + "471": 2.441835880279541, + "472": 2.462407112121582, + "473": 2.4406094551086426, + "474": 2.3807761669158936, + "475": 2.356658458709717, + "476": 2.3443193435668945, + "477": 2.3668155670166016, + "478": 2.3903822898864746, + "479": 2.4566798210144043, + "480": 2.42482852935791, + "481": 2.461195230484009, + "482": 2.420844554901123, + "483": 2.3973467350006104, + "484": 2.397913694381714, + "485": 2.444324493408203 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 0.930034, + 0.938426, + 0.938764, + 0.938953, + 0.93315, + 0.936862, + 0.908206, + 0.813679, + 0.933386, + 0.955725, + 0.954058, + 0.930239, + 0.904489, + 0.914928, + 0.932926, + 0.934308, + 0.926171, + 0.932774, + 0.930031, + 0.93724, + 0.939354, + 0.943664, + 0.94295, + 0.942118, + 0.942942, + 0.935233, + 0.933974, + 0.93881, + 0.941892, + 0.936385, + 0.92934, + 0.91844, + 0.908194, + 0.92123, + 0.94536, + 0.950633, + 0.937288, + 0.926003, + 0.912249, + 0.881917, + 0.917686, + 0.921582, + 0.95439, + 0.961372, + 0.959382, + 0.950945, + 0.918003, + 0.910254, + 0.890545, + 0.858307, + 0.941478, + 0.945688, + 0.934268, + 0.945518 + ], + "train_epoch_time": 4.842552185058594, + "train_loss": 2.3655868504276576, + "train_score": 0.32127645255161325, + "val_loss": 2.405880637886056, + "val_score": 0.3133027771857795 + }, + { + "epoch": 9, + "grad_norm": 0.5573657751083374, + "learning_rate": 1.0, + "model_norm": 88.09248352050781, + "step_logs": { + "grad_norm": { + "486": 0.4535903334617615, + "487": 0.43654677271842957, + "488": 0.4688446521759033, + "489": 0.5269677639007568, + "490": 0.5538724660873413, + "491": 0.5637977719306946, + "492": 0.5148653984069824, + "493": 0.48916780948638916, + "494": 0.4811687767505646, + "495": 0.53957200050354, + "496": 0.551761269569397, + "497": 0.49464598298072815, + "498": 0.5652946829795837, + "499": 0.559206485748291, + "500": 0.5420349836349487, + "501": 0.5247375965118408, + "502": 0.5506398677825928, + "503": 0.5332450866699219, + "504": 0.4832785427570343, + "505": 0.48463431000709534, + "506": 0.5219869613647461, + "507": 0.5007066130638123, + "508": 0.456484317779541, + "509": 0.4804133474826813, + "510": 0.5664798617362976, + "511": 0.5971078276634216, + "512": 0.6086966395378113, + "513": 0.5204153060913086, + "514": 0.44804394245147705, + "515": 0.49400898814201355, + "516": 0.4965521991252899, + "517": 0.5368234515190125, + "518": 0.5534977912902832, + "519": 0.5647637248039246, + "520": 0.5865835547447205, + "521": 0.5835053324699402, + "522": 0.5663397312164307, + "523": 0.5447574853897095, + "524": 0.6109901666641235, + "525": 0.7890527844429016, + "526": 0.808137059211731, + "527": 0.6618877649307251, + "528": 0.5339902639389038, + "529": 0.4922279119491577, + "530": 0.5829690098762512, + "531": 0.5641847252845764, + "532": 0.5310723185539246, + "533": 0.5086075663566589, + "534": 0.483493447303772, + "535": 0.5759891271591187, + "536": 0.6187015771865845, + "537": 0.6298069953918457, + "538": 0.6174057722091675, + "539": 0.5573657751083374 + }, + "loss": { + "486": 2.3655457496643066, + "487": 2.3609251976013184, + "488": 2.3451809883117676, + "489": 2.395559549331665, + "490": 2.3671340942382812, + "491": 2.411898612976074, + "492": 2.3650951385498047, + "493": 2.3698604106903076, + "494": 2.3744654655456543, + "495": 2.3806395530700684, + "496": 2.387601852416992, + "497": 2.38102388381958, + "498": 2.377661943435669, + "499": 2.3691205978393555, + "500": 2.3674073219299316, + "501": 2.40138578414917, + "502": 2.3700547218322754, + "503": 2.397083282470703, + "504": 2.3748817443847656, + "505": 2.341590404510498, + "506": 2.343320369720459, + "507": 2.376145124435425, + "508": 2.334594249725342, + "509": 2.367121696472168, + "510": 2.368175745010376, + "511": 2.3799967765808105, + "512": 2.368922233581543, + "513": 2.4109578132629395, + "514": 2.360182762145996, + "515": 2.359098434448242, + "516": 2.344724416732788, + "517": 2.3594279289245605, + "518": 2.362699508666992, + "519": 2.361393928527832, + "520": 2.37358021736145, + "521": 2.3732872009277344, + "522": 2.376909017562866, + "523": 2.3884658813476562, + "524": 2.3681800365448, + "525": 2.423929214477539, + "526": 2.4424996376037598, + "527": 2.3778748512268066, + "528": 2.364795684814453, + "529": 2.340841770172119, + "530": 2.3853812217712402, + "531": 2.3833248615264893, + "532": 2.34861421585083, + "533": 2.376563787460327, + "534": 2.3223228454589844, + "535": 2.3330297470092773, + "536": 2.366586208343506, + "537": 2.365109443664551, + "538": 2.355536937713623, + "539": 2.3546268939971924 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 0.958325, + 0.961206, + 0.955233, + 0.945215, + 0.939145, + 0.938178, + 0.946933, + 0.951941, + 0.953514, + 0.942377, + 0.940067, + 0.951131, + 0.937031, + 0.938089, + 0.941574, + 0.945777, + 0.93988, + 0.944009, + 0.953132, + 0.952243, + 0.945057, + 0.949889, + 0.957278, + 0.953516, + 0.936547, + 0.930317, + 0.927469, + 0.94682, + 0.959208, + 0.95082, + 0.950048, + 0.942445, + 0.939115, + 0.936737, + 0.932417, + 0.93307, + 0.936794, + 0.94151, + 0.926941, + 0.886188, + 0.882074, + 0.915651, + 0.943138, + 0.950794, + 0.933501, + 0.937403, + 0.943358, + 0.948386, + 0.952082, + 0.933618, + 0.925177, + 0.922632, + 0.925143, + 0.938115 + ], + "train_epoch_time": 4.842437267303467, + "train_loss": 2.3432632475706563, + "train_score": 0.32907550211787395, + "val_loss": 2.383026872792556, + "val_score": 0.3201017148801298 + }, + { + "epoch": 10, + "grad_norm": 0.5111677050590515, + "learning_rate": 1.0, + "model_norm": 88.16957092285156, + "step_logs": { + "grad_norm": { + "540": 0.4596772789955139, + "541": 0.4341847896575928, + "542": 0.5254841446876526, + "543": 0.566222071647644, + "544": 0.5521520972251892, + "545": 0.5631356835365295, + "546": 0.5399010181427002, + "547": 0.5117794275283813, + "548": 0.4850256145000458, + "549": 0.49450379610061646, + "550": 0.5503708124160767, + "551": 0.5276839733123779, + "552": 0.5123575329780579, + "553": 0.5299116373062134, + "554": 0.5598269701004028, + "555": 0.5512658953666687, + "556": 0.5198515057563782, + "557": 0.5541303157806396, + "558": 0.641183614730835, + "559": 0.694770336151123, + "560": 0.5791642665863037, + "561": 0.4883069097995758, + "562": 0.54417484998703, + "563": 0.5214710235595703, + "564": 0.49169886112213135, + "565": 0.5291573405265808, + "566": 0.5903640389442444, + "567": 0.5659109950065613, + "568": 0.5247699618339539, + "569": 0.5331757664680481, + "570": 0.5237985849380493, + "571": 0.5638059377670288, + "572": 0.5260524153709412, + "573": 0.5002376437187195, + "574": 0.557741105556488, + "575": 0.5511950850486755, + "576": 0.544090211391449, + "577": 0.5877402424812317, + "578": 0.6141374111175537, + "579": 0.5222635269165039, + "580": 0.48640304803848267, + "581": 0.49971556663513184, + "582": 0.4673531651496887, + "583": 0.42435377836227417, + "584": 0.4753330647945404, + "585": 0.4752807021141052, + "586": 0.4638010859489441, + "587": 0.502336859703064, + "588": 0.4949812889099121, + "589": 0.4878048598766327, + "590": 0.5196456909179688, + "591": 0.5034701824188232, + "592": 0.4684698283672333, + "593": 0.5111677050590515 + }, + "loss": { + "540": 2.3485372066497803, + "541": 2.327695846557617, + "542": 2.3510007858276367, + "543": 2.397897243499756, + "544": 2.332845687866211, + "545": 2.351045608520508, + "546": 2.3531222343444824, + "547": 2.354424238204956, + "548": 2.3402605056762695, + "549": 2.33109974861145, + "550": 2.356163501739502, + "551": 2.3666553497314453, + "552": 2.3335275650024414, + "553": 2.3365063667297363, + "554": 2.33563232421875, + "555": 2.376941680908203, + "556": 2.335899829864502, + "557": 2.337794065475464, + "558": 2.323544979095459, + "559": 2.4092025756835938, + "560": 2.334195137023926, + "561": 2.356853485107422, + "562": 2.3534493446350098, + "563": 2.354688882827759, + "564": 2.3419101238250732, + "565": 2.314380645751953, + "566": 2.336700916290283, + "567": 2.335231065750122, + "568": 2.347973108291626, + "569": 2.3590574264526367, + "570": 2.3474464416503906, + "571": 2.3411898612976074, + "572": 2.3616764545440674, + "573": 2.349451780319214, + "574": 2.328526258468628, + "575": 2.3436975479125977, + "576": 2.328582763671875, + "577": 2.3568406105041504, + "578": 2.364205837249756, + "579": 2.3688583374023438, + "580": 2.325706720352173, + "581": 2.3392255306243896, + "582": 2.3274292945861816, + "583": 2.3014719486236572, + "584": 2.3278751373291016, + "585": 2.3259596824645996, + "586": 2.2896718978881836, + "587": 2.3136181831359863, + "588": 2.3155267238616943, + "589": 2.2978756427764893, + "590": 2.3300156593322754, + "591": 2.3338265419006348, + "592": 2.3021931648254395, + "593": 2.3216135501861572 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 0.95695, + 0.961082, + 0.944531, + 0.937337, + 0.938664, + 0.936818, + 0.941675, + 0.947308, + 0.952144, + 0.950164, + 0.939602, + 0.944441, + 0.946748, + 0.943315, + 0.937126, + 0.939916, + 0.945317, + 0.938374, + 0.918723, + 0.908943, + 0.932965, + 0.95185, + 0.940811, + 0.945409, + 0.950916, + 0.942958, + 0.930598, + 0.93583, + 0.944606, + 0.943172, + 0.944788, + 0.936428, + 0.944655, + 0.949438, + 0.937386, + 0.93913, + 0.940234, + 0.93172, + 0.926127, + 0.945562, + 0.951598, + 0.949329, + 0.95518, + 0.962351, + 0.953717, + 0.95369, + 0.955133, + 0.948286, + 0.949753, + 0.950772, + 0.945228, + 0.948491, + 0.954504, + 0.946724 + ], + "train_epoch_time": 4.84199595451355, + "train_loss": 2.321312928986515, + "train_score": 0.3224040530162357, + "val_loss": 2.3717165173947743, + "val_score": 0.3129574491222198 + }, + { + "epoch": 11, + "grad_norm": 0.5891222953796387, + "learning_rate": 1.0, + "model_norm": 88.25027465820312, + "step_logs": { + "grad_norm": { + "594": 0.5133320093154907, + "595": 0.482785701751709, + "596": 0.4656589925289154, + "597": 0.4678007662296295, + "598": 0.5461351275444031, + "599": 0.5860890746116638, + "600": 0.5101441144943237, + "601": 0.44936949014663696, + "602": 0.45749667286872864, + "603": 0.525749921798706, + "604": 0.5417052507400513, + "605": 0.49314481019973755, + "606": 0.5035368800163269, + "607": 0.5096091032028198, + "608": 0.5163124203681946, + "609": 0.548633337020874, + "610": 0.514327883720398, + "611": 0.485579252243042, + "612": 0.5117934942245483, + "613": 0.5537772178649902, + "614": 0.5970088243484497, + "615": 0.5690841674804688, + "616": 0.5433055758476257, + "617": 0.5305554866790771, + "618": 0.5305752158164978, + "619": 0.5523653030395508, + "620": 0.5469662547111511, + "621": 0.510524332523346, + "622": 0.489940345287323, + "623": 0.5190476775169373, + "624": 0.5540767312049866, + "625": 0.5712870955467224, + "626": 0.591969907283783, + "627": 0.5713293552398682, + "628": 0.4952087700366974, + "629": 0.5064335465431213, + "630": 0.5567072033882141, + "631": 0.5270132422447205, + "632": 0.44978469610214233, + "633": 0.41745802760124207, + "634": 0.42759495973587036, + "635": 0.46977895498275757, + "636": 0.46791911125183105, + "637": 0.4392073154449463, + "638": 0.4680866003036499, + "639": 0.5004833340644836, + "640": 0.5214317440986633, + "641": 0.5833013653755188, + "642": 0.5800824761390686, + "643": 0.5446174144744873, + "644": 0.5960995554924011, + "645": 0.6510222554206848, + "646": 0.6233162879943848, + "647": 0.5891222953796387 + }, + "loss": { + "594": 2.3173465728759766, + "595": 2.341799020767212, + "596": 2.318256378173828, + "597": 2.2926080226898193, + "598": 2.3009142875671387, + "599": 2.324228286743164, + "600": 2.3181920051574707, + "601": 2.3202199935913086, + "602": 2.307252883911133, + "603": 2.328923463821411, + "604": 2.3133187294006348, + "605": 2.32059383392334, + "606": 2.3075478076934814, + "607": 2.315316677093506, + "608": 2.295219898223877, + "609": 2.3372952938079834, + "610": 2.3392343521118164, + "611": 2.2881274223327637, + "612": 2.3340096473693848, + "613": 2.305269241333008, + "614": 2.354292392730713, + "615": 2.3354878425598145, + "616": 2.3175840377807617, + "617": 2.3408193588256836, + "618": 2.3563551902770996, + "619": 2.3441519737243652, + "620": 2.3236570358276367, + "621": 2.3328073024749756, + "622": 2.2992446422576904, + "623": 2.310436248779297, + "624": 2.3252806663513184, + "625": 2.3196282386779785, + "626": 2.322413921356201, + "627": 2.3667993545532227, + "628": 2.3076698780059814, + "629": 2.298457622528076, + "630": 2.2946012020111084, + "631": 2.3172450065612793, + "632": 2.2990386486053467, + "633": 2.2998156547546387, + "634": 2.2614967823028564, + "635": 2.2691798210144043, + "636": 2.29813814163208, + "637": 2.3086698055267334, + "638": 2.301081418991089, + "639": 2.277754306793213, + "640": 2.286409854888916, + "641": 2.321709156036377, + "642": 2.324002504348755, + "643": 2.3309974670410156, + "644": 2.3036136627197266, + "645": 2.341291904449463, + "646": 2.337948799133301, + "647": 2.319767475128174 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 0.946203, + 0.952594, + 0.955322, + 0.954447, + 0.939131, + 0.931189, + 0.946852, + 0.958299, + 0.95661, + 0.943981, + 0.940358, + 0.95021, + 0.947922, + 0.946895, + 0.945115, + 0.939505, + 0.946483, + 0.951001, + 0.946869, + 0.937633, + 0.929631, + 0.935162, + 0.94013, + 0.943284, + 0.943633, + 0.938898, + 0.939518, + 0.947093, + 0.95039, + 0.944909, + 0.938074, + 0.934274, + 0.929848, + 0.935491, + 0.949547, + 0.947156, + 0.936739, + 0.943459, + 0.957856, + 0.963495, + 0.961147, + 0.953627, + 0.95453, + 0.959897, + 0.954554, + 0.947881, + 0.943879, + 0.931729, + 0.932492, + 0.940183, + 0.928397, + 0.917, + 0.923284, + 0.9304 + ], + "train_epoch_time": 4.841830015182495, + "train_loss": 2.3295666375837505, + "train_score": 0.32586419487444196, + "val_loss": 2.3782753353140795, + "val_score": 0.31583668183787683 + }, + { + "epoch": 12, + "grad_norm": 0.3471137583255768, + "learning_rate": 1.0, + "model_norm": 88.31282043457031, + "step_logs": { + "grad_norm": { + "648": 1.1777749061584473, + "649": 0.5775364637374878, + "650": 0.6844466924667358, + "651": 0.596462070941925, + "652": 0.8689321875572205, + "653": 0.6130152940750122, + "654": 0.4199494421482086, + "655": 0.3861088156700134, + "656": 0.48311862349510193, + "657": 0.48955103754997253, + "658": 0.43584492802619934, + "659": 0.42765292525291443, + "660": 0.45243245363235474, + "661": 0.4535365700721741, + "662": 0.45955464243888855, + "663": 0.45431095361709595, + "664": 0.4133497476577759, + "665": 0.39969709515571594, + "666": 0.41830238699913025, + "667": 0.43993914127349854, + "668": 0.4705626666545868, + "669": 0.4866509735584259, + "670": 0.46342143416404724, + "671": 0.41562455892562866, + "672": 0.39392420649528503, + "673": 0.3934900760650635, + "674": 0.46160849928855896, + "675": 0.4165823757648468, + "676": 0.4142131805419922, + "677": 0.4793565273284912, + "678": 0.5682106614112854, + "679": 0.5044792294502258, + "680": 0.3850363492965698, + "681": 0.370151162147522, + "682": 0.3876659870147705, + "683": 0.44797414541244507, + "684": 0.4645005762577057, + "685": 0.5066595077514648, + "686": 0.4528997540473938, + "687": 0.3925599753856659, + "688": 0.3621465861797333, + "689": 0.34414830803871155, + "690": 0.3602757453918457, + "691": 0.35121628642082214, + "692": 0.3378406763076782, + "693": 0.3606458008289337, + "694": 0.4013032019138336, + "695": 0.43680357933044434, + "696": 0.4419235289096832, + "697": 0.43803393840789795, + "698": 0.45218780636787415, + "699": 0.44148799777030945, + "700": 0.3915144205093384, + "701": 0.3471137583255768 + }, + "loss": { + "648": 2.3302783966064453, + "649": 2.363994598388672, + "650": 2.3300065994262695, + "651": 2.3496322631835938, + "652": 2.373607635498047, + "653": 2.408134937286377, + "654": 2.3167662620544434, + "655": 2.3106939792633057, + "656": 2.300048828125, + "657": 2.3136980533599854, + "658": 2.292933702468872, + "659": 2.297440528869629, + "660": 2.28442645072937, + "661": 2.2821733951568604, + "662": 2.304558753967285, + "663": 2.3276240825653076, + "664": 2.2810564041137695, + "665": 2.2977817058563232, + "666": 2.253488540649414, + "667": 2.261998176574707, + "668": 2.276346206665039, + "669": 2.289158582687378, + "670": 2.2793164253234863, + "671": 2.2820611000061035, + "672": 2.2701451778411865, + "673": 2.2513554096221924, + "674": 2.274477005004883, + "675": 2.2709403038024902, + "676": 2.240373134613037, + "677": 2.2823562622070312, + "678": 2.264913558959961, + "679": 2.2986536026000977, + "680": 2.2662601470947266, + "681": 2.2509055137634277, + "682": 2.249601364135742, + "683": 2.259070873260498, + "684": 2.272066116333008, + "685": 2.2733898162841797, + "686": 2.291858673095703, + "687": 2.296607255935669, + "688": 2.258047580718994, + "689": 2.2477993965148926, + "690": 2.252373218536377, + "691": 2.2413618564605713, + "692": 2.212996482849121, + "693": 2.2121965885162354, + "694": 2.2410457134246826, + "695": 2.268937587738037, + "696": 2.241586446762085, + "697": 2.27162504196167, + "698": 2.2394938468933105, + "699": 2.26474928855896, + "700": 2.2560529708862305, + "701": 2.226684093475342 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 0.770632, + 0.928713, + 0.898449, + 0.913597, + 0.844333, + 0.901005, + 0.928917, + 0.928143, + 0.906876, + 0.900402, + 0.903169, + 0.898755, + 0.889045, + 0.883147, + 0.876874, + 0.872313, + 0.871809, + 0.868052, + 0.859237, + 0.850594, + 0.840702, + 0.832872, + 0.83039, + 0.831037, + 0.827753, + 0.821781, + 0.807742, + 0.807618, + 0.801766, + 0.788405, + 0.770091, + 0.773994, + 0.781945, + 0.777455, + 0.769807, + 0.757572, + 0.750078, + 0.739394, + 0.740082, + 0.740399, + 0.736969, + 0.7325, + 0.725261, + 0.720014, + 0.714965, + 0.707207, + 0.698089, + 0.689303, + 0.682773, + 0.677571, + 0.670205, + 0.665561, + 0.663703, + 0.66081 + ], + "train_epoch_time": 4.842498064041138, + "train_loss": 2.2332572330190255, + "train_score": 0.3521801021639552, + "val_loss": 2.3010796326309886, + "val_score": 0.33304481268202807 + }, + { + "epoch": 13, + "grad_norm": 0.19276770949363708, + "learning_rate": 0.6666666666666667, + "model_norm": 88.34199523925781, + "step_logs": { + "grad_norm": { + "702": 0.3367370367050171, + "703": 0.3300202190876007, + "704": 0.35249218344688416, + "705": 0.37002718448638916, + "706": 0.39258378744125366, + "707": 0.4061930775642395, + "708": 0.3715112507343292, + "709": 0.3333435654640198, + "710": 0.30995285511016846, + "711": 0.3120267689228058, + "712": 0.32090482115745544, + "713": 0.34134599566459656, + "714": 0.34763240814208984, + "715": 0.34040433168411255, + "716": 0.34016522765159607, + "717": 0.32474440336227417, + "718": 0.2861678898334503, + "719": 0.2720697820186615, + "720": 0.26680541038513184, + "721": 0.2540389597415924, + "722": 0.24680061638355255, + "723": 0.27230924367904663, + "724": 0.27121758460998535, + "725": 0.3167182505130768, + "726": 0.304227352142334, + "727": 0.27463096380233765, + "728": 0.2517527937889099, + "729": 0.2406729757785797, + "730": 0.2385374903678894, + "731": 0.2672624886035919, + "732": 0.2643625736236572, + "733": 0.2615050971508026, + "734": 0.2672218382358551, + "735": 0.2772354185581207, + "736": 0.28493732213974, + "737": 0.30446380376815796, + "738": 0.2752019762992859, + "739": 0.2577574849128723, + "740": 0.22526749968528748, + "741": 0.21702386438846588, + "742": 0.1880822777748108, + "743": 0.20799009501934052, + "744": 0.22810305655002594, + "745": 0.2072276622056961, + "746": 0.20557722449302673, + "747": 0.2135082185268402, + "748": 0.24156875908374786, + "749": 0.2122519463300705, + "750": 0.21947483718395233, + "751": 0.2319253534078598, + "752": 0.2278103083372116, + "753": 0.21703630685806274, + "754": 0.22236396372318268, + "755": 0.19276770949363708 + }, + "loss": { + "702": 2.2213964462280273, + "703": 2.2200703620910645, + "704": 2.2125444412231445, + "705": 2.215944290161133, + "706": 2.2394566535949707, + "707": 2.2375903129577637, + "708": 2.247518539428711, + "709": 2.228762149810791, + "710": 2.2198896408081055, + "711": 2.2327170372009277, + "712": 2.2222037315368652, + "713": 2.2404026985168457, + "714": 2.215725898742676, + "715": 2.2390024662017822, + "716": 2.2179675102233887, + "717": 2.248314142227173, + "718": 2.21696400642395, + "719": 2.2123448848724365, + "720": 2.203108787536621, + "721": 2.219628095626831, + "722": 2.2158589363098145, + "723": 2.2054171562194824, + "724": 2.203853130340576, + "725": 2.2004506587982178, + "726": 2.2177019119262695, + "727": 2.1910629272460938, + "728": 2.2068214416503906, + "729": 2.21448016166687, + "730": 2.216590642929077, + "731": 2.1941914558410645, + "732": 2.2182273864746094, + "733": 2.2038538455963135, + "734": 2.1972885131835938, + "735": 2.1980886459350586, + "736": 2.2136335372924805, + "737": 2.1935997009277344, + "738": 2.193983554840088, + "739": 2.2163987159729004, + "740": 2.2128028869628906, + "741": 2.184307813644409, + "742": 2.2243218421936035, + "743": 2.2232890129089355, + "744": 2.20253849029541, + "745": 2.2184159755706787, + "746": 2.1896615028381348, + "747": 2.211479663848877, + "748": 2.196040630340576, + "749": 2.158949613571167, + "750": 2.183164119720459, + "751": 2.1787428855895996, + "752": 2.1821770668029785, + "753": 2.183241367340088, + "754": 2.17989444732666, + "755": 2.1832149028778076 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 0.655513, + 0.649964, + 0.642516, + 0.635424, + 0.6281, + 0.62124, + 0.617688, + 0.613916, + 0.609147, + 0.603076, + 0.596576, + 0.589586, + 0.583168, + 0.577654, + 0.571595, + 0.566448, + 0.562007, + 0.556499, + 0.550614, + 0.54503, + 0.539184, + 0.532232, + 0.526202, + 0.518491, + 0.512968, + 0.507867, + 0.50252, + 0.496752, + 0.490717, + 0.483814, + 0.477857, + 0.471829, + 0.465587, + 0.459246, + 0.452995, + 0.446367, + 0.441061, + 0.435411, + 0.429968, + 0.423979, + 0.418357, + 0.411923, + 0.405456, + 0.399682, + 0.393561, + 0.387336, + 0.38078, + 0.37507, + 0.368863, + 0.362568, + 0.356507, + 0.350521, + 0.344329, + 0.338528 + ], + "train_epoch_time": 4.843467473983765, + "train_loss": 2.187423188990808, + "train_score": 0.3627488342641587, + "val_loss": 2.261908656557173, + "val_score": 0.34299207058757647 + }, + { + "epoch": 14, + "grad_norm": 0.1918720155954361, + "learning_rate": 0.33333333333333337, + "model_norm": 88.35152435302734, + "step_logs": { + "grad_norm": { + "756": 0.2120075225830078, + "757": 0.18507209420204163, + "758": 0.18970555067062378, + "759": 0.2302817851305008, + "760": 0.2818502187728882, + "761": 0.22272615134716034, + "762": 0.19985193014144897, + "763": 0.18835432827472687, + "764": 0.22415927052497864, + "765": 0.21204973757266998, + "766": 0.21221452951431274, + "767": 0.19515444338321686, + "768": 0.20536620914936066, + "769": 0.18919330835342407, + "770": 0.22594144940376282, + "771": 0.21108511090278625, + "772": 0.18369100987911224, + "773": 0.22985200583934784, + "774": 0.2018551528453827, + "775": 0.20708796381950378, + "776": 0.204896941781044, + "777": 0.20150117576122284, + "778": 0.20800389349460602, + "779": 0.19345587491989136, + "780": 0.1838405430316925, + "781": 0.18990516662597656, + "782": 0.19190816581249237, + "783": 0.20387667417526245, + "784": 0.1956193745136261, + "785": 0.1914726197719574, + "786": 0.1904798001050949, + "787": 0.20598207414150238, + "788": 0.2333458513021469, + "789": 0.20045232772827148, + "790": 0.1941618174314499, + "791": 0.18953397870063782, + "792": 0.18890880048274994, + "793": 0.1807178109884262, + "794": 0.18581968545913696, + "795": 0.18459971249103546, + "796": 0.19288456439971924, + "797": 0.17857621610164642, + "798": 0.1772775799036026, + "799": 0.18140459060668945, + "800": 0.15560737252235413, + "801": 0.186567023396492, + "802": 0.20617219805717468, + "803": 0.18182601034641266, + "804": 0.1799536645412445, + "805": 0.21246781945228577, + "806": 0.17745335400104523, + "807": 0.18768306076526642, + "808": 0.19940459728240967, + "809": 0.1918720155954361 + }, + "loss": { + "756": 2.1920931339263916, + "757": 2.1925957202911377, + "758": 2.1922717094421387, + "759": 2.1849136352539062, + "760": 2.167494773864746, + "761": 2.1916823387145996, + "762": 2.196556568145752, + "763": 2.166910171508789, + "764": 2.1777358055114746, + "765": 2.18515682220459, + "766": 2.2083334922790527, + "767": 2.1872642040252686, + "768": 2.2003586292266846, + "769": 2.1752116680145264, + "770": 2.1866278648376465, + "771": 2.1671388149261475, + "772": 2.189948081970215, + "773": 2.1878371238708496, + "774": 2.202288866043091, + "775": 2.184051275253296, + "776": 2.169708251953125, + "777": 2.1713132858276367, + "778": 2.1911211013793945, + "779": 2.1960315704345703, + "780": 2.193258047103882, + "781": 2.172991991043091, + "782": 2.1540231704711914, + "783": 2.1922898292541504, + "784": 2.1756553649902344, + "785": 2.1882693767547607, + "786": 2.159757614135742, + "787": 2.1898012161254883, + "788": 2.179352283477783, + "789": 2.194922924041748, + "790": 2.160473346710205, + "791": 2.19187068939209, + "792": 2.175341844558716, + "793": 2.1823716163635254, + "794": 2.200077533721924, + "795": 2.177055835723877, + "796": 2.1717052459716797, + "797": 2.1864142417907715, + "798": 2.1723575592041016, + "799": 2.1695990562438965, + "800": 2.1579627990722656, + "801": 2.1700949668884277, + "802": 2.1710829734802246, + "803": 2.1577982902526855, + "804": 2.158672332763672, + "805": 2.1799466609954834, + "806": 2.1822350025177, + "807": 2.158999443054199, + "808": 2.1973447799682617, + "809": 2.1833131313323975 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 0.332198, + 0.326327, + 0.320144, + 0.313617, + 0.306906, + 0.301437, + 0.2955, + 0.289436, + 0.283023, + 0.276986, + 0.270855, + 0.26482, + 0.258617, + 0.25256, + 0.246204, + 0.240146, + 0.234145, + 0.227767, + 0.221766, + 0.215592, + 0.209451, + 0.203316, + 0.197146, + 0.191047, + 0.184921, + 0.178747, + 0.172585, + 0.166404, + 0.160268, + 0.154122, + 0.147964, + 0.14178, + 0.135572, + 0.129476, + 0.123324, + 0.117171, + 0.11101, + 0.104856, + 0.098689, + 0.0925255, + 0.0863558, + 0.0802, + 0.0740344, + 0.0678663, + 0.061707, + 0.0555308, + 0.0493589, + 0.0431956, + 0.0370268, + 0.0308543, + 0.024687, + 0.0185157, + 0.0123443, + 0.00617252 + ], + "train_epoch_time": 4.843153238296509, + "train_loss": 2.174925650619879, + "train_score": 0.3647933105228621, + "val_loss": 2.250647404984957, + "val_score": 0.3442209032797239 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:37:14.167890", + "final_model_norm": 88.35152435302734, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:35:32.596938", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 1, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 1.9831418991088867, + "learning_rate": 1e-10, + "model_norm": 87.28739166259766, + "step_logs": { + "grad_norm": { + "0": 22.837112426757812, + "1": 22.75118637084961, + "2": 6.520443916320801, + "3": 8.1116943359375, + "4": 8.071544647216797, + "5": 5.051503658294678, + "6": 10.454682350158691, + "7": 3.2941524982452393, + "8": 7.0451741218566895, + "9": 3.3513622283935547, + "10": 7.286918640136719, + "11": 4.471482276916504, + "12": 17.447614669799805, + "13": 5.5175323486328125, + "14": 40.61209487915039, + "15": 6.867544174194336, + "16": 10.986544609069824, + "17": 9.684115409851074, + "18": 12.968350410461426, + "19": 4.472919464111328, + "20": 7.644439220428467, + "21": 5.183309555053711, + "22": 6.06050968170166, + "23": 13.362408638000488, + "24": 11.589578628540039, + "25": 5.267686367034912, + "26": 5.404995918273926, + "27": 14.979524612426758, + "28": 13.429210662841797, + "29": 6.378403663635254, + "30": 12.583794593811035, + "31": 5.356675148010254, + "32": 12.356690406799316, + "33": 10.635289192199707, + "34": 13.440580368041992, + "35": 16.028547286987305, + "36": 8.718912124633789, + "37": 5.2349419593811035, + "38": 16.089889526367188, + "39": 3.958259105682373, + "40": 15.132730484008789, + "41": 7.108010292053223, + "42": 8.280714988708496, + "43": 15.54705810546875, + "44": 7.04470157623291, + "45": 7.8422393798828125, + "46": 15.904803276062012, + "47": 3.8307065963745117, + "48": 8.149847030639648, + "49": 9.206463813781738, + "50": 4.509592533111572, + "51": 13.518691062927246, + "52": 3.18174409866333, + "53": 1.9831418991088867 + }, + "loss": { + "0": 4.5338897705078125, + "1": 4.527107238769531, + "2": 3.7987375259399414, + "3": 4.1109819412231445, + "4": 3.9307804107666016, + "5": 4.457658290863037, + "6": 3.9617631435394287, + "7": 3.9944658279418945, + "8": 4.307804107666016, + "9": 3.8398241996765137, + "10": 4.925821304321289, + "11": 4.240236282348633, + "12": 5.873170852661133, + "13": 6.514164924621582, + "14": 5.1177263259887695, + "15": 6.173100471496582, + "16": 3.955044984817505, + "17": 4.3252363204956055, + "18": 4.531990051269531, + "19": 4.689167022705078, + "20": 4.176158428192139, + "21": 5.519330024719238, + "22": 4.509194374084473, + "23": 5.555792331695557, + "24": 4.245941162109375, + "25": 3.864851951599121, + "26": 4.760091781616211, + "27": 6.997500419616699, + "28": 6.962763786315918, + "29": 6.041469097137451, + "30": 6.256098747253418, + "31": 6.706310272216797, + "32": 6.257039546966553, + "33": 6.636319160461426, + "34": 6.713165760040283, + "35": 7.423327922821045, + "36": 5.201717376708984, + "37": 4.588018417358398, + "38": 5.380327224731445, + "39": 4.172258377075195, + "40": 7.587206840515137, + "41": 7.414318084716797, + "42": 5.928915023803711, + "43": 8.181875228881836, + "44": 7.675028324127197, + "45": 6.547153949737549, + "46": 8.30854320526123, + "47": 6.58784294128418, + "48": 5.120253562927246, + "49": 5.55564546585083, + "50": 5.101111888885498, + "51": 7.125539779663086, + "52": 5.335868835449219, + "53": 4.103789806365967 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 1e-10, + 0.0093311, + 0.0326839, + 0.0405358, + 0.0481067, + 0.077747, + 0.0451921, + 0.117631, + 0.083257, + 0.142489, + 0.0962475, + 0.144862, + 0.0332416, + 0.161738, + 0.00607122, + 0.139794, + 0.0543936, + 0.0725561, + 0.0468772, + 0.209868, + 0.105301, + 0.207692, + 0.157592, + 0.0548153, + 0.0558641, + 0.178896, + 0.200332, + 0.0559123, + 0.0678596, + 0.196417, + 0.0698204, + 0.266508, + 0.0726545, + 0.0996299, + 0.0669997, + 0.0533815, + 0.114995, + 0.230527, + 0.0394101, + 0.316489, + 0.0611952, + 0.216137, + 0.143407, + 0.0627592, + 0.228863, + 0.17218, + 0.061312, + 0.459227, + 0.132843, + 0.115626, + 0.334076, + 0.0723383, + 0.513182, + 0.676053 + ], + "train_epoch_time": 4.845436096191406, + "train_loss": 4.499074915526074, + "train_score": 0.08541853481762036, + "val_loss": 4.512567051516335, + "val_score": 0.08252009189977438 + }, + { + "epoch": 1, + "grad_norm": 3.0589547157287598, + "learning_rate": 1.0, + "model_norm": 87.3234634399414, + "step_logs": { + "grad_norm": { + "54": 2.8141071796417236, + "55": 1.710223913192749, + "56": 3.732734203338623, + "57": 11.22106647491455, + "58": 2.0565707683563232, + "59": 3.3401100635528564, + "60": 7.947438716888428, + "61": 2.3921890258789062, + "62": 7.051760673522949, + "63": 3.568357467651367, + "64": 5.318252086639404, + "65": 9.018369674682617, + "66": 1.1958746910095215, + "67": 12.429364204406738, + "68": 0.9566407203674316, + "69": 7.411942958831787, + "70": 1.5008424520492554, + "71": 16.9825439453125, + "72": 3.371229648590088, + "73": 0.5242273211479187, + "74": 4.539724349975586, + "75": 5.06485652923584, + "76": 3.1363143920898438, + "77": 14.291738510131836, + "78": 2.0122501850128174, + "79": 6.145211696624756, + "80": 10.80793285369873, + "81": 1.067740559577942, + "82": 1.3276876211166382, + "83": 11.108071327209473, + "84": 3.60037899017334, + "85": 6.396505355834961, + "86": 2.36037278175354, + "87": 9.8271484375, + "88": 2.612766742706299, + "89": 1.112652063369751, + "90": 3.7345943450927734, + "91": 2.317619562149048, + "92": 2.6894021034240723, + "93": 7.344915390014648, + "94": 0.9760797619819641, + "95": 11.726158142089844, + "96": 1.5686811208724976, + "97": 9.840619087219238, + "98": 1.9007809162139893, + "99": 1.3968459367752075, + "100": 1.2916523218154907, + "101": 1.4621291160583496, + "102": 3.6534433364868164, + "103": 1.9408931732177734, + "104": 1.395183801651001, + "105": 4.99188232421875, + "106": 1.0339820384979248, + "107": 3.0589547157287598 + }, + "loss": { + "54": 4.496552467346191, + "55": 4.283427715301514, + "56": 4.070971488952637, + "57": 6.417243957519531, + "58": 4.241355895996094, + "59": 4.122255802154541, + "60": 5.691153049468994, + "61": 4.3569231033325195, + "62": 4.7263503074646, + "63": 4.9553303718566895, + "64": 4.079760551452637, + "65": 6.059639930725098, + "66": 3.8306164741516113, + "67": 6.762326717376709, + "68": 3.535759925842285, + "69": 4.7031965255737305, + "70": 3.6029560565948486, + "71": 9.066278457641602, + "72": 4.146097183227539, + "73": 3.396852493286133, + "74": 4.054537296295166, + "75": 4.677854537963867, + "76": 3.845867872238159, + "77": 6.700932502746582, + "78": 3.9027583599090576, + "79": 5.309556007385254, + "80": 5.890167236328125, + "81": 3.59029221534729, + "82": 3.580516815185547, + "83": 7.183401584625244, + "84": 4.102786064147949, + "85": 4.321220397949219, + "86": 4.31319522857666, + "87": 4.902420997619629, + "88": 4.417163372039795, + "89": 3.501753091812134, + "90": 4.025180816650391, + "91": 4.4595208168029785, + "92": 3.8459949493408203, + "93": 4.913617134094238, + "94": 3.500107765197754, + "95": 6.503312587738037, + "96": 3.8549532890319824, + "97": 4.363958358764648, + "98": 3.8823580741882324, + "99": 3.9704980850219727, + "100": 3.6405396461486816, + "101": 3.5652427673339844, + "102": 3.963061809539795, + "103": 4.422133445739746, + "104": 3.6809685230255127, + "105": 3.531747341156006, + "106": 3.408315658569336, + "107": 4.0542521476745605 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.531749, + 0.745481, + 0.368827, + 0.0925029, + 0.667289, + 0.424956, + 0.152692, + 0.603602, + 0.159728, + 0.437676, + 0.223896, + 0.129687, + 0.842695, + 0.0804973, + 0.885414, + 0.146191, + 0.76185, + 0.0591525, + 0.421836, + 0.961121, + 0.282367, + 0.267241, + 0.438821, + 0.0615737, + 0.658434, + 0.219481, + 0.0916103, + 0.862983, + 0.802466, + 0.104292, + 0.387635, + 0.174391, + 0.607589, + 0.0921701, + 0.564102, + 0.849785, + 0.365966, + 0.624128, + 0.515381, + 0.154092, + 0.880204, + 0.0864173, + 0.758053, + 0.0826776, + 0.682451, + 0.802755, + 0.813579, + 0.769341, + 0.372577, + 0.701295, + 0.790885, + 0.220856, + 0.864424, + 0.464253 + ], + "train_epoch_time": 4.841185808181763, + "train_loss": 3.364718777548463, + "train_score": 0.08575031382794701, + "val_loss": 3.3792267357577686, + "val_score": 0.08293269226492204 + }, + { + "epoch": 2, + "grad_norm": 0.6052566766738892, + "learning_rate": 1.0, + "model_norm": 87.44818878173828, + "step_logs": { + "grad_norm": { + "108": 0.7990756630897522, + "109": 1.3617959022521973, + "110": 1.1489999294281006, + "111": 3.39947247505188, + "112": 1.0340996980667114, + "113": 0.617216944694519, + "114": 3.1970701217651367, + "115": 1.1757283210754395, + "116": 3.6301863193511963, + "117": 1.1995385885238647, + "118": 2.378713369369507, + "119": 1.1363970041275024, + "120": 2.594503164291382, + "121": 0.8914473056793213, + "122": 0.9866601228713989, + "123": 2.352792263031006, + "124": 0.8244664669036865, + "125": 1.143155574798584, + "126": 0.9805446863174438, + "127": 1.0797122716903687, + "128": 2.487576484680176, + "129": 1.175310492515564, + "130": 2.6633682250976562, + "131": 1.119796872138977, + "132": 1.503003716468811, + "133": 1.1181613206863403, + "134": 0.870795488357544, + "135": 0.6408287882804871, + "136": 0.4828634262084961, + "137": 1.3867980241775513, + "138": 1.8460670709609985, + "139": 0.9291976690292358, + "140": 1.1664748191833496, + "141": 1.8698469400405884, + "142": 0.9238661527633667, + "143": 2.2968530654907227, + "144": 1.0769011974334717, + "145": 0.884735643863678, + "146": 1.1556028127670288, + "147": 2.6860485076904297, + "148": 1.066886305809021, + "149": 0.6632800698280334, + "150": 1.0318211317062378, + "151": 2.3003132343292236, + "152": 1.0402331352233887, + "153": 0.9450657367706299, + "154": 0.9584382176399231, + "155": 0.5830466747283936, + "156": 0.6201723217964172, + "157": 0.9507369995117188, + "158": 0.9885178208351135, + "159": 0.5497141480445862, + "160": 0.4122600853443146, + "161": 0.6052566766738892 + }, + "loss": { + "108": 3.3414113521575928, + "109": 3.5784389972686768, + "110": 3.518871307373047, + "111": 3.5433592796325684, + "112": 3.6816158294677734, + "113": 3.349371910095215, + "114": 3.5908660888671875, + "115": 3.424107074737549, + "116": 3.5753965377807617, + "117": 3.6244006156921387, + "118": 3.7597837448120117, + "119": 3.2557806968688965, + "120": 3.5738797187805176, + "121": 3.4056382179260254, + "122": 3.288278579711914, + "123": 3.3868770599365234, + "124": 3.456759452819824, + "125": 3.249878406524658, + "126": 3.1945924758911133, + "127": 3.185201644897461, + "128": 3.378434181213379, + "129": 3.424539804458618, + "130": 3.3112215995788574, + "131": 3.3461813926696777, + "132": 3.216909646987915, + "133": 3.3869524002075195, + "134": 3.165609359741211, + "135": 3.143808364868164, + "136": 2.9743454456329346, + "137": 3.0846259593963623, + "138": 3.309664011001587, + "139": 3.4011752605438232, + "140": 3.1722803115844727, + "141": 3.2864184379577637, + "142": 3.1983742713928223, + "143": 3.104551315307617, + "144": 3.2795639038085938, + "145": 3.118144989013672, + "146": 3.100399971008301, + "147": 3.398005247116089, + "148": 3.237244129180908, + "149": 2.979027032852173, + "150": 3.002187728881836, + "151": 3.1980438232421875, + "152": 3.3180489540100098, + "153": 3.041016101837158, + "154": 3.1343166828155518, + "155": 2.90212345123291, + "156": 2.891636610031128, + "157": 2.897037982940674, + "158": 3.0314197540283203, + "159": 2.9041519165039062, + "160": 2.7970480918884277, + "161": 2.820652484893799 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 0.912786, + 0.794206, + 0.842042, + 0.380125, + 0.873187, + 0.94619, + 0.412672, + 0.832048, + 0.351753, + 0.834376, + 0.570622, + 0.834499, + 0.514998, + 0.895519, + 0.871061, + 0.550292, + 0.910481, + 0.832602, + 0.8692, + 0.845309, + 0.521971, + 0.832165, + 0.482827, + 0.842198, + 0.740129, + 0.844185, + 0.893041, + 0.938691, + 0.962284, + 0.762346, + 0.660131, + 0.887368, + 0.82341, + 0.652769, + 0.882276, + 0.540644, + 0.849755, + 0.888481, + 0.8228, + 0.485053, + 0.850481, + 0.931238, + 0.849392, + 0.547257, + 0.859801, + 0.871953, + 0.87219, + 0.944672, + 0.937643, + 0.865049, + 0.861198, + 0.950546, + 0.970514, + 0.939022 + ], + "train_epoch_time": 4.844262361526489, + "train_loss": 2.898751284026005, + "train_score": 0.16185998026832105, + "val_loss": 2.906747388511246, + "val_score": 0.1593893511492404 + }, + { + "epoch": 3, + "grad_norm": 0.5803899168968201, + "learning_rate": 1.0, + "model_norm": 87.54922485351562, + "step_logs": { + "grad_norm": { + "162": 0.7100263833999634, + "163": 0.9402167201042175, + "164": 1.0552407503128052, + "165": 0.5816885828971863, + "166": 0.536823034286499, + "167": 0.5009744763374329, + "168": 0.668644905090332, + "169": 0.7584171891212463, + "170": 0.7275749444961548, + "171": 0.6691548228263855, + "172": 0.9205804467201233, + "173": 0.7444524765014648, + "174": 0.6788049340248108, + "175": 0.5134831070899963, + "176": 0.5923011302947998, + "177": 0.9358205795288086, + "178": 1.0203570127487183, + "179": 0.7821270227432251, + "180": 0.5423383712768555, + "181": 0.7544683814048767, + "182": 0.6733540296554565, + "183": 0.745196521282196, + "184": 0.7250047326087952, + "185": 0.8175691962242126, + "186": 0.6404562592506409, + "187": 0.5733717679977417, + "188": 0.8767086863517761, + "189": 1.206737756729126, + "190": 0.707180380821228, + "191": 0.5744861960411072, + "192": 0.5868545770645142, + "193": 0.5949322581291199, + "194": 0.7133055329322815, + "195": 0.6171315312385559, + "196": 0.5165334343910217, + "197": 0.7953967452049255, + "198": 1.1480768918991089, + "199": 0.7780097126960754, + "200": 0.7076628804206848, + "201": 0.5793918371200562, + "202": 0.5321841835975647, + "203": 0.7195229530334473, + "204": 0.8034334182739258, + "205": 0.8028842806816101, + "206": 0.8330018520355225, + "207": 0.7464473247528076, + "208": 0.6742626428604126, + "209": 0.817547619342804, + "210": 0.6382176876068115, + "211": 0.36893272399902344, + "212": 0.5235304236412048, + "213": 0.8875744342803955, + "214": 0.8481656908988953, + "215": 0.5803899168968201 + }, + "loss": { + "162": 2.8880667686462402, + "163": 2.873842239379883, + "164": 3.054826259613037, + "165": 2.973947286605835, + "166": 2.804098606109619, + "167": 2.7878613471984863, + "168": 2.8069705963134766, + "169": 2.899956464767456, + "170": 2.86919903755188, + "171": 2.824172019958496, + "172": 2.8239760398864746, + "173": 2.9311931133270264, + "174": 2.7902462482452393, + "175": 2.810206890106201, + "176": 2.7419300079345703, + "177": 2.8503379821777344, + "178": 2.9393467903137207, + "179": 2.9799861907958984, + "180": 2.7808358669281006, + "181": 2.8034653663635254, + "182": 2.834273338317871, + "183": 2.7865006923675537, + "184": 2.8316876888275146, + "185": 2.8238914012908936, + "186": 2.841508388519287, + "187": 2.7296905517578125, + "188": 2.809426784515381, + "189": 2.9049742221832275, + "190": 2.9583373069763184, + "191": 2.7423365116119385, + "192": 2.7391369342803955, + "193": 2.7513647079467773, + "194": 2.744466781616211, + "195": 2.8149805068969727, + "196": 2.7198293209075928, + "197": 2.738754987716675, + "198": 2.8643856048583984, + "199": 2.9525084495544434, + "200": 2.76503586769104, + "201": 2.739008665084839, + "202": 2.709150791168213, + "203": 2.707935333251953, + "204": 2.807283878326416, + "205": 2.77246356010437, + "206": 2.80674409866333, + "207": 2.7761611938476562, + "208": 2.782247543334961, + "209": 2.7127556800842285, + "210": 2.8310041427612305, + "211": 2.665283441543579, + "212": 2.6493804454803467, + "213": 2.7252862453460693, + "214": 2.8497817516326904, + "215": 2.722008228302002 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 0.919727, + 0.8667, + 0.845839, + 0.946174, + 0.951126, + 0.956927, + 0.926236, + 0.909775, + 0.915542, + 0.926549, + 0.869528, + 0.913629, + 0.923729, + 0.95519, + 0.939873, + 0.866834, + 0.849544, + 0.906916, + 0.949771, + 0.907835, + 0.925938, + 0.909385, + 0.91507, + 0.894174, + 0.932682, + 0.943202, + 0.879668, + 0.799589, + 0.922063, + 0.943241, + 0.940852, + 0.939566, + 0.915167, + 0.936639, + 0.953245, + 0.896458, + 0.812955, + 0.907025, + 0.916963, + 0.942258, + 0.950326, + 0.912749, + 0.896885, + 0.895853, + 0.889987, + 0.908801, + 0.924469, + 0.890319, + 0.932889, + 0.975102, + 0.950818, + 0.873719, + 0.887928, + 0.94173 + ], + "train_epoch_time": 4.842041015625, + "train_loss": 2.699241478439725, + "train_score": 0.24656451761808082, + "val_loss": 2.7276196362093588, + "val_score": 0.2379134975707353 + }, + { + "epoch": 4, + "grad_norm": 0.5657550692558289, + "learning_rate": 1.0, + "model_norm": 87.64610290527344, + "step_logs": { + "grad_norm": { + "216": 0.5650863647460938, + "217": 0.8314638137817383, + "218": 0.7408836483955383, + "219": 0.4571698009967804, + "220": 0.5305669903755188, + "221": 0.7171705365180969, + "222": 0.7380601763725281, + "223": 0.6865860819816589, + "224": 0.6067739129066467, + "225": 0.4946342408657074, + "226": 0.6058553457260132, + "227": 0.9833201169967651, + "228": 0.6633129715919495, + "229": 0.4711220860481262, + "230": 0.5343348383903503, + "231": 0.5930265188217163, + "232": 0.56217360496521, + "233": 0.5278383493423462, + "234": 0.6075369119644165, + "235": 0.7591191530227661, + "236": 0.79591304063797, + "237": 0.680607795715332, + "238": 0.7248187065124512, + "239": 0.828799843788147, + "240": 0.5097420811653137, + "241": 0.4332064688205719, + "242": 0.6931511163711548, + "243": 0.6310138702392578, + "244": 0.4703831076622009, + "245": 0.6393974423408508, + "246": 0.9215507507324219, + "247": 0.6482958793640137, + "248": 0.3930441439151764, + "249": 0.41156721115112305, + "250": 0.5275195240974426, + "251": 0.6965681314468384, + "252": 0.8604452013969421, + "253": 0.7343969941139221, + "254": 0.8054675459861755, + "255": 0.7035790681838989, + "256": 0.46521276235580444, + "257": 0.5560733675956726, + "258": 0.8042013049125671, + "259": 0.6615998148918152, + "260": 0.4059397876262665, + "261": 0.4841102361679077, + "262": 0.7663136124610901, + "263": 0.8003219366073608, + "264": 0.5811874270439148, + "265": 0.5593263506889343, + "266": 0.673582911491394, + "267": 0.9121878743171692, + "268": 0.81010901927948, + "269": 0.5657550692558289 + }, + "loss": { + "216": 2.6997551918029785, + "217": 2.6929354667663574, + "218": 2.8422014713287354, + "219": 2.6970372200012207, + "220": 2.6564443111419678, + "221": 2.725572109222412, + "222": 2.7633790969848633, + "223": 2.714470863342285, + "224": 2.7507214546203613, + "225": 2.6631269454956055, + "226": 2.7074687480926514, + "227": 2.7278785705566406, + "228": 2.8227436542510986, + "229": 2.677269458770752, + "230": 2.6587533950805664, + "231": 2.6636836528778076, + "232": 2.6790342330932617, + "233": 2.677955150604248, + "234": 2.637813091278076, + "235": 2.720576286315918, + "236": 2.733254909515381, + "237": 2.7667438983917236, + "238": 2.6778652667999268, + "239": 2.7923903465270996, + "240": 2.699666976928711, + "241": 2.63845157623291, + "242": 2.6659626960754395, + "243": 2.752375602722168, + "244": 2.64259672164917, + "245": 2.659594774246216, + "246": 2.7044730186462402, + "247": 2.778343677520752, + "248": 2.6148858070373535, + "249": 2.5854883193969727, + "250": 2.6170525550842285, + "251": 2.6342358589172363, + "252": 2.7025701999664307, + "253": 2.7219066619873047, + "254": 2.6559152603149414, + "255": 2.7618870735168457, + "256": 2.6095192432403564, + "257": 2.615939140319824, + "258": 2.6630914211273193, + "259": 2.7261133193969727, + "260": 2.5777173042297363, + "261": 2.5941927433013916, + "262": 2.618781566619873, + "263": 2.7158823013305664, + "264": 2.6539714336395264, + "265": 2.579207420349121, + "266": 2.622006416320801, + "267": 2.686002016067505, + "268": 2.6722254753112793, + "269": 2.6395888328552246 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 0.944163, + 0.886242, + 0.91194, + 0.962698, + 0.949682, + 0.913782, + 0.91028, + 0.920106, + 0.937275, + 0.956082, + 0.936517, + 0.849452, + 0.927699, + 0.960198, + 0.949043, + 0.938074, + 0.944301, + 0.950552, + 0.934611, + 0.904234, + 0.896151, + 0.922753, + 0.910669, + 0.890475, + 0.954086, + 0.965657, + 0.917339, + 0.932546, + 0.959818, + 0.928626, + 0.864297, + 0.929682, + 0.971308, + 0.968282, + 0.949518, + 0.91567, + 0.879527, + 0.909857, + 0.891156, + 0.917754, + 0.960183, + 0.944196, + 0.891721, + 0.925684, + 0.969026, + 0.956782, + 0.899183, + 0.894518, + 0.940171, + 0.94282, + 0.920369, + 0.865881, + 0.890634, + 0.942835 + ], + "train_epoch_time": 4.8414764404296875, + "train_loss": 2.5999833176775677, + "train_score": 0.2584850249964332, + "val_loss": 2.6212340383934785, + "val_score": 0.2511660449814167 + }, + { + "epoch": 5, + "grad_norm": 0.7623428702354431, + "learning_rate": 1.0, + "model_norm": 87.73600769042969, + "step_logs": { + "grad_norm": { + "270": 0.5762381553649902, + "271": 0.7667810916900635, + "272": 0.725177526473999, + "273": 0.6380300521850586, + "274": 0.6508094072341919, + "275": 0.6889649033546448, + "276": 0.8186596632003784, + "277": 0.809810221195221, + "278": 0.4971579313278198, + "279": 0.4767281711101532, + "280": 0.7546098232269287, + "281": 0.7490273118019104, + "282": 0.5387448072433472, + "283": 0.6713287234306335, + "284": 0.8918309807777405, + "285": 0.7312997579574585, + "286": 0.5076626539230347, + "287": 0.5812913775444031, + "288": 0.8608074188232422, + "289": 0.8124175667762756, + "290": 0.6171245574951172, + "291": 0.7660766243934631, + "292": 0.8741928339004517, + "293": 0.7114712595939636, + "294": 0.6230983138084412, + "295": 0.6871888637542725, + "296": 0.8769329190254211, + "297": 0.9192678928375244, + "298": 0.6888883113861084, + "299": 0.5806081891059875, + "300": 0.5481948852539062, + "301": 0.6057829856872559, + "302": 0.8242425322532654, + "303": 0.7228921055793762, + "304": 0.6174896359443665, + "305": 0.9535212516784668, + "306": 0.8166515231132507, + "307": 0.6627100110054016, + "308": 0.760825514793396, + "309": 0.6396526098251343, + "310": 0.4781973958015442, + "311": 0.602472722530365, + "312": 0.7694366574287415, + "313": 0.8572793006896973, + "314": 0.6807762980461121, + "315": 0.7034918069839478, + "316": 0.7197319269180298, + "317": 0.6302210092544556, + "318": 0.7197474241256714, + "319": 0.6211883425712585, + "320": 0.5064917206764221, + "321": 0.5672907829284668, + "322": 0.6895321011543274, + "323": 0.7623428702354431 + }, + "loss": { + "270": 2.5940680503845215, + "271": 2.6578333377838135, + "272": 2.671351909637451, + "273": 2.6633901596069336, + "274": 2.593959331512451, + "275": 2.6608104705810547, + "276": 2.6063003540039062, + "277": 2.735334873199463, + "278": 2.594287395477295, + "279": 2.56589412689209, + "280": 2.573725700378418, + "281": 2.7140870094299316, + "282": 2.5862767696380615, + "283": 2.6108508110046387, + "284": 2.621504068374634, + "285": 2.686147689819336, + "286": 2.5590689182281494, + "287": 2.5385873317718506, + "288": 2.5845212936401367, + "289": 2.6836318969726562, + "290": 2.601633071899414, + "291": 2.5854430198669434, + "292": 2.6249122619628906, + "293": 2.628178596496582, + "294": 2.538827896118164, + "295": 2.6066033840179443, + "296": 2.6027989387512207, + "297": 2.687753677368164, + "298": 2.6585183143615723, + "299": 2.5458569526672363, + "300": 2.54611873626709, + "301": 2.544062376022339, + "302": 2.5724222660064697, + "303": 2.649348258972168, + "304": 2.5425119400024414, + "305": 2.5965559482574463, + "306": 2.6575493812561035, + "307": 2.6183481216430664, + "308": 2.553880214691162, + "309": 2.6155781745910645, + "310": 2.5026512145996094, + "311": 2.5396928787231445, + "312": 2.547732353210449, + "313": 2.6292998790740967, + "314": 2.5586540699005127, + "315": 2.553309202194214, + "316": 2.6014316082000732, + "317": 2.5686278343200684, + "318": 2.547663927078247, + "319": 2.594808578491211, + "320": 2.513535261154175, + "321": 2.5114336013793945, + "322": 2.554598569869995, + "323": 2.567309856414795 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 0.939848, + 0.900408, + 0.91039, + 0.929004, + 0.92452, + 0.918108, + 0.886074, + 0.892957, + 0.954529, + 0.957592, + 0.900394, + 0.906325, + 0.946869, + 0.920548, + 0.868282, + 0.909465, + 0.95206, + 0.9376, + 0.874622, + 0.890494, + 0.931799, + 0.898073, + 0.872928, + 0.912158, + 0.928968, + 0.916941, + 0.871287, + 0.864152, + 0.918059, + 0.937904, + 0.944274, + 0.932728, + 0.883353, + 0.91023, + 0.930247, + 0.851007, + 0.888513, + 0.922623, + 0.898207, + 0.927459, + 0.95631, + 0.933306, + 0.895906, + 0.87738, + 0.916955, + 0.911649, + 0.909452, + 0.928235, + 0.907714, + 0.930791, + 0.951447, + 0.939787, + 0.914864, + 0.898323 + ], + "train_epoch_time": 4.842908143997192, + "train_loss": 2.5616736331321244, + "train_score": 0.28084312235265757, + "val_loss": 2.591733685590917, + "val_score": 0.2663201417016928 + }, + { + "epoch": 6, + "grad_norm": 0.544720470905304, + "learning_rate": 1.0, + "model_norm": 87.81867980957031, + "step_logs": { + "grad_norm": { + "324": 0.6881338357925415, + "325": 0.7093273997306824, + "326": 0.821570098400116, + "327": 0.7350651621818542, + "328": 0.6052749752998352, + "329": 0.6674264669418335, + "330": 0.7156462669372559, + "331": 0.6024705767631531, + "332": 0.5499410629272461, + "333": 0.6008399724960327, + "334": 0.6784842014312744, + "335": 0.6539338231086731, + "336": 0.6002705693244934, + "337": 0.7022740244865417, + "338": 0.7502356171607971, + "339": 0.7172045707702637, + "340": 0.5320237278938293, + "341": 0.5824774503707886, + "342": 0.6027033925056458, + "343": 0.6100599765777588, + "344": 0.7220253348350525, + "345": 0.711876630783081, + "346": 0.6622322201728821, + "347": 0.7625877261161804, + "348": 0.7873712182044983, + "349": 0.8393401503562927, + "350": 0.679058849811554, + "351": 0.6693992614746094, + "352": 0.6074686646461487, + "353": 0.542557954788208, + "354": 0.627860426902771, + "355": 0.654497504234314, + "356": 0.6317742466926575, + "357": 0.6605423092842102, + "358": 0.5392128825187683, + "359": 0.519406259059906, + "360": 0.8243221640586853, + "361": 0.6306591629981995, + "362": 0.5197187662124634, + "363": 0.5710395574569702, + "364": 0.6349229216575623, + "365": 0.6464499831199646, + "366": 0.6011934280395508, + "367": 0.5506454110145569, + "368": 0.544942319393158, + "369": 0.5747122168540955, + "370": 0.6067695617675781, + "371": 0.6816163063049316, + "372": 0.6515807509422302, + "373": 0.5940066576004028, + "374": 0.5205538868904114, + "375": 0.4759959280490875, + "376": 0.4977388083934784, + "377": 0.544720470905304 + }, + "loss": { + "324": 2.5615949630737305, + "325": 2.5503807067871094, + "326": 2.572110176086426, + "327": 2.6042826175689697, + "328": 2.529184579849243, + "329": 2.5294673442840576, + "330": 2.552457332611084, + "331": 2.5794436931610107, + "332": 2.4881858825683594, + "333": 2.5058255195617676, + "334": 2.5222389698028564, + "335": 2.52669095993042, + "336": 2.494293689727783, + "337": 2.5254476070404053, + "338": 2.5520334243774414, + "339": 2.5825424194335938, + "340": 2.514739990234375, + "341": 2.495880365371704, + "342": 2.5257925987243652, + "343": 2.5114736557006836, + "344": 2.5242955684661865, + "345": 2.564255714416504, + "346": 2.525627613067627, + "347": 2.5470619201660156, + "348": 2.5338234901428223, + "349": 2.5905356407165527, + "350": 2.522367000579834, + "351": 2.535236358642578, + "352": 2.5280518531799316, + "353": 2.470916509628296, + "354": 2.51663875579834, + "355": 2.5265355110168457, + "356": 2.4779067039489746, + "357": 2.5526695251464844, + "358": 2.495434045791626, + "359": 2.495894193649292, + "360": 2.505380630493164, + "361": 2.5761022567749023, + "362": 2.4901740550994873, + "363": 2.5038435459136963, + "364": 2.49552845954895, + "365": 2.5320253372192383, + "366": 2.5038163661956787, + "367": 2.496917724609375, + "368": 2.4635276794433594, + "369": 2.4992551803588867, + "370": 2.4868030548095703, + "371": 2.515239953994751, + "372": 2.5180130004882812, + "373": 2.4773757457733154, + "374": 2.4581429958343506, + "375": 2.474637985229492, + "376": 2.4543020725250244, + "377": 2.4802401065826416 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 0.915392, + 0.910215, + 0.884009, + 0.906013, + 0.932465, + 0.919072, + 0.908822, + 0.934267, + 0.942708, + 0.932806, + 0.916375, + 0.92198, + 0.932636, + 0.911042, + 0.900677, + 0.909431, + 0.94672, + 0.936358, + 0.932916, + 0.931017, + 0.906404, + 0.910072, + 0.920115, + 0.897538, + 0.890999, + 0.880302, + 0.916249, + 0.918802, + 0.93198, + 0.943782, + 0.927368, + 0.921851, + 0.925464, + 0.921266, + 0.94495, + 0.948726, + 0.880584, + 0.928336, + 0.948555, + 0.938864, + 0.925266, + 0.923768, + 0.932682, + 0.942759, + 0.943154, + 0.938017, + 0.931077, + 0.915452, + 0.922251, + 0.933521, + 0.947761, + 0.956225, + 0.951954, + 0.943559 + ], + "train_epoch_time": 4.844611406326294, + "train_loss": 2.475346781978306, + "train_score": 0.30142911581945214, + "val_loss": 2.5075279111566555, + "val_score": 0.2918654919423411 + }, + { + "epoch": 7, + "grad_norm": 0.58362877368927, + "learning_rate": 1.0, + "model_norm": 87.90218353271484, + "step_logs": { + "grad_norm": { + "378": 0.568577766418457, + "379": 0.6245736479759216, + "380": 0.5861882567405701, + "381": 0.4929637610912323, + "382": 0.6263610124588013, + "383": 0.6466138362884521, + "384": 0.5652489066123962, + "385": 0.6791654825210571, + "386": 0.6533142924308777, + "387": 0.5913721919059753, + "388": 0.7104600071907043, + "389": 0.8303994536399841, + "390": 0.7594033479690552, + "391": 0.5994577407836914, + "392": 0.5189815163612366, + "393": 0.5038019418716431, + "394": 0.5772358179092407, + "395": 0.5958951711654663, + "396": 0.6662114858627319, + "397": 0.6174487471580505, + "398": 0.508374810218811, + "399": 0.591770589351654, + "400": 0.5781763195991516, + "401": 0.5616276264190674, + "402": 0.6043291687965393, + "403": 0.5351585149765015, + "404": 0.5233591794967651, + "405": 0.5638923645019531, + "406": 0.6078712940216064, + "407": 0.5717411041259766, + "408": 0.5401195287704468, + "409": 0.4989900588989258, + "410": 0.5397713780403137, + "411": 0.5587407350540161, + "412": 0.5320559144020081, + "413": 0.6947481632232666, + "414": 0.718398928642273, + "415": 0.8130577206611633, + "416": 1.1151018142700195, + "417": 1.0088691711425781, + "418": 0.5021718144416809, + "419": 0.40648096799850464, + "420": 0.44817137718200684, + "421": 0.520017147064209, + "422": 0.5725849270820618, + "423": 0.6407066583633423, + "424": 0.6596114039421082, + "425": 0.6369249224662781, + "426": 0.6807340383529663, + "427": 0.5690308213233948, + "428": 0.5339997410774231, + "429": 0.5982414484024048, + "430": 0.649864673614502, + "431": 0.58362877368927 + }, + "loss": { + "378": 2.500174045562744, + "379": 2.473900318145752, + "380": 2.5107316970825195, + "381": 2.4709434509277344, + "382": 2.478297233581543, + "383": 2.5236899852752686, + "384": 2.473050355911255, + "385": 2.4877266883850098, + "386": 2.5103933811187744, + "387": 2.500427484512329, + "388": 2.4669265747070312, + "389": 2.559154510498047, + "390": 2.4983279705047607, + "391": 2.503368854522705, + "392": 2.472099542617798, + "393": 2.4667510986328125, + "394": 2.4647960662841797, + "395": 2.48909330368042, + "396": 2.4767849445343018, + "397": 2.509556770324707, + "398": 2.4718313217163086, + "399": 2.4844582080841064, + "400": 2.472041606903076, + "401": 2.463960647583008, + "402": 2.442333221435547, + "403": 2.45332670211792, + "404": 2.434847831726074, + "405": 2.4583888053894043, + "406": 2.430128574371338, + "407": 2.4709701538085938, + "408": 2.4280524253845215, + "409": 2.452427864074707, + "410": 2.43495774269104, + "411": 2.471374988555908, + "412": 2.4536819458007812, + "413": 2.456254243850708, + "414": 2.5245273113250732, + "415": 2.4855000972747803, + "416": 2.544663190841675, + "417": 2.5951011180877686, + "418": 2.5055694580078125, + "419": 2.433615207672119, + "420": 2.4409799575805664, + "421": 2.432000160217285, + "422": 2.4715933799743652, + "423": 2.467902183532715, + "424": 2.4839553833007812, + "425": 2.494706153869629, + "426": 2.4653239250183105, + "427": 2.4996910095214844, + "428": 2.415006399154663, + "429": 2.4614522457122803, + "430": 2.441516876220703, + "431": 2.483922243118286 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 0.939274, + 0.92692, + 0.935953, + 0.953131, + 0.926653, + 0.9235, + 0.939322, + 0.915157, + 0.92165, + 0.934639, + 0.907191, + 0.881271, + 0.896527, + 0.933033, + 0.948338, + 0.95107, + 0.936687, + 0.93342, + 0.917768, + 0.929404, + 0.950319, + 0.934163, + 0.936668, + 0.939843, + 0.930434, + 0.94485, + 0.946748, + 0.939257, + 0.929345, + 0.937958, + 0.94333, + 0.951688, + 0.94355, + 0.940591, + 0.945461, + 0.910536, + 0.907263, + 0.882625, + 0.803648, + 0.836048, + 0.952088, + 0.967168, + 0.960483, + 0.947332, + 0.937801, + 0.923217, + 0.919473, + 0.924807, + 0.914091, + 0.939172, + 0.944253, + 0.932227, + 0.920397, + 0.935834 + ], + "train_epoch_time": 4.844684600830078, + "train_loss": 2.4265488082743443, + "train_score": 0.2991156294150879, + "val_loss": 2.454537824703002, + "val_score": 0.2879996051676101 + }, + { + "epoch": 8, + "grad_norm": 0.5858098268508911, + "learning_rate": 1.0, + "model_norm": 87.97018432617188, + "step_logs": { + "grad_norm": { + "432": 0.5174282193183899, + "433": 0.5653510689735413, + "434": 0.6048844456672668, + "435": 0.6042084097862244, + "436": 0.6482882499694824, + "437": 0.5685992240905762, + "438": 0.4903128147125244, + "439": 0.5669147372245789, + "440": 0.6930384039878845, + "441": 0.6139175891876221, + "442": 0.4596112072467804, + "443": 0.4412894546985626, + "444": 0.4856876730918884, + "445": 0.5192708969116211, + "446": 0.6828057765960693, + "447": 0.6767101287841797, + "448": 0.6061641573905945, + "449": 0.5818963050842285, + "450": 0.5864323377609253, + "451": 0.5374137759208679, + "452": 0.48834657669067383, + "453": 0.5185680389404297, + "454": 0.5447973012924194, + "455": 0.5373228788375854, + "456": 0.5501261353492737, + "457": 0.7204861044883728, + "458": 0.7994251847267151, + "459": 0.6163647174835205, + "460": 0.5685983300209045, + "461": 0.5511287450790405, + "462": 0.5754255652427673, + "463": 0.6026052832603455, + "464": 0.6120621562004089, + "465": 0.5343873500823975, + "466": 0.44022950530052185, + "467": 0.4660203456878662, + "468": 0.5254347920417786, + "469": 0.5624760389328003, + "470": 0.5719310641288757, + "471": 0.5804353952407837, + "472": 0.5198171138763428, + "473": 0.4791575074195862, + "474": 0.5389455556869507, + "475": 0.5344741344451904, + "476": 0.48459771275520325, + "477": 0.48139262199401855, + "478": 0.5401302576065063, + "479": 0.5814175009727478, + "480": 0.5682058930397034, + "481": 0.5351996421813965, + "482": 0.5071942210197449, + "483": 0.5557736158370972, + "484": 0.6169612407684326, + "485": 0.5858098268508911 + }, + "loss": { + "432": 2.432744264602661, + "433": 2.4534616470336914, + "434": 2.44284725189209, + "435": 2.4656524658203125, + "436": 2.4371800422668457, + "437": 2.481755495071411, + "438": 2.406954288482666, + "439": 2.4203262329101562, + "440": 2.459423065185547, + "441": 2.4922173023223877, + "442": 2.4211604595184326, + "443": 2.3916897773742676, + "444": 2.4236063957214355, + "445": 2.4038991928100586, + "446": 2.4305214881896973, + "447": 2.4868862628936768, + "448": 2.4516091346740723, + "449": 2.4630825519561768, + "450": 2.4366302490234375, + "451": 2.436910629272461, + "452": 2.4233202934265137, + "453": 2.4246230125427246, + "454": 2.4233484268188477, + "455": 2.4261343479156494, + "456": 2.4060890674591064, + "457": 2.432471990585327, + "458": 2.491461992263794, + "459": 2.4399023056030273, + "460": 2.4218220710754395, + "461": 2.4430437088012695, + "462": 2.4276366233825684, + "463": 2.4408247470855713, + "464": 2.4266357421875, + "465": 2.4531545639038086, + "466": 2.3855481147766113, + "467": 2.409026622772217, + "468": 2.41092848777771, + "469": 2.439362049102783, + "470": 2.416613817214966, + "471": 2.4677913188934326, + "472": 2.4265360832214355, + "473": 2.416658401489258, + "474": 2.418534278869629, + "475": 2.424913167953491, + "476": 2.3806135654449463, + "477": 2.424333095550537, + "478": 2.4047040939331055, + "479": 2.4056622982025146, + "480": 2.4200260639190674, + "481": 2.4149255752563477, + "482": 2.405825138092041, + "483": 2.4439802169799805, + "484": 2.420701503753662, + "485": 2.461639404296875 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 0.947843, + 0.938846, + 0.930329, + 0.931072, + 0.920622, + 0.938847, + 0.952435, + 0.937739, + 0.911041, + 0.929701, + 0.958199, + 0.960882, + 0.953593, + 0.946894, + 0.912484, + 0.915692, + 0.930287, + 0.935685, + 0.934082, + 0.944057, + 0.953102, + 0.947459, + 0.942295, + 0.94384, + 0.940831, + 0.903585, + 0.886325, + 0.927771, + 0.937428, + 0.941473, + 0.936157, + 0.930763, + 0.928342, + 0.944997, + 0.960966, + 0.956869, + 0.945844, + 0.939101, + 0.936612, + 0.936101, + 0.947258, + 0.954652, + 0.943352, + 0.944375, + 0.952996, + 0.954386, + 0.942809, + 0.934352, + 0.937466, + 0.944014, + 0.94925, + 0.940563, + 0.927109, + 0.934838 + ], + "train_epoch_time": 4.845614671707153, + "train_loss": 2.3960921372368484, + "train_score": 0.30780801653348905, + "val_loss": 2.431917364096395, + "val_score": 0.3006691307205831 + }, + { + "epoch": 9, + "grad_norm": 0.5992441773414612, + "learning_rate": 1.0, + "model_norm": 88.04580688476562, + "step_logs": { + "grad_norm": { + "486": 0.5251680016517639, + "487": 0.5225484371185303, + "488": 0.5134865641593933, + "489": 0.5666976571083069, + "490": 0.6372716426849365, + "491": 0.7346780896186829, + "492": 0.9128353595733643, + "493": 1.184725046157837, + "494": 0.7774388194084167, + "495": 0.80953049659729, + "496": 1.1751888990402222, + "497": 0.6669449806213379, + "498": 0.5073861479759216, + "499": 0.4833794832229614, + "500": 0.4841935634613037, + "501": 0.4673689603805542, + "502": 0.4761419892311096, + "503": 0.5035741925239563, + "504": 0.5230599641799927, + "505": 0.5073630213737488, + "506": 0.5405907034873962, + "507": 0.4714689254760742, + "508": 0.4369595944881439, + "509": 0.4372880458831787, + "510": 0.44072920083999634, + "511": 0.464080274105072, + "512": 0.4588129222393036, + "513": 0.5141434669494629, + "514": 0.5300304293632507, + "515": 0.5264018177986145, + "516": 0.5475291609764099, + "517": 0.5486292243003845, + "518": 0.5629437565803528, + "519": 0.5145405530929565, + "520": 0.452772319316864, + "521": 0.4707406163215637, + "522": 0.47186407446861267, + "523": 0.4546498656272888, + "524": 0.47039440274238586, + "525": 0.4974057674407959, + "526": 0.5125047564506531, + "527": 0.4869077503681183, + "528": 0.4762588143348694, + "529": 0.5857961773872375, + "530": 0.6514634490013123, + "531": 0.5966834425926208, + "532": 0.5027233958244324, + "533": 0.44797879457473755, + "534": 0.4814678132534027, + "535": 0.5191361904144287, + "536": 0.5164108872413635, + "537": 0.48637133836746216, + "538": 0.5138224959373474, + "539": 0.5992441773414612 + }, + "loss": { + "486": 2.4083127975463867, + "487": 2.4202003479003906, + "488": 2.405160903930664, + "489": 2.423948287963867, + "490": 2.424227714538574, + "491": 2.4508426189422607, + "492": 2.478623390197754, + "493": 2.5330142974853516, + "494": 2.4962596893310547, + "495": 2.4829611778259277, + "496": 2.538888931274414, + "497": 2.5282459259033203, + "498": 2.43544340133667, + "499": 2.4205265045166016, + "500": 2.421490430831909, + "501": 2.395378589630127, + "502": 2.3809010982513428, + "503": 2.404325008392334, + "504": 2.410860061645508, + "505": 2.390225410461426, + "506": 2.42000150680542, + "507": 2.3842532634735107, + "508": 2.3806405067443848, + "509": 2.375567674636841, + "510": 2.3685219287872314, + "511": 2.3860483169555664, + "512": 2.384050130844116, + "513": 2.3912487030029297, + "514": 2.431748867034912, + "515": 2.3876798152923584, + "516": 2.4083287715911865, + "517": 2.371127128601074, + "518": 2.4180474281311035, + "519": 2.3849360942840576, + "520": 2.3860342502593994, + "521": 2.3552846908569336, + "522": 2.3718647956848145, + "523": 2.347947359085083, + "524": 2.384767532348633, + "525": 2.38918399810791, + "526": 2.3818764686584473, + "527": 2.364521026611328, + "528": 2.383082866668701, + "529": 2.3857955932617188, + "530": 2.434995174407959, + "531": 2.3783154487609863, + "532": 2.3764476776123047, + "533": 2.356940746307373, + "534": 2.372837543487549, + "535": 2.3827857971191406, + "536": 2.4039812088012695, + "537": 2.394853115081787, + "538": 2.391368865966797, + "539": 2.384692668914795 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 0.945841, + 0.9466, + 0.948035, + 0.937871, + 0.922712, + 0.900807, + 0.856098, + 0.783051, + 0.89201, + 0.883418, + 0.786174, + 0.919144, + 0.9498, + 0.953957, + 0.953826, + 0.956393, + 0.954553, + 0.949906, + 0.946305, + 0.948904, + 0.943058, + 0.955461, + 0.961445, + 0.96131, + 0.96061, + 0.956818, + 0.957717, + 0.947622, + 0.945391, + 0.945156, + 0.941407, + 0.940317, + 0.938501, + 0.947414, + 0.958811, + 0.955071, + 0.955167, + 0.957837, + 0.955664, + 0.950771, + 0.947744, + 0.952261, + 0.954572, + 0.932908, + 0.919839, + 0.930363, + 0.949511, + 0.959165, + 0.953428, + 0.946475, + 0.947448, + 0.952936, + 0.947686, + 0.92998 + ], + "train_epoch_time": 4.844855308532715, + "train_loss": 2.4032935036477263, + "train_score": 0.29880290525715525, + "val_loss": 2.44017634818803, + "val_score": 0.28712955719431016 + }, + { + "epoch": 10, + "grad_norm": 0.41238293051719666, + "learning_rate": 1.0, + "model_norm": 88.12128448486328, + "step_logs": { + "grad_norm": { + "540": 0.49666303396224976, + "541": 0.44918397068977356, + "542": 0.487358421087265, + "543": 0.5815653800964355, + "544": 0.647682785987854, + "545": 0.5364251136779785, + "546": 0.49116113781929016, + "547": 0.5506793260574341, + "548": 0.5831376910209656, + "549": 0.5818339586257935, + "550": 0.5751243829727173, + "551": 0.5565703511238098, + "552": 0.5409532785415649, + "553": 0.5474473237991333, + "554": 0.5073561668395996, + "555": 0.44616034626960754, + "556": 0.49524757266044617, + "557": 0.5654675960540771, + "558": 0.6029525995254517, + "559": 0.5905118584632874, + "560": 0.543039858341217, + "561": 0.5158525109291077, + "562": 0.4918135702610016, + "563": 0.4921610355377197, + "564": 0.5384225845336914, + "565": 0.5163503885269165, + "566": 0.5473888516426086, + "567": 0.5944982767105103, + "568": 0.6192070245742798, + "569": 0.5348639488220215, + "570": 0.531512439250946, + "571": 0.5428785681724548, + "572": 0.5575944185256958, + "573": 0.5357584953308105, + "574": 0.5161674618721008, + "575": 0.5172922015190125, + "576": 0.5503405332565308, + "577": 0.6006607413291931, + "578": 0.5994082689285278, + "579": 0.5745787620544434, + "580": 0.5670735836029053, + "581": 0.5644622445106506, + "582": 0.7373393774032593, + "583": 1.0047847032546997, + "584": 1.1907254457473755, + "585": 0.5557610988616943, + "586": 0.5193143486976624, + "587": 0.4940536320209503, + "588": 0.4744510352611542, + "589": 0.5412111282348633, + "590": 0.5464239716529846, + "591": 0.49607542157173157, + "592": 0.4465716779232025, + "593": 0.41238293051719666 + }, + "loss": { + "540": 2.4032466411590576, + "541": 2.345470428466797, + "542": 2.368605136871338, + "543": 2.3844478130340576, + "544": 2.3914103507995605, + "545": 2.415172576904297, + "546": 2.3889405727386475, + "547": 2.3648898601531982, + "548": 2.385829448699951, + "549": 2.373162269592285, + "550": 2.3915953636169434, + "551": 2.390748977661133, + "552": 2.3755428791046143, + "553": 2.3753912448883057, + "554": 2.378481864929199, + "555": 2.326127290725708, + "556": 2.375342607498169, + "557": 2.3681674003601074, + "558": 2.365988254547119, + "559": 2.3784890174865723, + "560": 2.3928966522216797, + "561": 2.3594465255737305, + "562": 2.3655896186828613, + "563": 2.3578858375549316, + "564": 2.3804662227630615, + "565": 2.353290557861328, + "566": 2.3801143169403076, + "567": 2.3858840465545654, + "568": 2.389169692993164, + "569": 2.3770551681518555, + "570": 2.359036922454834, + "571": 2.365795135498047, + "572": 2.3864266872406006, + "573": 2.3413143157958984, + "574": 2.350370168685913, + "575": 2.3472342491149902, + "576": 2.37874174118042, + "577": 2.384798526763916, + "578": 2.37863826751709, + "579": 2.4000275135040283, + "580": 2.3721818923950195, + "581": 2.3964407444000244, + "582": 2.370248794555664, + "583": 2.40104079246521, + "584": 2.529804229736328, + "585": 2.4456586837768555, + "586": 2.401750087738037, + "587": 2.3562047481536865, + "588": 2.3346498012542725, + "589": 2.394507884979248, + "590": 2.392519235610962, + "591": 2.3665237426757812, + "592": 2.367276668548584, + "593": 2.3399550914764404 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 0.951184, + 0.958762, + 0.952255, + 0.933775, + 0.919364, + 0.943778, + 0.951936, + 0.939748, + 0.933476, + 0.933424, + 0.935321, + 0.939157, + 0.941981, + 0.940659, + 0.948665, + 0.958968, + 0.950906, + 0.936759, + 0.928653, + 0.931703, + 0.941958, + 0.946619, + 0.951362, + 0.951145, + 0.942604, + 0.946389, + 0.940782, + 0.931041, + 0.92572, + 0.94324, + 0.943505, + 0.941365, + 0.938842, + 0.942242, + 0.946362, + 0.946073, + 0.940148, + 0.929675, + 0.929779, + 0.935648, + 0.936523, + 0.937666, + 0.897114, + 0.826282, + 0.781113, + 0.940604, + 0.946841, + 0.950754, + 0.954008, + 0.942362, + 0.941266, + 0.950576, + 0.959581, + 0.964936 + ], + "train_epoch_time": 4.843826770782471, + "train_loss": 2.341122251740487, + "train_score": 0.32124955166703145, + "val_loss": 2.391289904799171, + "val_score": 0.30632893233020053 + }, + { + "epoch": 11, + "grad_norm": 0.5648512244224548, + "learning_rate": 1.0, + "model_norm": 88.19158935546875, + "step_logs": { + "grad_norm": { + "594": 0.4455691874027252, + "595": 0.5053259134292603, + "596": 0.5574855804443359, + "597": 0.5647223591804504, + "598": 0.5500990152359009, + "599": 0.5880614519119263, + "600": 0.5595914125442505, + "601": 0.4719184935092926, + "602": 0.4335293471813202, + "603": 0.46494951844215393, + "604": 0.48453763127326965, + "605": 0.5016939043998718, + "606": 0.5016187429428101, + "607": 0.5204160809516907, + "608": 0.5653454065322876, + "609": 0.5589511394500732, + "610": 0.5241890549659729, + "611": 0.5061217546463013, + "612": 0.5021583437919617, + "613": 0.5518539547920227, + "614": 0.5411025881767273, + "615": 0.5096712112426758, + "616": 0.5380481481552124, + "617": 0.6245166063308716, + "618": 0.6331339478492737, + "619": 0.704375147819519, + "620": 0.6344725489616394, + "621": 0.5149283409118652, + "622": 0.4801318943500519, + "623": 0.44577476382255554, + "624": 0.44214946031570435, + "625": 0.48248186707496643, + "626": 0.5095715522766113, + "627": 0.5438368320465088, + "628": 0.6116637587547302, + "629": 0.5771706104278564, + "630": 0.5602117776870728, + "631": 0.5381978750228882, + "632": 0.551306962966919, + "633": 0.5591872930526733, + "634": 0.5699526071548462, + "635": 0.5426019430160522, + "636": 0.5143658518791199, + "637": 0.4915454089641571, + "638": 0.4730825424194336, + "639": 0.5039641261100769, + "640": 0.6594997048377991, + "641": 0.6140817403793335, + "642": 0.47418537735939026, + "643": 0.47246062755584717, + "644": 0.5561679601669312, + "645": 0.6243742108345032, + "646": 0.5975499153137207, + "647": 0.5648512244224548 + }, + "loss": { + "594": 2.3640313148498535, + "595": 2.352384090423584, + "596": 2.3432674407958984, + "597": 2.3749561309814453, + "598": 2.3416640758514404, + "599": 2.3634157180786133, + "600": 2.3780298233032227, + "601": 2.342787742614746, + "602": 2.347719192504883, + "603": 2.340097188949585, + "604": 2.331915855407715, + "605": 2.3315820693969727, + "606": 2.36726450920105, + "607": 2.3293869495391846, + "608": 2.3595693111419678, + "609": 2.353480577468872, + "610": 2.3440096378326416, + "611": 2.3488593101501465, + "612": 2.340973138809204, + "613": 2.339700698852539, + "614": 2.3572349548339844, + "615": 2.31711483001709, + "616": 2.358613967895508, + "617": 2.3390748500823975, + "618": 2.368706226348877, + "619": 2.3891701698303223, + "620": 2.37221622467041, + "621": 2.3539481163024902, + "622": 2.3576271533966064, + "623": 2.3381154537200928, + "624": 2.333294153213501, + "625": 2.3332927227020264, + "626": 2.3361706733703613, + "627": 2.361553430557251, + "628": 2.353455066680908, + "629": 2.3577561378479004, + "630": 2.3264617919921875, + "631": 2.3549859523773193, + "632": 2.337280750274658, + "633": 2.3584494590759277, + "634": 2.341733932495117, + "635": 2.368011951446533, + "636": 2.31221342086792, + "637": 2.3277416229248047, + "638": 2.3182497024536133, + "639": 2.3294882774353027, + "640": 2.350050449371338, + "641": 2.3857479095458984, + "642": 2.3451220989227295, + "643": 2.3097457885742188, + "644": 2.3297903537750244, + "645": 2.367734432220459, + "646": 2.359572172164917, + "647": 2.3486928939819336 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 0.959702, + 0.948519, + 0.937809, + 0.937084, + 0.939308, + 0.931827, + 0.938227, + 0.954626, + 0.961513, + 0.955849, + 0.952073, + 0.948789, + 0.949536, + 0.94506, + 0.936569, + 0.937756, + 0.944633, + 0.948291, + 0.948894, + 0.938895, + 0.941527, + 0.946922, + 0.942179, + 0.923045, + 0.921986, + 0.905935, + 0.921788, + 0.946682, + 0.953389, + 0.959237, + 0.959792, + 0.952486, + 0.947351, + 0.941071, + 0.926367, + 0.934017, + 0.936813, + 0.942064, + 0.93895, + 0.93783, + 0.935139, + 0.941473, + 0.945884, + 0.950661, + 0.953952, + 0.948304, + 0.9153, + 0.926757, + 0.954253, + 0.953906, + 0.937748, + 0.923938, + 0.929659, + 0.936398 + ], + "train_epoch_time": 4.845021963119507, + "train_loss": 2.3404482294189366, + "train_score": 0.32949022589147176, + "val_loss": 2.39555838543293, + "val_score": 0.3175633250298374 + }, + { + "epoch": 12, + "grad_norm": 0.3189324140548706, + "learning_rate": 1.0, + "model_norm": 88.23883819580078, + "step_logs": { + "grad_norm": { + "648": 0.5454807281494141, + "649": 0.49072641134262085, + "650": 0.4966956675052643, + "651": 0.5081068277359009, + "652": 0.49845921993255615, + "653": 0.4477521777153015, + "654": 0.4483279883861542, + "655": 0.4255515933036804, + "656": 0.426658034324646, + "657": 0.46205249428749084, + "658": 0.45439672470092773, + "659": 0.4755401015281677, + "660": 0.5065727233886719, + "661": 0.47532299160957336, + "662": 0.4635762572288513, + "663": 0.4944394826889038, + "664": 0.5047117471694946, + "665": 0.4712497591972351, + "666": 0.45057788491249084, + "667": 0.43672606348991394, + "668": 0.416374146938324, + "669": 0.3980002999305725, + "670": 0.3841959834098816, + "671": 0.3838506042957306, + "672": 0.4220142364501953, + "673": 0.46137169003486633, + "674": 0.5507155060768127, + "675": 0.4968607723712921, + "676": 0.4080953001976013, + "677": 0.39979761838912964, + "678": 0.3902898132801056, + "679": 0.40663012862205505, + "680": 0.41750550270080566, + "681": 0.4333384931087494, + "682": 0.4720942974090576, + "683": 0.4359283447265625, + "684": 0.3877403438091278, + "685": 0.3725552558898926, + "686": 0.3724731206893921, + "687": 0.37966641783714294, + "688": 0.3572253882884979, + "689": 0.37735888361930847, + "690": 0.36963099241256714, + "691": 0.34954479336738586, + "692": 0.307496577501297, + "693": 0.31760311126708984, + "694": 0.3197334110736847, + "695": 0.2972220778465271, + "696": 0.2828977406024933, + "697": 0.3033442795276642, + "698": 0.34348076581954956, + "699": 0.32761478424072266, + "700": 0.30834025144577026, + "701": 0.3189324140548706 + }, + "loss": { + "648": 2.366711139678955, + "649": 2.344536781311035, + "650": 2.326383590698242, + "651": 2.334792375564575, + "652": 2.325902223587036, + "653": 2.312213659286499, + "654": 2.3022360801696777, + "655": 2.3183021545410156, + "656": 2.296701669692993, + "657": 2.3039915561676025, + "658": 2.312582015991211, + "659": 2.291131019592285, + "660": 2.2965288162231445, + "661": 2.3163814544677734, + "662": 2.297990083694458, + "663": 2.307262897491455, + "664": 2.3095717430114746, + "665": 2.326970100402832, + "666": 2.3024840354919434, + "667": 2.3115291595458984, + "668": 2.269829273223877, + "669": 2.291423797607422, + "670": 2.3064560890197754, + "671": 2.280780553817749, + "672": 2.2601380348205566, + "673": 2.280937671661377, + "674": 2.3007781505584717, + "675": 2.309664011001587, + "676": 2.2695772647857666, + "677": 2.2876391410827637, + "678": 2.295290231704712, + "679": 2.2544820308685303, + "680": 2.260594606399536, + "681": 2.2877604961395264, + "682": 2.3171114921569824, + "683": 2.2861509323120117, + "684": 2.2774858474731445, + "685": 2.28551983833313, + "686": 2.2663800716400146, + "687": 2.278992176055908, + "688": 2.2726635932922363, + "689": 2.2567200660705566, + "690": 2.2422826290130615, + "691": 2.261599063873291, + "692": 2.2625021934509277, + "693": 2.2599430084228516, + "694": 2.2674577236175537, + "695": 2.2820215225219727, + "696": 2.263211250305176, + "697": 2.2413551807403564, + "698": 2.2700552940368652, + "699": 2.2490994930267334, + "700": 2.2653770446777344, + "701": 2.2599213123321533 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 0.940857, + 0.945566, + 0.938506, + 0.930963, + 0.927018, + 0.93006, + 0.924117, + 0.922323, + 0.916105, + 0.904851, + 0.900551, + 0.891108, + 0.880382, + 0.880269, + 0.876153, + 0.865786, + 0.858563, + 0.858399, + 0.855368, + 0.851699, + 0.848152, + 0.844951, + 0.840943, + 0.834886, + 0.82419, + 0.813575, + 0.79549, + 0.797803, + 0.802797, + 0.798097, + 0.793364, + 0.785353, + 0.778387, + 0.771096, + 0.761199, + 0.759214, + 0.758311, + 0.753941, + 0.74791, + 0.741456, + 0.737494, + 0.729718, + 0.724393, + 0.720276, + 0.717475, + 0.710766, + 0.704675, + 0.700255, + 0.695056, + 0.687684, + 0.679157, + 0.674163, + 0.669473, + 0.662803 + ], + "train_epoch_time": 4.8442628383636475, + "train_loss": 2.251964889605726, + "train_score": 0.3482794565569551, + "val_loss": 2.305449564065506, + "val_score": 0.3320805830057672 + }, + { + "epoch": 13, + "grad_norm": 0.1924910694360733, + "learning_rate": 0.6666666666666667, + "model_norm": 88.2591781616211, + "step_logs": { + "grad_norm": { + "702": 0.30999884009361267, + "703": 0.3081570565700531, + "704": 0.329313188791275, + "705": 0.3394043743610382, + "706": 0.3209366500377655, + "707": 0.30572962760925293, + "708": 0.32642218470573425, + "709": 0.3607868254184723, + "710": 0.3755326569080353, + "711": 0.3518368601799011, + "712": 0.29647645354270935, + "713": 0.2808452248573303, + "714": 0.2867784798145294, + "715": 0.2971489727497101, + "716": 0.312147319316864, + "717": 0.30570414662361145, + "718": 0.2687722146511078, + "719": 0.26342299580574036, + "720": 0.2747038006782532, + "721": 0.26705795526504517, + "722": 0.2823183536529541, + "723": 0.2924771308898926, + "724": 0.30509480834007263, + "725": 0.2584215998649597, + "726": 0.22097358107566833, + "727": 0.2101549655199051, + "728": 0.21350732445716858, + "729": 0.20022758841514587, + "730": 0.21657611429691315, + "731": 0.21615301072597504, + "732": 0.24807579815387726, + "733": 0.26091575622558594, + "734": 0.25208982825279236, + "735": 0.2280517965555191, + "736": 0.22254043817520142, + "737": 0.2286107838153839, + "738": 0.21865878999233246, + "739": 0.23763251304626465, + "740": 0.21739302575588226, + "741": 0.2225056141614914, + "742": 0.21373525261878967, + "743": 0.18853570520877838, + "744": 0.20300988852977753, + "745": 0.19326931238174438, + "746": 0.20534896850585938, + "747": 0.23516331613063812, + "748": 0.2517220675945282, + "749": 0.23871637880802155, + "750": 0.21123512089252472, + "751": 0.1957864761352539, + "752": 0.2079763114452362, + "753": 0.22575657069683075, + "754": 0.216865673661232, + "755": 0.1924910694360733 + }, + "loss": { + "702": 2.249041795730591, + "703": 2.243256092071533, + "704": 2.2466816902160645, + "705": 2.2427403926849365, + "706": 2.2588908672332764, + "707": 2.23553466796875, + "708": 2.242371082305908, + "709": 2.2314717769622803, + "710": 2.248633861541748, + "711": 2.243069648742676, + "712": 2.2298953533172607, + "713": 2.2373337745666504, + "714": 2.260085105895996, + "715": 2.245069980621338, + "716": 2.2305984497070312, + "717": 2.2537453174591064, + "718": 2.239100694656372, + "719": 2.247382640838623, + "720": 2.2421340942382812, + "721": 2.2236971855163574, + "722": 2.2386865615844727, + "723": 2.2479429244995117, + "724": 2.260164737701416, + "725": 2.2370758056640625, + "726": 2.2144763469696045, + "727": 2.223630666732788, + "728": 2.2569363117218018, + "729": 2.2281651496887207, + "730": 2.235809326171875, + "731": 2.2433576583862305, + "732": 2.2419254779815674, + "733": 2.2283365726470947, + "734": 2.240781784057617, + "735": 2.2361268997192383, + "736": 2.2119195461273193, + "737": 2.2380967140197754, + "738": 2.2211649417877197, + "739": 2.2472715377807617, + "740": 2.221181869506836, + "741": 2.217360496520996, + "742": 2.23199462890625, + "743": 2.233004570007324, + "744": 2.2375247478485107, + "745": 2.2161431312561035, + "746": 2.227876663208008, + "747": 2.2121622562408447, + "748": 2.2314999103546143, + "749": 2.2282145023345947, + "750": 2.2316651344299316, + "751": 2.210264205932617, + "752": 2.2339797019958496, + "753": 2.218125581741333, + "754": 2.208958387374878, + "755": 2.225076675415039 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 0.657305, + 0.651388, + 0.644149, + 0.637536, + 0.632715, + 0.627462, + 0.62035, + 0.612322, + 0.605562, + 0.600977, + 0.597811, + 0.592512, + 0.586272, + 0.579734, + 0.572985, + 0.567322, + 0.562746, + 0.556899, + 0.55041, + 0.544585, + 0.538007, + 0.531605, + 0.525124, + 0.520614, + 0.515571, + 0.509752, + 0.503599, + 0.497761, + 0.491282, + 0.48519, + 0.478321, + 0.471883, + 0.466036, + 0.460484, + 0.454466, + 0.448259, + 0.442329, + 0.435872, + 0.430122, + 0.42391, + 0.417958, + 0.412223, + 0.405885, + 0.399882, + 0.39359, + 0.387008, + 0.380648, + 0.374739, + 0.369004, + 0.363051, + 0.356788, + 0.350435, + 0.344412, + 0.338549 + ], + "train_epoch_time": 4.844266176223755, + "train_loss": 2.218914871817853, + "train_score": 0.35500470776126924, + "val_loss": 2.2751464695935955, + "val_score": 0.3386373429224494 + }, + { + "epoch": 14, + "grad_norm": 0.18735578656196594, + "learning_rate": 0.33333333333333337, + "model_norm": 88.26632690429688, + "step_logs": { + "grad_norm": { + "756": 0.1972518414258957, + "757": 0.20013578236103058, + "758": 0.20564697682857513, + "759": 0.19823364913463593, + "760": 0.21963264048099518, + "761": 0.1970648318529129, + "762": 0.17788271605968475, + "763": 0.21304206550121307, + "764": 0.23827432096004486, + "765": 0.19121018052101135, + "766": 0.20167680084705353, + "767": 0.20170077681541443, + "768": 0.17833080887794495, + "769": 0.18598417937755585, + "770": 0.2017047256231308, + "771": 0.20328561961650848, + "772": 0.16974008083343506, + "773": 0.17685215175151825, + "774": 0.19144922494888306, + "775": 0.20395591855049133, + "776": 0.20244090259075165, + "777": 0.19777093827724457, + "778": 0.19130989909172058, + "779": 0.19305555522441864, + "780": 0.17410975694656372, + "781": 0.20465616881847382, + "782": 0.2013433575630188, + "783": 0.19822944700717926, + "784": 0.1960882842540741, + "785": 0.18061654269695282, + "786": 0.1915721893310547, + "787": 0.1670348346233368, + "788": 0.18097978830337524, + "789": 0.18367715179920197, + "790": 0.19308632612228394, + "791": 0.21377742290496826, + "792": 0.1805594116449356, + "793": 0.16841340065002441, + "794": 0.19316135346889496, + "795": 0.1863432675600052, + "796": 0.1662268042564392, + "797": 0.1815100908279419, + "798": 0.17816632986068726, + "799": 0.17863771319389343, + "800": 0.16933517158031464, + "801": 0.1726406365633011, + "802": 0.1802784949541092, + "803": 0.1657143533229828, + "804": 0.17711059749126434, + "805": 0.1710374653339386, + "806": 0.15857510268688202, + "807": 0.17609231173992157, + "808": 0.18111881613731384, + "809": 0.18735578656196594 + }, + "loss": { + "756": 2.200650215148926, + "757": 2.20719313621521, + "758": 2.2292795181274414, + "759": 2.2182607650756836, + "760": 2.187898635864258, + "761": 2.22731876373291, + "762": 2.2302346229553223, + "763": 2.2116222381591797, + "764": 2.215651273727417, + "765": 2.206503391265869, + "766": 2.221101760864258, + "767": 2.2292208671569824, + "768": 2.211580276489258, + "769": 2.2003841400146484, + "770": 2.239424705505371, + "771": 2.231846809387207, + "772": 2.222195863723755, + "773": 2.2056221961975098, + "774": 2.2195372581481934, + "775": 2.2274832725524902, + "776": 2.2267355918884277, + "777": 2.225849151611328, + "778": 2.1938111782073975, + "779": 2.219778060913086, + "780": 2.203202962875366, + "781": 2.204745292663574, + "782": 2.221733808517456, + "783": 2.2287707328796387, + "784": 2.2243988513946533, + "785": 2.200479507446289, + "786": 2.207712411880493, + "787": 2.210223913192749, + "788": 2.2429215908050537, + "789": 2.203415870666504, + "790": 2.2089006900787354, + "791": 2.2147860527038574, + "792": 2.219022274017334, + "793": 2.2020816802978516, + "794": 2.201672315597534, + "795": 2.218629837036133, + "796": 2.2059714794158936, + "797": 2.2188315391540527, + "798": 2.210559844970703, + "799": 2.20656681060791, + "800": 2.2285170555114746, + "801": 2.2160894870758057, + "802": 2.1947011947631836, + "803": 2.200603485107422, + "804": 2.223465919494629, + "805": 2.2105941772460938, + "806": 2.2058119773864746, + "807": 2.2213356494903564, + "808": 2.196129322052002, + "809": 2.2225122451782227 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 0.332354, + 0.326192, + 0.320013, + 0.313939, + 0.307595, + 0.301674, + 0.295675, + 0.289262, + 0.282921, + 0.27714, + 0.270931, + 0.264791, + 0.258777, + 0.252584, + 0.246361, + 0.240205, + 0.234212, + 0.228026, + 0.221815, + 0.215614, + 0.209472, + 0.20334, + 0.197206, + 0.191051, + 0.18495, + 0.178708, + 0.172567, + 0.166422, + 0.160271, + 0.154145, + 0.147966, + 0.141848, + 0.135668, + 0.129501, + 0.123328, + 0.117142, + 0.11102, + 0.104867, + 0.0986829, + 0.0925255, + 0.086373, + 0.0801991, + 0.0740347, + 0.0678679, + 0.0617039, + 0.0555348, + 0.0493647, + 0.0431982, + 0.0370274, + 0.0308579, + 0.0246879, + 0.0185161, + 0.0123445, + 0.00617254 + ], + "train_epoch_time": 4.844316482543945, + "train_loss": 2.2101509927506084, + "train_score": 0.35690907460545196, + "val_loss": 2.270607901769172, + "val_score": 0.33950290603971645 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:38:55.948032", + "final_model_norm": 88.26632690429688, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:37:14.339733", + "step_scheduler_on_epoch": false + } + }, + { + "config": { + "batch_size": 64, + "dataset": "shakespeare", + "dataset_kwargs": {}, + "loss_func": "sequence_cross_entropy", + "max_epoch": 15, + "model": "llama", + "model_kwargs": { + "dim": 384, + "expand": 4, + "mlp": "mlp", + "n_heads": 6, + "n_layers": 6, + "seq_len": 256, + "vocab_size": 92 + }, + "opt": { + "lr": 1.0, + "lr_schedule": "wsd", + "name": "ngn", + "stepwise_schedule": true, + "warmup_steps": 50 + }, + "run_id": 2, + "score_func": "sequence_cross_entropy_accuracy" + }, + "history": [ + { + "epoch": 0, + "grad_norm": 4.823538303375244, + "learning_rate": 1e-10, + "model_norm": 87.35120391845703, + "step_logs": { + "grad_norm": { + "0": 22.766481399536133, + "1": 23.4499454498291, + "2": 6.2565083503723145, + "3": 7.88982629776001, + "4": 11.964227676391602, + "5": 4.96006441116333, + "6": 6.8435235023498535, + "7": 5.717643737792969, + "8": 4.00455379486084, + "9": 5.72320032119751, + "10": 8.341522216796875, + "11": 3.961703062057495, + "12": 8.768805503845215, + "13": 16.514781951904297, + "14": 6.734203815460205, + "15": 6.085165023803711, + "16": 12.341890335083008, + "17": 7.289083003997803, + "18": 9.369098663330078, + "19": 16.68512725830078, + "20": 8.437172889709473, + "21": 5.4232001304626465, + "22": 17.26092529296875, + "23": 7.168286323547363, + "24": 4.13083028793335, + "25": 16.84851837158203, + "26": 13.853059768676758, + "27": 8.860191345214844, + "28": 9.997201919555664, + "29": 10.188836097717285, + "30": 4.45705509185791, + "31": 17.632375717163086, + "32": 9.241437911987305, + "33": 7.948631286621094, + "34": 3.2504324913024902, + "35": 14.708056449890137, + "36": 4.642722129821777, + "37": 4.221202373504639, + "38": 5.220492839813232, + "39": 2.6587140560150146, + "40": 2.6095921993255615, + "41": 1.6444931030273438, + "42": 6.288504600524902, + "43": 1.4923979043960571, + "44": 10.944479942321777, + "45": 1.7730772495269775, + "46": 6.987361431121826, + "47": 1.9503520727157593, + "48": 13.502943992614746, + "49": 1.7726036310195923, + "50": 2.3103184700012207, + "51": 12.616952896118164, + "52": 1.661839246749878, + "53": 4.823538303375244 + }, + "loss": { + "0": 4.531927108764648, + "1": 4.532190799713135, + "2": 3.7843165397644043, + "3": 4.029876232147217, + "4": 4.144651889801025, + "5": 4.560702800750732, + "6": 3.7188267707824707, + "7": 4.114666938781738, + "8": 4.800957202911377, + "9": 3.598372459411621, + "10": 4.166380882263184, + "11": 5.117392539978027, + "12": 6.413439750671387, + "13": 5.329720973968506, + "14": 3.988182306289673, + "15": 4.154473304748535, + "16": 4.940454483032227, + "17": 5.166607856750488, + "18": 4.151963710784912, + "19": 6.220420837402344, + "20": 5.248250961303711, + "21": 4.856843948364258, + "22": 8.520427703857422, + "23": 6.243471622467041, + "24": 4.959427833557129, + "25": 9.62490463256836, + "26": 8.307036399841309, + "27": 7.091299533843994, + "28": 6.120484352111816, + "29": 6.131654739379883, + "30": 6.962408065795898, + "31": 7.249964714050293, + "32": 5.922432899475098, + "33": 6.217356204986572, + "34": 4.812512397766113, + "35": 8.056571960449219, + "36": 4.838377952575684, + "37": 4.32801628112793, + "38": 4.577471733093262, + "39": 5.102447032928467, + "40": 4.514163017272949, + "41": 3.5082497596740723, + "42": 5.699440002441406, + "43": 3.7615678310394287, + "44": 6.136580467224121, + "45": 3.9205100536346436, + "46": 4.462265968322754, + "47": 4.1435136795043945, + "48": 6.60205078125, + "49": 3.658388614654541, + "50": 3.652296543121338, + "51": 5.712080001831055, + "52": 3.88555645942688, + "53": 4.9976348876953125 + }, + "lr": { + "0": 1e-10, + "1": 0.020000000098, + "2": 0.040000000096, + "3": 0.060000000094, + "4": 0.08000000009199999, + "5": 0.10000000009, + "6": 0.12000000008799999, + "7": 0.140000000086, + "8": 0.160000000084, + "9": 0.180000000082, + "10": 0.20000000008000002, + "11": 0.220000000078, + "12": 0.240000000076, + "13": 0.260000000074, + "14": 0.280000000072, + "15": 0.30000000007, + "16": 0.320000000068, + "17": 0.34000000006599995, + "18": 0.360000000064, + "19": 0.380000000062, + "20": 0.40000000006, + "21": 0.420000000058, + "22": 0.440000000056, + "23": 0.46000000005400005, + "24": 0.480000000052, + "25": 0.5000000000499999, + "26": 0.520000000048, + "27": 0.540000000046, + "28": 0.560000000044, + "29": 0.5800000000419999, + "30": 0.60000000004, + "31": 0.620000000038, + "32": 0.640000000036, + "33": 0.660000000034, + "34": 0.6800000000319999, + "35": 0.7000000000300001, + "36": 0.720000000028, + "37": 0.740000000026, + "38": 0.760000000024, + "39": 0.780000000022, + "40": 0.80000000002, + "41": 0.820000000018, + "42": 0.840000000016, + "43": 0.860000000014, + "44": 0.880000000012, + "45": 0.9000000000099999, + "46": 0.9200000000080001, + "47": 0.940000000006, + "48": 0.960000000004, + "49": 0.9800000000019999, + "50": 1.0, + "51": 1.0, + "52": 1.0, + "53": 1.0 + } + }, + "step_size_list": [ + 1e-10, + 0.0090362, + 0.0331435, + 0.0410002, + 0.0335927, + 0.0787575, + 0.0683518, + 0.0899652, + 0.126261, + 0.0989421, + 0.0749047, + 0.164502, + 0.098413, + 0.0339758, + 0.108027, + 0.128372, + 0.0539351, + 0.123718, + 0.0749138, + 0.0399857, + 0.107737, + 0.184886, + 0.0506161, + 0.159009, + 0.262904, + 0.059713, + 0.0742172, + 0.135373, + 0.100498, + 0.098141, + 0.323281, + 0.0433756, + 0.11399, + 0.151604, + 0.389366, + 0.0673216, + 0.27652, + 0.293267, + 0.232953, + 0.506397, + 0.49893, + 0.623076, + 0.214606, + 0.685474, + 0.0917768, + 0.661352, + 0.152494, + 0.656666, + 0.0673391, + 0.689727, + 0.577796, + 0.06696, + 0.7378, + 0.300503 + ], + "train_epoch_time": 4.845650672912598, + "train_loss": 4.346086560910196, + "train_score": 0.15250291421988088, + "val_loss": 4.360582778155462, + "val_score": 0.151025222291492 + }, + { + "epoch": 1, + "grad_norm": 4.512044906616211, + "learning_rate": 1.0, + "model_norm": 87.37397003173828, + "step_logs": { + "grad_norm": { + "54": 7.251988410949707, + "55": 15.8644437789917, + "56": 1.1270478963851929, + "57": 4.473561763763428, + "58": 2.9332540035247803, + "59": 18.531587600708008, + "60": 11.311650276184082, + "61": 4.0336222648620605, + "62": 0.965795636177063, + "63": 7.779601573944092, + "64": 3.2097480297088623, + "65": 3.0750300884246826, + "66": 3.0253684520721436, + "67": 14.161813735961914, + "68": 5.438813209533691, + "69": 2.6942594051361084, + "70": 2.6608285903930664, + "71": 2.6943233013153076, + "72": 3.844778537750244, + "73": 2.3849990367889404, + "74": 17.495288848876953, + "75": 3.5518290996551514, + "76": 2.5672812461853027, + "77": 19.015371322631836, + "78": 3.306727170944214, + "79": 1.3112736940383911, + "80": 15.822526931762695, + "81": 3.9387097358703613, + "82": 4.532346248626709, + "83": 2.28145432472229, + "84": 13.847086906433105, + "85": 2.366462469100952, + "86": 0.8172315359115601, + "87": 4.138247489929199, + "88": 1.0341992378234863, + "89": 1.772910475730896, + "90": 7.529833793640137, + "91": 2.508044958114624, + "92": 2.8410980701446533, + "93": 1.5059130191802979, + "94": 1.2346277236938477, + "95": 2.402649164199829, + "96": 1.8017809391021729, + "97": 12.121929168701172, + "98": 2.8432464599609375, + "99": 0.9545131921768188, + "100": 4.520432472229004, + "101": 1.124801516532898, + "102": 2.301797866821289, + "103": 1.802559733390808, + "104": 2.4258575439453125, + "105": 4.497735977172852, + "106": 2.356623888015747, + "107": 4.512044906616211 + }, + "loss": { + "54": 4.339327335357666, + "55": 7.530325412750244, + "56": 3.7264466285705566, + "57": 4.140336990356445, + "58": 4.434525489807129, + "59": 6.198064804077148, + "60": 5.086067199707031, + "61": 5.056458473205566, + "62": 3.6493189334869385, + "63": 4.742439270019531, + "64": 4.1082048416137695, + "65": 4.339682579040527, + "66": 3.7419819831848145, + "67": 5.806866645812988, + "68": 5.333910942077637, + "69": 3.884222984313965, + "70": 4.3664140701293945, + "71": 4.497482776641846, + "72": 3.938791275024414, + "73": 4.020101547241211, + "74": 6.818613529205322, + "75": 3.8480968475341797, + "76": 4.219987392425537, + "77": 10.126190185546875, + "78": 4.448222637176514, + "79": 3.598482131958008, + "80": 8.532045364379883, + "81": 3.8793416023254395, + "82": 3.812709093093872, + "83": 4.266729354858398, + "84": 6.01512336730957, + "85": 4.1626996994018555, + "86": 3.3788065910339355, + "87": 4.7241597175598145, + "88": 3.4365878105163574, + "89": 3.7387542724609375, + "90": 4.382164478302002, + "91": 4.326023578643799, + "92": 3.707341194152832, + "93": 3.93203067779541, + "94": 3.5497255325317383, + "95": 3.884605646133423, + "96": 3.8811445236206055, + "97": 6.000147819519043, + "98": 4.273425579071045, + "99": 3.4745020866394043, + "100": 4.062713623046875, + "101": 3.5846211910247803, + "102": 3.8877429962158203, + "103": 3.731144905090332, + "104": 4.204802513122559, + "105": 3.9859097003936768, + "106": 3.831906795501709, + "107": 4.059577941894531 + }, + "lr": { + "54": 1.0, + "55": 1.0, + "56": 1.0, + "57": 1.0, + "58": 1.0, + "59": 1.0, + "60": 1.0, + "61": 1.0, + "62": 1.0, + "63": 1.0, + "64": 1.0, + "65": 1.0, + "66": 1.0, + "67": 1.0, + "68": 1.0, + "69": 1.0, + "70": 1.0, + "71": 1.0, + "72": 1.0, + "73": 1.0, + "74": 1.0, + "75": 1.0, + "76": 1.0, + "77": 1.0, + "78": 1.0, + "79": 1.0, + "80": 1.0, + "81": 1.0, + "82": 1.0, + "83": 1.0, + "84": 1.0, + "85": 1.0, + "86": 1.0, + "87": 1.0, + "88": 1.0, + "89": 1.0, + "90": 1.0, + "91": 1.0, + "92": 1.0, + "93": 1.0, + "94": 1.0, + "95": 1.0, + "96": 1.0, + "97": 1.0, + "98": 1.0, + "99": 1.0, + "100": 1.0, + "101": 1.0, + "102": 1.0, + "103": 1.0, + "104": 1.0, + "105": 1.0, + "106": 1.0, + "107": 1.0 + } + }, + "step_size_list": [ + 0.141646, + 0.0564617, + 0.854383, + 0.292671, + 0.507585, + 0.0348386, + 0.0736441, + 0.383311, + 0.886683, + 0.135485, + 0.443677, + 0.478593, + 0.449843, + 0.0547377, + 0.265049, + 0.516949, + 0.552262, + 0.553388, + 0.347644, + 0.585661, + 0.0426533, + 0.378905, + 0.561508, + 0.0530394, + 0.448616, + 0.80716, + 0.0638109, + 0.33339, + 0.270716, + 0.621135, + 0.0590378, + 0.597851, + 0.910057, + 0.355555, + 0.86534, + 0.704049, + 0.133883, + 0.579029, + 0.478783, + 0.776173, + 0.823243, + 0.573715, + 0.705105, + 0.0755014, + 0.513914, + 0.884086, + 0.284506, + 0.849998, + 0.59474, + 0.696661, + 0.588315, + 0.282674, + 0.579824, + 0.285106 + ], + "train_epoch_time": 4.844127178192139, + "train_loss": 4.14564177788142, + "train_score": 0.08546336982479397, + "val_loss": 4.150988496130013, + "val_score": 0.08249766796177208 + }, + { + "epoch": 2, + "grad_norm": 0.7047867178916931, + "learning_rate": 1.0, + "model_norm": 87.48506927490234, + "step_logs": { + "grad_norm": { + "108": 1.4983487129211426, + "109": 1.2913858890533447, + "110": 1.6994794607162476, + "111": 2.730990409851074, + "112": 2.5205719470977783, + "113": 2.494032859802246, + "114": 2.87644100189209, + "115": 2.64274001121521, + "116": 2.8192801475524902, + "117": 0.813446581363678, + "118": 1.87333083152771, + "119": 12.412668228149414, + "120": 3.2681543827056885, + "121": 0.816596508026123, + "122": 8.50576114654541, + "123": 2.056885242462158, + "124": 1.4469059705734253, + "125": 4.668663024902344, + "126": 1.66146981716156, + "127": 1.6472582817077637, + "128": 1.199605107307434, + "129": 6.843109130859375, + "130": 12.066320419311523, + "131": 1.6553430557250977, + "132": 10.090495109558105, + "133": 1.880860447883606, + "134": 1.3028234243392944, + "135": 1.2324339151382446, + "136": 1.7973941564559937, + "137": 0.8165777921676636, + "138": 2.5478763580322266, + "139": 1.0959110260009766, + "140": 2.330714702606201, + "141": 1.665844202041626, + "142": 2.3689467906951904, + "143": 1.56474769115448, + "144": 5.588191509246826, + "145": 1.6102663278579712, + "146": 1.687662959098816, + "147": 0.5792037844657898, + "148": 1.041499137878418, + "149": 2.03286075592041, + "150": 0.7721120715141296, + "151": 3.103257894515991, + "152": 0.9085686206817627, + "153": 1.8697940111160278, + "154": 1.200742483139038, + "155": 0.9075438380241394, + "156": 0.9638105630874634, + "157": 5.558727741241455, + "158": 0.8548763990402222, + "159": 0.916251003742218, + "160": 0.823141872882843, + "161": 0.7047867178916931 + }, + "loss": { + "108": 4.136025905609131, + "109": 3.6685400009155273, + "110": 3.491719961166382, + "111": 4.523752689361572, + "112": 3.6392483711242676, + "113": 4.23232889175415, + "114": 3.9685144424438477, + "115": 4.4471001625061035, + "116": 3.67026948928833, + "117": 3.5676987171173096, + "118": 3.554564952850342, + "119": 6.902256011962891, + "120": 4.3198957443237305, + "121": 3.402722120285034, + "122": 4.637648582458496, + "123": 3.7611632347106934, + "124": 3.6920828819274902, + "125": 3.8159029483795166, + "126": 4.0966315269470215, + "127": 3.707505702972412, + "128": 3.344067096710205, + "129": 4.417843818664551, + "130": 4.530896186828613, + "131": 3.6134791374206543, + "132": 4.731829643249512, + "133": 3.390665054321289, + "134": 3.606768846511841, + "135": 3.2326221466064453, + "136": 3.841463088989258, + "137": 3.1436095237731934, + "138": 3.6988344192504883, + "139": 3.381613254547119, + "140": 3.393953323364258, + "141": 3.7028579711914062, + "142": 3.4534738063812256, + "143": 3.424208641052246, + "144": 3.822815418243408, + "145": 3.7035539150238037, + "146": 3.2978720664978027, + "147": 3.1762351989746094, + "148": 3.1373298168182373, + "149": 3.7157065868377686, + "150": 3.200949192047119, + "151": 3.6398849487304688, + "152": 3.362764835357666, + "153": 3.2505688667297363, + "154": 3.459547758102417, + "155": 3.165719985961914, + "156": 3.1284775733947754, + "157": 4.016909599304199, + "158": 3.189304828643799, + "159": 2.985337972640991, + "160": 3.219618320465088, + "161": 2.9568843841552734 + }, + "lr": { + "108": 1.0, + "109": 1.0, + "110": 1.0, + "111": 1.0, + "112": 1.0, + "113": 1.0, + "114": 1.0, + "115": 1.0, + "116": 1.0, + "117": 1.0, + "118": 1.0, + "119": 1.0, + "120": 1.0, + "121": 1.0, + "122": 1.0, + "123": 1.0, + "124": 1.0, + "125": 1.0, + "126": 1.0, + "127": 1.0, + "128": 1.0, + "129": 1.0, + "130": 1.0, + "131": 1.0, + "132": 1.0, + "133": 1.0, + "134": 1.0, + "135": 1.0, + "136": 1.0, + "137": 1.0, + "138": 1.0, + "139": 1.0, + "140": 1.0, + "141": 1.0, + "142": 1.0, + "143": 1.0, + "144": 1.0, + "145": 1.0, + "146": 1.0, + "147": 1.0, + "148": 1.0, + "149": 1.0, + "150": 1.0, + "151": 1.0, + "152": 1.0, + "153": 1.0, + "154": 1.0, + "155": 1.0, + "156": 1.0, + "157": 1.0, + "158": 1.0, + "159": 1.0, + "160": 1.0, + "161": 1.0 + } + }, + "step_size_list": [ + 0.786534, + 0.8148, + 0.707422, + 0.548141, + 0.533936, + 0.576421, + 0.489609, + 0.560149, + 0.480123, + 0.915136, + 0.669504, + 0.082229, + 0.44718, + 0.910759, + 0.113635, + 0.640029, + 0.779109, + 0.259336, + 0.747987, + 0.732096, + 0.822933, + 0.158733, + 0.0585925, + 0.72508, + 0.0850423, + 0.657171, + 0.80952, + 0.809761, + 0.70398, + 0.904113, + 0.532615, + 0.849198, + 0.555469, + 0.727423, + 0.551724, + 0.736638, + 0.19668, + 0.740706, + 0.698409, + 0.949839, + 0.852607, + 0.642637, + 0.914811, + 0.430501, + 0.890678, + 0.650292, + 0.827556, + 0.884888, + 0.870728, + 0.206348, + 0.897205, + 0.876727, + 0.904794, + 0.922514 + ], + "train_epoch_time": 4.84415602684021, + "train_loss": 3.017545517547914, + "train_score": 0.18367557391802242, + "val_loss": 3.045292272907174, + "val_score": 0.1787950275241302 + }, + { + "epoch": 3, + "grad_norm": 0.4002307057380676, + "learning_rate": 1.0, + "model_norm": 87.59906768798828, + "step_logs": { + "grad_norm": { + "162": 1.2257070541381836, + "163": 1.4252961874008179, + "164": 0.7772822976112366, + "165": 0.854894757270813, + "166": 0.7863472700119019, + "167": 0.9834524393081665, + "168": 0.8009421825408936, + "169": 0.6448637247085571, + "170": 0.5537456274032593, + "171": 0.7591561675071716, + "172": 1.1511164903640747, + "173": 1.0290831327438354, + "174": 0.8881003260612488, + "175": 0.8370724320411682, + "176": 1.166159749031067, + "177": 1.0723377466201782, + "178": 0.8646930456161499, + "179": 0.5154723525047302, + "180": 0.6050717830657959, + "181": 0.7991882562637329, + "182": 1.3131170272827148, + "183": 0.8054488301277161, + "184": 0.7397527694702148, + "185": 0.9199510216712952, + "186": 0.9297583699226379, + "187": 0.7554708123207092, + "188": 0.8410573601722717, + "189": 1.3671910762786865, + "190": 0.7584227919578552, + "191": 0.5444364547729492, + "192": 0.4343738257884979, + "193": 0.572770893573761, + "194": 0.913043200969696, + "195": 1.0558162927627563, + "196": 0.6686832904815674, + "197": 0.6075354218482971, + "198": 1.2297452688217163, + "199": 0.7891676425933838, + "200": 0.6268939971923828, + "201": 0.5721192955970764, + "202": 0.7553921341896057, + "203": 0.9939144253730774, + "204": 0.8785144686698914, + "205": 0.6097036004066467, + "206": 0.5276299715042114, + "207": 0.7230173349380493, + "208": 0.9269310832023621, + "209": 0.8904930353164673, + "210": 0.6510063409805298, + "211": 0.579686164855957, + "212": 0.6641437411308289, + "213": 0.9507161378860474, + "214": 0.7723180055618286, + "215": 0.4002307057380676 + }, + "loss": { + "162": 3.0161948204040527, + "163": 3.284764289855957, + "164": 3.1054205894470215, + "165": 2.993417263031006, + "166": 3.031731367111206, + "167": 2.993283271789551, + "168": 3.10546612739563, + "169": 2.8692851066589355, + "170": 2.8510661125183105, + "171": 2.9046621322631836, + "172": 2.984726905822754, + "173": 3.0306029319763184, + "174": 3.0619125366210938, + "175": 2.881030321121216, + "176": 2.9785983562469482, + "177": 3.021331310272217, + "178": 3.0799038410186768, + "179": 2.8446714878082275, + "180": 2.7676024436950684, + "181": 2.871354341506958, + "182": 2.9102816581726074, + "183": 3.0837364196777344, + "184": 2.8665523529052734, + "185": 2.862290859222412, + "186": 2.9185848236083984, + "187": 2.8774290084838867, + "188": 2.854646921157837, + "189": 2.9404711723327637, + "190": 3.0454816818237305, + "191": 2.810105562210083, + "192": 2.724196434020996, + "193": 2.7497920989990234, + "194": 2.7893741130828857, + "195": 2.9536008834838867, + "196": 2.835805892944336, + "197": 2.773388624191284, + "198": 2.8488388061523438, + "199": 2.9932303428649902, + "200": 2.8020873069763184, + "201": 2.746697425842285, + "202": 2.765296697616577, + "203": 2.8473215103149414, + "204": 2.929778575897217, + "205": 2.7714762687683105, + "206": 2.740403652191162, + "207": 2.7294816970825195, + "208": 2.835923194885254, + "209": 2.847292423248291, + "210": 2.8470582962036133, + "211": 2.6816773414611816, + "212": 2.7541491985321045, + "213": 2.7499663829803467, + "214": 2.910299301147461, + "215": 2.7164416313171387 + }, + "lr": { + "162": 1.0, + "163": 1.0, + "164": 1.0, + "165": 1.0, + "166": 1.0, + "167": 1.0, + "168": 1.0, + "169": 1.0, + "170": 1.0, + "171": 1.0, + "172": 1.0, + "173": 1.0, + "174": 1.0, + "175": 1.0, + "176": 1.0, + "177": 1.0, + "178": 1.0, + "179": 1.0, + "180": 1.0, + "181": 1.0, + "182": 1.0, + "183": 1.0, + "184": 1.0, + "185": 1.0, + "186": 1.0, + "187": 1.0, + "188": 1.0, + "189": 1.0, + "190": 1.0, + "191": 1.0, + "192": 1.0, + "193": 1.0, + "194": 1.0, + "195": 1.0, + "196": 1.0, + "197": 1.0, + "198": 1.0, + "199": 1.0, + "200": 1.0, + "201": 1.0, + "202": 1.0, + "203": 1.0, + "204": 1.0, + "205": 1.0, + "206": 1.0, + "207": 1.0, + "208": 1.0, + "209": 1.0, + "210": 1.0, + "211": 1.0, + "212": 1.0, + "213": 1.0, + "214": 1.0, + "215": 1.0 + } + }, + "step_size_list": [ + 0.800609, + 0.76381, + 0.911347, + 0.891206, + 0.907459, + 0.860913, + 0.906383, + 0.932431, + 0.948969, + 0.909748, + 0.818347, + 0.851267, + 0.8859, + 0.89158, + 0.814144, + 0.840126, + 0.891756, + 0.95538, + 0.937961, + 0.899912, + 0.771463, + 0.904823, + 0.912866, + 0.871203, + 0.871009, + 0.909773, + 0.889759, + 0.758816, + 0.913713, + 0.949902, + 0.966529, + 0.943705, + 0.869994, + 0.841248, + 0.926923, + 0.937609, + 0.790252, + 0.905771, + 0.93447, + 0.943766, + 0.906475, + 0.852171, + 0.883615, + 0.93715, + 0.951661, + 0.912608, + 0.868444, + 0.87777, + 0.930727, + 0.94104, + 0.92586, + 0.858855, + 0.907049, + 0.97136 + ], + "train_epoch_time": 4.844200849533081, + "train_loss": 2.661428938021448, + "train_score": 0.24712607608293016, + "val_loss": 2.681328601596276, + "val_score": 0.24196326097924728 + }, + { + "epoch": 4, + "grad_norm": 0.5701131224632263, + "learning_rate": 1.0, + "model_norm": 87.68059539794922, + "step_logs": { + "grad_norm": { + "216": 0.35157185792922974, + "217": 0.6305283308029175, + "218": 0.6689011454582214, + "219": 0.7682798504829407, + "220": 0.6697713136672974, + "221": 0.4669936001300812, + "222": 0.608452320098877, + "223": 0.9744377136230469, + "224": 0.6994677186012268, + "225": 0.45972776412963867, + "226": 0.5007526278495789, + "227": 0.7899810671806335, + "228": 0.890059769153595, + "229": 0.7429040670394897, + "230": 0.6641275882720947, + "231": 0.7926104068756104, + "232": 0.7025500535964966, + "233": 0.47195491194725037, + "234": 0.5225667357444763, + "235": 0.9557555317878723, + "236": 0.8144726753234863, + "237": 0.4025629758834839, + "238": 0.33580297231674194, + "239": 0.4634389281272888, + "240": 0.6295719146728516, + "241": 0.6642836928367615, + "242": 0.8138523101806641, + "243": 0.6546770930290222, + "244": 0.3867209553718567, + "245": 0.4119410216808319, + "246": 0.6824784874916077, + "247": 0.8117210865020752, + "248": 0.7649862170219421, + "249": 0.7218926548957825, + "250": 0.5709288716316223, + "251": 0.5692389011383057, + "252": 0.8376795649528503, + "253": 0.7577732801437378, + "254": 0.5632246136665344, + "255": 0.7132888436317444, + "256": 0.7931423783302307, + "257": 0.6427789330482483, + "258": 0.5352956056594849, + "259": 0.6267139315605164, + "260": 0.8365180492401123, + "261": 0.6753789782524109, + "262": 0.40073320269584656, + "263": 0.4384753108024597, + "264": 0.5244778394699097, + "265": 0.6394432783126831, + "266": 0.8070451021194458, + "267": 0.6586354970932007, + "268": 0.5192147493362427, + "269": 0.5701131224632263 + }, + "loss": { + "216": 2.6652145385742188, + "217": 2.6709206104278564, + "218": 2.754687786102295, + "219": 2.761075496673584, + "220": 2.8076882362365723, + "221": 2.6866326332092285, + "222": 2.720402240753174, + "223": 2.7774767875671387, + "224": 2.8788671493530273, + "225": 2.6797099113464355, + "226": 2.671217918395996, + "227": 2.7287678718566895, + "228": 2.8325276374816895, + "229": 2.7546448707580566, + "230": 2.7611241340637207, + "231": 2.7264084815979004, + "232": 2.8039281368255615, + "233": 2.6774282455444336, + "234": 2.6826274394989014, + "235": 2.7156553268432617, + "236": 2.899374485015869, + "237": 2.688506603240967, + "238": 2.6291966438293457, + "239": 2.63651704788208, + "240": 2.704230785369873, + "241": 2.7291460037231445, + "242": 2.705507516860962, + "243": 2.785426139831543, + "244": 2.657219886779785, + "245": 2.613079071044922, + "246": 2.66627836227417, + "247": 2.748772144317627, + "248": 2.7636160850524902, + "249": 2.778533935546875, + "250": 2.6858222484588623, + "251": 2.6874942779541016, + "252": 2.7338716983795166, + "253": 2.8107264041900635, + "254": 2.6686882972717285, + "255": 2.6972086429595947, + "256": 2.7379114627838135, + "257": 2.7553751468658447, + "258": 2.6499733924865723, + "259": 2.700737953186035, + "260": 2.694472312927246, + "261": 2.7928671836853027, + "262": 2.6408352851867676, + "263": 2.641464948654175, + "264": 2.676417589187622, + "265": 2.6676526069641113, + "266": 2.7247204780578613, + "267": 2.7644755840301514, + "268": 2.6589903831481934, + "269": 2.678616762161255 + }, + "lr": { + "216": 1.0, + "217": 1.0, + "218": 1.0, + "219": 1.0, + "220": 1.0, + "221": 1.0, + "222": 1.0, + "223": 1.0, + "224": 1.0, + "225": 1.0, + "226": 1.0, + "227": 1.0, + "228": 1.0, + "229": 1.0, + "230": 1.0, + "231": 1.0, + "232": 1.0, + "233": 1.0, + "234": 1.0, + "235": 1.0, + "236": 1.0, + "237": 1.0, + "238": 1.0, + "239": 1.0, + "240": 1.0, + "241": 1.0, + "242": 1.0, + "243": 1.0, + "244": 1.0, + "245": 1.0, + "246": 1.0, + "247": 1.0, + "248": 1.0, + "249": 1.0, + "250": 1.0, + "251": 1.0, + "252": 1.0, + "253": 1.0, + "254": 1.0, + "255": 1.0, + "256": 1.0, + "257": 1.0, + "258": 1.0, + "259": 1.0, + "260": 1.0, + "261": 1.0, + "262": 1.0, + "263": 1.0, + "264": 1.0, + "265": 1.0, + "266": 1.0, + "267": 1.0, + "268": 1.0, + "269": 1.0 + } + }, + "step_size_list": [ + 0.977337, + 0.930731, + 0.924888, + 0.903433, + 0.926023, + 0.960996, + 0.936291, + 0.854019, + 0.921681, + 0.962061, + 0.955168, + 0.897384, + 0.877315, + 0.908944, + 0.926037, + 0.89669, + 0.919105, + 0.960065, + 0.951568, + 0.856028, + 0.897345, + 0.970743, + 0.979006, + 0.960863, + 0.931719, + 0.925202, + 0.890941, + 0.92856, + 0.972629, + 0.968551, + 0.919671, + 0.892975, + 0.90426, + 0.914263, + 0.94279, + 0.943142, + 0.886261, + 0.907319, + 0.9439, + 0.913813, + 0.896956, + 0.930255, + 0.948708, + 0.932214, + 0.885072, + 0.924504, + 0.970493, + 0.964885, + 0.951123, + 0.928817, + 0.893239, + 0.927248, + 0.951753, + 0.942799 + ], + "train_epoch_time": 4.843627214431763, + "train_loss": 2.6792670729512635, + "train_score": 0.24416808642906643, + "val_loss": 2.7003905209279635, + "val_score": 0.2410169705389288 + }, + { + "epoch": 5, + "grad_norm": 0.6564647555351257, + "learning_rate": 1.0, + "model_norm": 87.75013732910156, + "step_logs": { + "grad_norm": { + "270": 0.8181772828102112, + "271": 0.6801655888557434, + "272": 0.4163513481616974, + "273": 0.4679330289363861, + "274": 0.6349716186523438, + "275": 0.7468962073326111, + "276": 0.7852400541305542, + "277": 0.6241344809532166, + "278": 0.4764142334461212, + "279": 0.5797223448753357, + "280": 0.7787536382675171, + "281": 0.6379414796829224, + "282": 0.398966908454895, + "283": 0.49806177616119385, + "284": 0.7276239395141602, + "285": 0.7271779775619507, + "286": 0.6163162589073181, + "287": 0.5810685157775879, + "288": 0.6158027052879333, + "289": 0.6318897604942322, + "290": 0.593970537185669, + "291": 0.5808635950088501, + "292": 0.5572472810745239, + "293": 0.6181164979934692, + "294": 0.6605523824691772, + "295": 0.620652437210083, + "296": 0.5607692003250122, + "297": 0.5383511185646057, + "298": 0.5575741529464722, + "299": 0.6443954706192017, + "300": 0.6675539612770081, + "301": 0.65948486328125, + "302": 0.6175847053527832, + "303": 0.5659894943237305, + "304": 0.5658437609672546, + "305": 0.5902078747749329, + "306": 0.5941104888916016, + "307": 0.590848982334137, + "308": 0.6037076115608215, + "309": 0.6228870153427124, + "310": 0.5768495202064514, + "311": 0.5681480765342712, + "312": 0.5810586214065552, + "313": 0.5691453814506531, + "314": 0.5784564018249512, + "315": 0.6008875370025635, + "316": 0.5586863160133362, + "317": 0.6046246290206909, + "318": 0.6240992546081543, + "319": 0.5120927691459656, + "320": 0.4306298792362213, + "321": 0.4668160378932953, + "322": 0.6215357780456543, + "323": 0.6564647555351257 + }, + "loss": { + "270": 2.693901300430298, + "271": 2.79805326461792, + "272": 2.645878314971924, + "273": 2.638319253921509, + "274": 2.6527023315429688, + "275": 2.707859992980957, + "276": 2.6951546669006348, + "277": 2.751971483230591, + "278": 2.6244640350341797, + "279": 2.672447681427002, + "280": 2.652071237564087, + "281": 2.762293577194214, + "282": 2.6372337341308594, + "283": 2.6398119926452637, + "284": 2.659114360809326, + "285": 2.749040365219116, + "286": 2.6650798320770264, + "287": 2.69551420211792, + "288": 2.6418070793151855, + "289": 2.7103919982910156, + "290": 2.640364646911621, + "291": 2.675276279449463, + "292": 2.621049642562866, + "293": 2.670933723449707, + "294": 2.6734423637390137, + "295": 2.6947922706604004, + "296": 2.6273746490478516, + "297": 2.6775970458984375, + "298": 2.6315767765045166, + "299": 2.681574583053589, + "300": 2.653317928314209, + "301": 2.707068920135498, + "302": 2.6572303771972656, + "303": 2.666687250137329, + "304": 2.6224920749664307, + "305": 2.6868209838867188, + "306": 2.6465818881988525, + "307": 2.6512224674224854, + "308": 2.642472982406616, + "309": 2.693118095397949, + "310": 2.644137382507324, + "311": 2.6688456535339355, + "312": 2.6253676414489746, + "313": 2.6528191566467285, + "314": 2.64564847946167, + "315": 2.6564254760742188, + "316": 2.6119375228881836, + "317": 2.6588306427001953, + "318": 2.6486783027648926, + "319": 2.634204626083374, + "320": 2.6109776496887207, + "321": 2.604259490966797, + "322": 2.6050381660461426, + "323": 2.692103147506714 + }, + "lr": { + "270": 1.0, + "271": 1.0, + "272": 1.0, + "273": 1.0, + "274": 1.0, + "275": 1.0, + "276": 1.0, + "277": 1.0, + "278": 1.0, + "279": 1.0, + "280": 1.0, + "281": 1.0, + "282": 1.0, + "283": 1.0, + "284": 1.0, + "285": 1.0, + "286": 1.0, + "287": 1.0, + "288": 1.0, + "289": 1.0, + "290": 1.0, + "291": 1.0, + "292": 1.0, + "293": 1.0, + "294": 1.0, + "295": 1.0, + "296": 1.0, + "297": 1.0, + "298": 1.0, + "299": 1.0, + "300": 1.0, + "301": 1.0, + "302": 1.0, + "303": 1.0, + "304": 1.0, + "305": 1.0, + "306": 1.0, + "307": 1.0, + "308": 1.0, + "309": 1.0, + "310": 1.0, + "311": 1.0, + "312": 1.0, + "313": 1.0, + "314": 1.0, + "315": 1.0, + "316": 1.0, + "317": 1.0, + "318": 1.0, + "319": 1.0, + "320": 1.0, + "321": 1.0, + "322": 1.0, + "323": 1.0 + } + }, + "step_size_list": [ + 0.889485, + 0.923643, + 0.968281, + 0.960157, + 0.929372, + 0.906613, + 0.897351, + 0.933903, + 0.958551, + 0.940841, + 0.897395, + 0.931389, + 0.970706, + 0.955123, + 0.909462, + 0.912262, + 0.933477, + 0.941061, + 0.933035, + 0.931395, + 0.937375, + 0.940681, + 0.944076, + 0.933251, + 0.924552, + 0.933295, + 0.943536, + 0.948659, + 0.944226, + 0.928138, + 0.92253, + 0.925643, + 0.933037, + 0.943339, + 0.942467, + 0.939122, + 0.937485, + 0.938229, + 0.935487, + 0.932807, + 0.940802, + 0.942974, + 0.939584, + 0.94246, + 0.940523, + 0.936364, + 0.943618, + 0.935676, + 0.931509, + 0.952584, + 0.965706, + 0.959842, + 0.930972, + 0.925893 + ], + "train_epoch_time": 4.844163179397583, + "train_loss": 2.630333215434377, + "train_score": 0.22913490846817258, + "val_loss": 2.6502758301495137, + "val_score": 0.22905155698139001 + }, + { + "epoch": 6, + "grad_norm": 0.5630146265029907, + "learning_rate": 1.0, + "model_norm": 87.8231430053711, + "step_logs": { + "grad_norm": { + "324": 0.5665695071220398, + "325": 0.6207720637321472, + "326": 0.679410994052887, + "327": 0.640845537185669, + "328": 0.6557438373565674, + "329": 0.6467770338058472, + "330": 0.5974902510643005, + "331": 0.6088232398033142, + "332": 0.6805403828620911, + "333": 0.6934331059455872, + "334": 0.6434071063995361, + "335": 0.6306788921356201, + "336": 0.6044982075691223, + "337": 0.5734537243843079, + "338": 0.5707707405090332, + "339": 0.5774801969528198, + "340": 0.6090638041496277, + "341": 0.6756903529167175, + "342": 0.5592733025550842, + "343": 0.5819928646087646, + "344": 0.6928020119667053, + "345": 0.6013165712356567, + "346": 0.4722282290458679, + "347": 0.4533126652240753, + "348": 0.5106614232063293, + "349": 0.552486002445221, + "350": 0.5333740711212158, + "351": 0.5413498282432556, + "352": 0.5809600949287415, + "353": 0.5913351774215698, + "354": 0.6134089231491089, + "355": 0.6003088355064392, + "356": 0.5555221438407898, + "357": 0.570401668548584, + "358": 0.637662947177887, + "359": 0.5945121645927429, + "360": 0.48445793986320496, + "361": 0.5014073848724365, + "362": 0.5888465642929077, + "363": 0.6221649050712585, + "364": 0.5847055315971375, + "365": 0.5664844512939453, + "366": 0.5298759341239929, + "367": 0.5440005660057068, + "368": 0.5984290838241577, + "369": 0.5895470380783081, + "370": 0.5926340222358704, + "371": 0.5938579440116882, + "372": 0.565003514289856, + "373": 0.5573912262916565, + "374": 0.5748423337936401, + "375": 0.5812371373176575, + "376": 0.5559704303741455, + "377": 0.5630146265029907 + }, + "loss": { + "324": 2.6389575004577637, + "325": 2.6744384765625, + "326": 2.656019687652588, + "327": 2.672060489654541, + "328": 2.643585443496704, + "329": 2.655033588409424, + "330": 2.6457149982452393, + "331": 2.6522912979125977, + "332": 2.617915630340576, + "333": 2.6835780143737793, + "334": 2.614906072616577, + "335": 2.6859030723571777, + "336": 2.6196837425231934, + "337": 2.6409616470336914, + "338": 2.6314563751220703, + "339": 2.6548380851745605, + "340": 2.6133038997650146, + "341": 2.670090675354004, + "342": 2.6241979598999023, + "343": 2.6148767471313477, + "344": 2.637996196746826, + "345": 2.6516692638397217, + "346": 2.5890603065490723, + "347": 2.5865163803100586, + "348": 2.598806619644165, + "349": 2.624627113342285, + "350": 2.600104570388794, + "351": 2.6266677379608154, + "352": 2.6116387844085693, + "353": 2.6305651664733887, + "354": 2.600620746612549, + "355": 2.6391561031341553, + "356": 2.6104931831359863, + "357": 2.6266136169433594, + "358": 2.617392063140869, + "359": 2.642134666442871, + "360": 2.5678536891937256, + "361": 2.5877432823181152, + "362": 2.5749778747558594, + "363": 2.643909454345703, + "364": 2.5950937271118164, + "365": 2.6365654468536377, + "366": 2.5696539878845215, + "367": 2.6266026496887207, + "368": 2.602271318435669, + "369": 2.644552707672119, + "370": 2.6005771160125732, + "371": 2.6311559677124023, + "372": 2.5861282348632812, + "373": 2.6212921142578125, + "374": 2.5832526683807373, + "375": 2.6001381874084473, + "376": 2.5962820053100586, + "377": 2.611752986907959 + }, + "lr": { + "324": 1.0, + "325": 1.0, + "326": 1.0, + "327": 1.0, + "328": 1.0, + "329": 1.0, + "330": 1.0, + "331": 1.0, + "332": 1.0, + "333": 1.0, + "334": 1.0, + "335": 1.0, + "336": 1.0, + "337": 1.0, + "338": 1.0, + "339": 1.0, + "340": 1.0, + "341": 1.0, + "342": 1.0, + "343": 1.0, + "344": 1.0, + "345": 1.0, + "346": 1.0, + "347": 1.0, + "348": 1.0, + "349": 1.0, + "350": 1.0, + "351": 1.0, + "352": 1.0, + "353": 1.0, + "354": 1.0, + "355": 1.0, + "356": 1.0, + "357": 1.0, + "358": 1.0, + "359": 1.0, + "360": 1.0, + "361": 1.0, + "362": 1.0, + "363": 1.0, + "364": 1.0, + "365": 1.0, + "366": 1.0, + "367": 1.0, + "368": 1.0, + "369": 1.0, + "370": 1.0, + "371": 1.0, + "372": 1.0, + "373": 1.0, + "374": 1.0, + "375": 1.0, + "376": 1.0, + "377": 1.0 + } + }, + "step_size_list": [ + 0.942667, + 0.932797, + 0.920051, + 0.928636, + 0.924788, + 0.926974, + 0.936797, + 0.934687, + 0.918733, + 0.917775, + 0.92665, + 0.93106, + 0.934802, + 0.94139, + 0.941707, + 0.940905, + 0.933729, + 0.921239, + 0.943755, + 0.939173, + 0.916613, + 0.936172, + 0.958712, + 0.961794, + 0.952225, + 0.945046, + 0.948131, + 0.947162, + 0.939305, + 0.937678, + 0.932538, + 0.93609, + 0.94419, + 0.941677, + 0.927923, + 0.937307, + 0.956298, + 0.953673, + 0.936918, + 0.931789, + 0.9382, + 0.942634, + 0.948198, + 0.94667, + 0.935621, + 0.938338, + 0.936745, + 0.937192, + 0.941868, + 0.944054, + 0.939886, + 0.938998, + 0.943816, + 0.942787 + ], + "train_epoch_time": 4.843476295471191, + "train_loss": 2.5858833301358106, + "train_score": 0.24968503411308765, + "val_loss": 2.61170709776413, + "val_score": 0.2497084889086188 + }, + { + "epoch": 7, + "grad_norm": 0.5835149884223938, + "learning_rate": 1.0, + "model_norm": 87.91259002685547, + "step_logs": { + "grad_norm": { + "378": 0.631034791469574, + "379": 0.6645892262458801, + "380": 0.6508366465568542, + "381": 0.6770791411399841, + "382": 0.6244338750839233, + "383": 0.5523872971534729, + "384": 0.5418217182159424, + "385": 0.5459670424461365, + "386": 0.5279170274734497, + "387": 0.5229026675224304, + "388": 0.54593425989151, + "389": 0.6523922681808472, + "390": 0.6707743406295776, + "391": 0.5343193411827087, + "392": 0.4200863540172577, + "393": 0.6051865220069885, + "394": 0.7158398032188416, + "395": 0.6296555995941162, + "396": 0.5331427454948425, + "397": 0.5095428824424744, + "398": 0.5672648549079895, + "399": 0.6428970694541931, + "400": 0.6532034277915955, + "401": 0.5940384268760681, + "402": 0.5841777324676514, + "403": 0.6384387612342834, + "404": 0.6099355220794678, + "405": 0.579751193523407, + "406": 0.6122485399246216, + "407": 0.6530709266662598, + "408": 0.5794283151626587, + "409": 0.5202420353889465, + "410": 0.5804641842842102, + "411": 0.6161943674087524, + "412": 0.6395607590675354, + "413": 0.6825110912322998, + "414": 0.584525465965271, + "415": 0.5930807590484619, + "416": 0.5564717650413513, + "417": 0.5128763914108276, + "418": 0.6574738621711731, + "419": 0.8442243933677673, + "420": 0.6434152126312256, + "421": 0.5266668796539307, + "422": 0.6124412417411804, + "423": 0.7113578915596008, + "424": 0.6526036262512207, + "425": 0.6006346940994263, + "426": 0.7452445030212402, + "427": 0.6529521346092224, + "428": 0.5348069667816162, + "429": 0.5427950024604797, + "430": 0.6044876575469971, + "431": 0.5835149884223938 + }, + "loss": { + "378": 2.595905303955078, + "379": 2.645911693572998, + "380": 2.606935977935791, + "381": 2.6263294219970703, + "382": 2.6046905517578125, + "383": 2.601595640182495, + "384": 2.5734827518463135, + "385": 2.5872702598571777, + "386": 2.564354419708252, + "387": 2.578911781311035, + "388": 2.539823055267334, + "389": 2.5958669185638428, + "390": 2.6346778869628906, + "391": 2.6006574630737305, + "392": 2.528630256652832, + "393": 2.549687623977661, + "394": 2.611457109451294, + "395": 2.6117324829101562, + "396": 2.5580716133117676, + "397": 2.567366361618042, + "398": 2.547837257385254, + "399": 2.5905261039733887, + "400": 2.576244831085205, + "401": 2.6036343574523926, + "402": 2.5575199127197266, + "403": 2.584616184234619, + "404": 2.583188772201538, + "405": 2.5615596771240234, + "406": 2.5433802604675293, + "407": 2.5824694633483887, + "408": 2.5776078701019287, + "409": 2.552412986755371, + "410": 2.5194010734558105, + "411": 2.562621593475342, + "412": 2.558915615081787, + "413": 2.5721797943115234, + "414": 2.5612964630126953, + "415": 2.5327343940734863, + "416": 2.550753355026245, + "417": 2.5114545822143555, + "418": 2.5217838287353516, + "419": 2.5841362476348877, + "420": 2.592214584350586, + "421": 2.511564254760742, + "422": 2.519066333770752, + "423": 2.563769817352295, + "424": 2.5526232719421387, + "425": 2.5217087268829346, + "426": 2.5163087844848633, + "427": 2.579192876815796, + "428": 2.498741626739502, + "429": 2.5157222747802734, + "430": 2.4993467330932617, + "431": 2.5408687591552734 + }, + "lr": { + "378": 1.0, + "379": 1.0, + "380": 1.0, + "381": 1.0, + "382": 1.0, + "383": 1.0, + "384": 1.0, + "385": 1.0, + "386": 1.0, + "387": 1.0, + "388": 1.0, + "389": 1.0, + "390": 1.0, + "391": 1.0, + "392": 1.0, + "393": 1.0, + "394": 1.0, + "395": 1.0, + "396": 1.0, + "397": 1.0, + "398": 1.0, + "399": 1.0, + "400": 1.0, + "401": 1.0, + "402": 1.0, + "403": 1.0, + "404": 1.0, + "405": 1.0, + "406": 1.0, + "407": 1.0, + "408": 1.0, + "409": 1.0, + "410": 1.0, + "411": 1.0, + "412": 1.0, + "413": 1.0, + "414": 1.0, + "415": 1.0, + "416": 1.0, + "417": 1.0, + "418": 1.0, + "419": 1.0, + "420": 1.0, + "421": 1.0, + "422": 1.0, + "423": 1.0, + "424": 1.0, + "425": 1.0, + "426": 1.0, + "427": 1.0, + "428": 1.0, + "429": 1.0, + "430": 1.0, + "431": 1.0 + } + }, + "step_size_list": [ + 0.928765, + 0.922965, + 0.924862, + 0.919729, + 0.930363, + 0.944605, + 0.94604, + 0.945532, + 0.94846, + 0.949657, + 0.944578, + 0.924232, + 0.92133, + 0.947967, + 0.966282, + 0.93299, + 0.910655, + 0.929453, + 0.947366, + 0.951869, + 0.940601, + 0.926119, + 0.923524, + 0.936534, + 0.937455, + 0.926911, + 0.932829, + 0.938432, + 0.931367, + 0.923722, + 0.938856, + 0.949651, + 0.937322, + 0.931026, + 0.925991, + 0.916969, + 0.937472, + 0.935069, + 0.942774, + 0.950237, + 0.921058, + 0.87881, + 0.926053, + 0.94767, + 0.93071, + 0.910176, + 0.923001, + 0.933244, + 0.90061, + 0.923659, + 0.945866, + 0.944682, + 0.931879, + 0.937205 + ], + "train_epoch_time": 4.843728542327881, + "train_loss": 2.485765217503311, + "train_score": 0.2834088055954089, + "val_loss": 2.507180509008852, + "val_score": 0.2780882244337303 + }, + { + "epoch": 8, + "grad_norm": 0.4784611463546753, + "learning_rate": 1.0, + "model_norm": 88.0074234008789, + "step_logs": { + "grad_norm": { + "432": 0.549848198890686, + "433": 0.5837130546569824, + "434": 0.7905740737915039, + "435": 0.5529758334159851, + "436": 0.44585034251213074, + "437": 0.511372447013855, + "438": 0.6252089142799377, + "439": 0.6288154721260071, + "440": 0.6147361993789673, + "441": 0.6964508891105652, + "442": 0.5719792246818542, + "443": 0.49305108189582825, + "444": 0.599076509475708, + "445": 0.668509304523468, + "446": 0.7662803530693054, + "447": 0.9397596716880798, + "448": 0.5515175461769104, + "449": 0.4790220856666565, + "450": 0.5375211834907532, + "451": 0.7502608895301819, + "452": 0.6550245881080627, + "453": 0.5108506083488464, + "454": 0.5343354940414429, + "455": 0.5929295420646667, + "456": 0.7053409218788147, + "457": 0.6709436178207397, + "458": 0.5561814308166504, + "459": 0.5841220617294312, + "460": 0.7298258543014526, + "461": 0.7944507598876953, + "462": 0.674461305141449, + "463": 0.5819013714790344, + "464": 0.6430020332336426, + "465": 0.624009370803833, + "466": 0.516533613204956, + "467": 0.5987129807472229, + "468": 0.6656692028045654, + "469": 0.6360422372817993, + "470": 0.5653752088546753, + "471": 0.5587164759635925, + "472": 0.7932880520820618, + "473": 0.8027954697608948, + "474": 0.5690339207649231, + "475": 0.5225327610969543, + "476": 0.6446221470832825, + "477": 0.7345907688140869, + "478": 0.5668221712112427, + "479": 0.5040398240089417, + "480": 0.5966363549232483, + "481": 0.5954105854034424, + "482": 0.6325654983520508, + "483": 0.5826522707939148, + "484": 0.4609067440032959, + "485": 0.4784611463546753 + }, + "loss": { + "432": 2.5038466453552246, + "433": 2.4998693466186523, + "434": 2.508211135864258, + "435": 2.572650194168091, + "436": 2.4810874462127686, + "437": 2.501356363296509, + "438": 2.4633829593658447, + "439": 2.5254392623901367, + "440": 2.480496644973755, + "441": 2.512538433074951, + "442": 2.5309786796569824, + "443": 2.4784884452819824, + "444": 2.48624849319458, + "445": 2.5248870849609375, + "446": 2.487924337387085, + "447": 2.586681365966797, + "448": 2.556032180786133, + "449": 2.480590343475342, + "450": 2.462827205657959, + "451": 2.4823365211486816, + "452": 2.539616823196411, + "453": 2.4547269344329834, + "454": 2.473958730697632, + "455": 2.4873905181884766, + "456": 2.477478504180908, + "457": 2.537742853164673, + "458": 2.455606698989868, + "459": 2.4600114822387695, + "460": 2.481431007385254, + "461": 2.5656208992004395, + "462": 2.504275321960449, + "463": 2.4905426502227783, + "464": 2.448953151702881, + "465": 2.507854461669922, + "466": 2.427478790283203, + "467": 2.4791388511657715, + "468": 2.4901974201202393, + "469": 2.507401943206787, + "470": 2.4635491371154785, + "471": 2.463207960128784, + "472": 2.4576711654663086, + "473": 2.554412603378296, + "474": 2.506570816040039, + "475": 2.4603357315063477, + "476": 2.448587417602539, + "477": 2.4790964126586914, + "478": 2.4630634784698486, + "479": 2.43473219871521, + "480": 2.444490909576416, + "481": 2.4640111923217773, + "482": 2.461678981781006, + "483": 2.4721994400024414, + "484": 2.404785394668579, + "485": 2.41409969329834 + }, + "lr": { + "432": 1.0, + "433": 1.0, + "434": 1.0, + "435": 1.0, + "436": 1.0, + "437": 1.0, + "438": 1.0, + "439": 1.0, + "440": 1.0, + "441": 1.0, + "442": 1.0, + "443": 1.0, + "444": 1.0, + "445": 1.0, + "446": 1.0, + "447": 1.0, + "448": 1.0, + "449": 1.0, + "450": 1.0, + "451": 1.0, + "452": 1.0, + "453": 1.0, + "454": 1.0, + "455": 1.0, + "456": 1.0, + "457": 1.0, + "458": 1.0, + "459": 1.0, + "460": 1.0, + "461": 1.0, + "462": 1.0, + "463": 1.0, + "464": 1.0, + "465": 1.0, + "466": 1.0, + "467": 1.0, + "468": 1.0, + "469": 1.0, + "470": 1.0, + "471": 1.0, + "472": 1.0, + "473": 1.0, + "474": 1.0, + "475": 1.0, + "476": 1.0, + "477": 1.0, + "478": 1.0, + "479": 1.0, + "480": 1.0, + "481": 1.0, + "482": 1.0, + "483": 1.0, + "484": 1.0, + "485": 1.0 + } + }, + "step_size_list": [ + 0.943064, + 0.9362, + 0.889211, + 0.943904, + 0.961483, + 0.950325, + 0.926493, + 0.927398, + 0.929217, + 0.911972, + 0.939292, + 0.953251, + 0.932683, + 0.918696, + 0.894449, + 0.854182, + 0.943841, + 0.955793, + 0.944592, + 0.898166, + 0.922107, + 0.949527, + 0.945444, + 0.933995, + 0.908756, + 0.918532, + 0.940746, + 0.935148, + 0.903076, + 0.890471, + 0.916738, + 0.936348, + 0.922157, + 0.927959, + 0.947907, + 0.932579, + 0.918297, + 0.925351, + 0.939077, + 0.940411, + 0.886502, + 0.887981, + 0.939329, + 0.947429, + 0.921784, + 0.901848, + 0.938772, + 0.950414, + 0.93213, + 0.932889, + 0.924835, + 0.935751, + 0.957699, + 0.954732 + ], + "train_epoch_time": 4.844201564788818, + "train_loss": 2.4252914186529653, + "train_score": 0.29261567440402386, + "val_loss": 2.4552955723246805, + "val_score": 0.2873448260752944 + }, + { + "epoch": 9, + "grad_norm": 0.507682204246521, + "learning_rate": 1.0, + "model_norm": 88.0914535522461, + "step_logs": { + "grad_norm": { + "486": 0.57832932472229, + "487": 0.6478872895240784, + "488": 0.5928182601928711, + "489": 0.5464459657669067, + "490": 0.5789236426353455, + "491": 0.6152143478393555, + "492": 0.6163302659988403, + "493": 0.6283442378044128, + "494": 0.7662516832351685, + "495": 0.695849597454071, + "496": 0.45231619477272034, + "497": 0.43580302596092224, + "498": 0.5713932514190674, + "499": 0.6079331040382385, + "500": 0.6007699966430664, + "501": 0.6494527459144592, + "502": 0.6491064429283142, + "503": 0.5804978609085083, + "504": 0.5806142687797546, + "505": 0.6671934723854065, + "506": 0.6753543615341187, + "507": 0.6034969687461853, + "508": 0.5302227735519409, + "509": 0.5263882279396057, + "510": 0.4867282211780548, + "511": 0.48377421498298645, + "512": 0.5275049805641174, + "513": 0.544241189956665, + "514": 0.587230920791626, + "515": 0.6122822761535645, + "516": 0.6598381400108337, + "517": 0.6440941095352173, + "518": 0.6427991390228271, + "519": 0.6577604413032532, + "520": 0.5495144724845886, + "521": 0.5072782635688782, + "522": 0.5514366626739502, + "523": 0.5523213148117065, + "524": 0.7139912247657776, + "525": 0.6774431467056274, + "526": 0.6321172118186951, + "527": 0.5850508213043213, + "528": 0.5684505701065063, + "529": 0.558586597442627, + "530": 0.6529635190963745, + "531": 0.6388319730758667, + "532": 0.5655055046081543, + "533": 0.555770754814148, + "534": 0.5636330246925354, + "535": 0.567356526851654, + "536": 0.5862202048301697, + "537": 0.6310988664627075, + "538": 0.6419916749000549, + "539": 0.507682204246521 + }, + "loss": { + "486": 2.444479465484619, + "487": 2.4827351570129395, + "488": 2.4525582790374756, + "489": 2.4294040203094482, + "490": 2.4081668853759766, + "491": 2.4531545639038086, + "492": 2.44587779045105, + "493": 2.483649253845215, + "494": 2.4348349571228027, + "495": 2.4871487617492676, + "496": 2.431297540664673, + "497": 2.402846336364746, + "498": 2.4212663173675537, + "499": 2.4522478580474854, + "500": 2.4311389923095703, + "501": 2.462240219116211, + "502": 2.4582650661468506, + "503": 2.4273014068603516, + "504": 2.4112019538879395, + "505": 2.4532992839813232, + "506": 2.4747438430786133, + "507": 2.4368577003479004, + "508": 2.428983688354492, + "509": 2.435624361038208, + "510": 2.3989522457122803, + "511": 2.397653341293335, + "512": 2.4161252975463867, + "513": 2.430206298828125, + "514": 2.419283390045166, + "515": 2.4285733699798584, + "516": 2.4293394088745117, + "517": 2.4625699520111084, + "518": 2.4435346126556396, + "519": 2.4574975967407227, + "520": 2.41593074798584, + "521": 2.4223670959472656, + "522": 2.4104039669036865, + "523": 2.4375414848327637, + "524": 2.4481759071350098, + "525": 2.4842605590820312, + "526": 2.4485764503479004, + "527": 2.4255530834198, + "528": 2.4002795219421387, + "529": 2.4076528549194336, + "530": 2.4005117416381836, + "531": 2.4537055492401123, + "532": 2.4028143882751465, + "533": 2.4327540397644043, + "534": 2.4008846282958984, + "535": 2.4443836212158203, + "536": 2.4099483489990234, + "537": 2.4318974018096924, + "538": 2.403520107269287, + "539": 2.4510669708251953 + }, + "lr": { + "486": 1.0, + "487": 1.0, + "488": 1.0, + "489": 1.0, + "490": 1.0, + "491": 1.0, + "492": 1.0, + "493": 1.0, + "494": 1.0, + "495": 1.0, + "496": 1.0, + "497": 1.0, + "498": 1.0, + "499": 1.0, + "500": 1.0, + "501": 1.0, + "502": 1.0, + "503": 1.0, + "504": 1.0, + "505": 1.0, + "506": 1.0, + "507": 1.0, + "508": 1.0, + "509": 1.0, + "510": 1.0, + "511": 1.0, + "512": 1.0, + "513": 1.0, + "514": 1.0, + "515": 1.0, + "516": 1.0, + "517": 1.0, + "518": 1.0, + "519": 1.0, + "520": 1.0, + "521": 1.0, + "522": 1.0, + "523": 1.0, + "524": 1.0, + "525": 1.0, + "526": 1.0, + "527": 1.0, + "528": 1.0, + "529": 1.0, + "530": 1.0, + "531": 1.0, + "532": 1.0, + "533": 1.0, + "534": 1.0, + "535": 1.0, + "536": 1.0, + "537": 1.0, + "538": 1.0, + "539": 1.0 + } + }, + "step_size_list": [ + 0.935968, + 0.922054, + 0.933144, + 0.942102, + 0.934941, + 0.928382, + 0.927942, + 0.926369, + 0.892402, + 0.911293, + 0.959625, + 0.961982, + 0.936837, + 0.929925, + 0.9309, + 0.921106, + 0.921066, + 0.935091, + 0.934662, + 0.916822, + 0.915624, + 0.930467, + 0.945295, + 0.94618, + 0.952947, + 0.953466, + 0.945551, + 0.942559, + 0.933472, + 0.928347, + 0.91776, + 0.922311, + 0.922043, + 0.919095, + 0.941181, + 0.949563, + 0.940666, + 0.94111, + 0.905703, + 0.915443, + 0.924562, + 0.934092, + 0.936933, + 0.939146, + 0.918437, + 0.923224, + 0.937606, + 0.940306, + 0.937946, + 0.938224, + 0.933446, + 0.92431, + 0.921031, + 0.950049 + ], + "train_epoch_time": 4.843919992446899, + "train_loss": 2.374070036804659, + "train_score": 0.30918108853411297, + "val_loss": 2.4118292178406917, + "val_score": 0.2987047940595279 + }, + { + "epoch": 10, + "grad_norm": 0.48772522807121277, + "learning_rate": 1.0, + "model_norm": 88.18107604980469, + "step_logs": { + "grad_norm": { + "540": 0.4458203911781311, + "541": 0.5136281251907349, + "542": 0.5244190096855164, + "543": 0.5123744606971741, + "544": 0.5148400664329529, + "545": 0.6042338609695435, + "546": 0.592066764831543, + "547": 0.5598015189170837, + "548": 0.5485560894012451, + "549": 0.5597467422485352, + "550": 0.6671572923660278, + "551": 0.5480512976646423, + "552": 0.444033145904541, + "553": 0.5193850994110107, + "554": 0.5741039514541626, + "555": 0.6345900297164917, + "556": 0.5866849422454834, + "557": 0.4788570702075958, + "558": 0.5482646822929382, + "559": 0.6118820309638977, + "560": 0.594347357749939, + "561": 0.606283962726593, + "562": 0.6163011789321899, + "563": 0.5812875032424927, + "564": 0.5867927074432373, + "565": 0.6255936622619629, + "566": 0.6476121544837952, + "567": 0.5879682302474976, + "568": 0.5346601009368896, + "569": 0.5324615836143494, + "570": 0.4745098352432251, + "571": 0.4630752503871918, + "572": 0.511570394039154, + "573": 0.5756678581237793, + "574": 0.5992061495780945, + "575": 0.5642318725585938, + "576": 0.536216139793396, + "577": 0.5043255090713501, + "578": 0.5142707824707031, + "579": 0.533702552318573, + "580": 0.5127367377281189, + "581": 0.48662662506103516, + "582": 0.49175190925598145, + "583": 0.6300363540649414, + "584": 0.5854738354682922, + "585": 0.51082444190979, + "586": 0.4999653995037079, + "587": 0.5188165307044983, + "588": 0.5914244651794434, + "589": 0.616300642490387, + "590": 0.6214520335197449, + "591": 0.6554830074310303, + "592": 0.5870980620384216, + "593": 0.48772522807121277 + }, + "loss": { + "540": 2.3845901489257812, + "541": 2.3942341804504395, + "542": 2.3856544494628906, + "543": 2.3923680782318115, + "544": 2.3843882083892822, + "545": 2.4156227111816406, + "546": 2.442225933074951, + "547": 2.400441884994507, + "548": 2.3967676162719727, + "549": 2.380504608154297, + "550": 2.417879581451416, + "551": 2.419123649597168, + "552": 2.3917505741119385, + "553": 2.3664190769195557, + "554": 2.411536693572998, + "555": 2.4109506607055664, + "556": 2.4361066818237305, + "557": 2.3568806648254395, + "558": 2.3607475757598877, + "559": 2.4009745121002197, + "560": 2.4093422889709473, + "561": 2.375514268875122, + "562": 2.42792010307312, + "563": 2.419508457183838, + "564": 2.3964576721191406, + "565": 2.3959453105926514, + "566": 2.4302735328674316, + "567": 2.404489040374756, + "568": 2.3777198791503906, + "569": 2.3871707916259766, + "570": 2.4041543006896973, + "571": 2.3582606315612793, + "572": 2.376455307006836, + "573": 2.3530220985412598, + "574": 2.389054536819458, + "575": 2.4047579765319824, + "576": 2.354835033416748, + "577": 2.3601691722869873, + "578": 2.368180274963379, + "579": 2.3701419830322266, + "580": 2.387045383453369, + "581": 2.3608124256134033, + "582": 2.345092296600342, + "583": 2.3491830825805664, + "584": 2.42059326171875, + "585": 2.3626253604888916, + "586": 2.37371826171875, + "587": 2.3939995765686035, + "588": 2.3850338459014893, + "589": 2.391475200653076, + "590": 2.389573097229004, + "591": 2.4043402671813965, + "592": 2.387998580932617, + "593": 2.3460845947265625 + }, + "lr": { + "540": 1.0, + "541": 1.0, + "542": 1.0, + "543": 1.0, + "544": 1.0, + "545": 1.0, + "546": 1.0, + "547": 1.0, + "548": 1.0, + "549": 1.0, + "550": 1.0, + "551": 1.0, + "552": 1.0, + "553": 1.0, + "554": 1.0, + "555": 1.0, + "556": 1.0, + "557": 1.0, + "558": 1.0, + "559": 1.0, + "560": 1.0, + "561": 1.0, + "562": 1.0, + "563": 1.0, + "564": 1.0, + "565": 1.0, + "566": 1.0, + "567": 1.0, + "568": 1.0, + "569": 1.0, + "570": 1.0, + "571": 1.0, + "572": 1.0, + "573": 1.0, + "574": 1.0, + "575": 1.0, + "576": 1.0, + "577": 1.0, + "578": 1.0, + "579": 1.0, + "580": 1.0, + "581": 1.0, + "582": 1.0, + "583": 1.0, + "584": 1.0, + "585": 1.0, + "586": 1.0, + "587": 1.0, + "588": 1.0, + "589": 1.0, + "590": 1.0, + "591": 1.0, + "592": 1.0, + "593": 1.0 + } + }, + "step_size_list": [ + 0.959992, + 0.947783, + 0.945502, + 0.947986, + 0.947344, + 0.929739, + 0.933039, + 0.938725, + 0.940933, + 0.938255, + 0.915715, + 0.941548, + 0.960414, + 0.946076, + 0.936034, + 0.922921, + 0.934016, + 0.953611, + 0.940146, + 0.927671, + 0.931699, + 0.928188, + 0.927454, + 0.93473, + 0.932975, + 0.924494, + 0.920567, + 0.932934, + 0.943296, + 0.943946, + 0.955267, + 0.956512, + 0.947812, + 0.934214, + 0.930108, + 0.937916, + 0.942462, + 0.948872, + 0.947114, + 0.943317, + 0.947806, + 0.952242, + 0.950969, + 0.922096, + 0.933877, + 0.947667, + 0.949981, + 0.946774, + 0.931681, + 0.92643, + 0.925232, + 0.917978, + 0.932688, + 0.95175 + ], + "train_epoch_time": 4.844288349151611, + "train_loss": 2.351793902847312, + "train_score": 0.32671493903294185, + "val_loss": 2.3972862930166463, + "val_score": 0.3164197051497743 + }, + { + "epoch": 11, + "grad_norm": 0.6007877588272095, + "learning_rate": 1.0, + "model_norm": 88.26958465576172, + "step_logs": { + "grad_norm": { + "594": 0.5038158893585205, + "595": 0.5298601984977722, + "596": 0.7168340086936951, + "597": 0.9889169335365295, + "598": 0.7741715908050537, + "599": 0.6755338311195374, + "600": 0.5615705251693726, + "601": 0.5231757760047913, + "602": 0.4276902377605438, + "603": 0.4245786964893341, + "604": 0.4568428099155426, + "605": 0.47332683205604553, + "606": 0.494045227766037, + "607": 0.4955570101737976, + "608": 0.46529778838157654, + "609": 0.4713149666786194, + "610": 0.4874584972858429, + "611": 0.45520979166030884, + "612": 0.4652065634727478, + "613": 0.605072021484375, + "614": 0.6871828436851501, + "615": 0.6343675851821899, + "616": 0.6152904033660889, + "617": 0.600509762763977, + "618": 0.6721615791320801, + "619": 0.6511174440383911, + "620": 0.5150911808013916, + "621": 0.4967924952507019, + "622": 0.5662773251533508, + "623": 0.5638839602470398, + "624": 0.6103441119194031, + "625": 0.6011389493942261, + "626": 0.4980040192604065, + "627": 0.4948805570602417, + "628": 0.4760291576385498, + "629": 0.530250608921051, + "630": 0.6468032598495483, + "631": 0.7163217663764954, + "632": 0.590148389339447, + "633": 0.4722534120082855, + "634": 0.4898388683795929, + "635": 0.49531492590904236, + "636": 0.49066048860549927, + "637": 0.5269964337348938, + "638": 0.5501883029937744, + "639": 0.5197085738182068, + "640": 0.5104097127914429, + "641": 0.5442822575569153, + "642": 0.5863537788391113, + "643": 0.5064905881881714, + "644": 0.4422999322414398, + "645": 0.5581861734390259, + "646": 0.6343194842338562, + "647": 0.6007877588272095 + }, + "loss": { + "594": 2.3564181327819824, + "595": 2.349951982498169, + "596": 2.385848045349121, + "597": 2.412327289581299, + "598": 2.422610282897949, + "599": 2.4111275672912598, + "600": 2.3904480934143066, + "601": 2.3819735050201416, + "602": 2.340860605239868, + "603": 2.3423237800598145, + "604": 2.3417506217956543, + "605": 2.3592422008514404, + "606": 2.3720672130584717, + "607": 2.3765432834625244, + "608": 2.356266975402832, + "609": 2.3234362602233887, + "610": 2.3410472869873047, + "611": 2.351180076599121, + "612": 2.3297650814056396, + "613": 2.3659005165100098, + "614": 2.4027676582336426, + "615": 2.397608995437622, + "616": 2.3960280418395996, + "617": 2.3853845596313477, + "618": 2.3702778816223145, + "619": 2.4199793338775635, + "620": 2.347090244293213, + "621": 2.3456039428710938, + "622": 2.355914831161499, + "623": 2.36606502532959, + "624": 2.374274730682373, + "625": 2.388221502304077, + "626": 2.3333864212036133, + "627": 2.3182082176208496, + "628": 2.3259224891662598, + "629": 2.3107547760009766, + "630": 2.3683109283447266, + "631": 2.3690876960754395, + "632": 2.3664283752441406, + "633": 2.349916934967041, + "634": 2.3301212787628174, + "635": 2.344449520111084, + "636": 2.328540563583374, + "637": 2.353421449661255, + "638": 2.3580117225646973, + "639": 2.3385441303253174, + "640": 2.326274871826172, + "641": 2.3408737182617188, + "642": 2.333332061767578, + "643": 2.3740005493164062, + "644": 2.299046754837036, + "645": 2.356168270111084, + "646": 2.3704917430877686, + "647": 2.381502866744995 + }, + "lr": { + "594": 1.0, + "595": 1.0, + "596": 1.0, + "597": 1.0, + "598": 1.0, + "599": 1.0, + "600": 1.0, + "601": 1.0, + "602": 1.0, + "603": 1.0, + "604": 1.0, + "605": 1.0, + "606": 1.0, + "607": 1.0, + "608": 1.0, + "609": 1.0, + "610": 1.0, + "611": 1.0, + "612": 1.0, + "613": 1.0, + "614": 1.0, + "615": 1.0, + "616": 1.0, + "617": 1.0, + "618": 1.0, + "619": 1.0, + "620": 1.0, + "621": 1.0, + "622": 1.0, + "623": 1.0, + "624": 1.0, + "625": 1.0, + "626": 1.0, + "627": 1.0, + "628": 1.0, + "629": 1.0, + "630": 1.0, + "631": 1.0, + "632": 1.0, + "633": 1.0, + "634": 1.0, + "635": 1.0, + "636": 1.0, + "637": 1.0, + "638": 1.0, + "639": 1.0, + "640": 1.0, + "641": 1.0, + "642": 1.0, + "643": 1.0, + "644": 1.0, + "645": 1.0, + "646": 1.0, + "647": 1.0 + } + }, + "step_size_list": [ + 0.948893, + 0.943632, + 0.902782, + 0.831463, + 0.889919, + 0.913548, + 0.938119, + 0.945667, + 0.962398, + 0.962945, + 0.957339, + 0.954671, + 0.951069, + 0.950872, + 0.956076, + 0.954377, + 0.951701, + 0.957793, + 0.955615, + 0.928184, + 0.910526, + 0.922576, + 0.926782, + 0.929724, + 0.912987, + 0.91946, + 0.946503, + 0.95002, + 0.93628, + 0.937038, + 0.927257, + 0.929665, + 0.949538, + 0.949828, + 0.95355, + 0.942651, + 0.918845, + 0.902287, + 0.931457, + 0.954696, + 0.951034, + 0.950279, + 0.950846, + 0.944283, + 0.939685, + 0.945404, + 0.946974, + 0.940489, + 0.931382, + 0.94874, + 0.959191, + 0.937982, + 0.921771, + 0.929557 + ], + "train_epoch_time": 4.843904495239258, + "train_loss": 2.349056749124951, + "train_score": 0.31112020254306166, + "val_loss": 2.388967074701898, + "val_score": 0.30335551849325815 + }, + { + "epoch": 12, + "grad_norm": 0.39419859647750854, + "learning_rate": 1.0, + "model_norm": 88.33519744873047, + "step_logs": { + "grad_norm": { + "648": 0.6511616110801697, + "649": 0.6989766359329224, + "650": 0.5482044219970703, + "651": 0.4628799557685852, + "652": 0.4435892403125763, + "653": 0.4629161059856415, + "654": 0.538483738899231, + "655": 0.5638764500617981, + "656": 0.517505943775177, + "657": 0.4965159595012665, + "658": 0.5503877997398376, + "659": 0.5701238512992859, + "660": 0.5422475934028625, + "661": 0.5240478515625, + "662": 0.5590012669563293, + "663": 0.5614112019538879, + "664": 0.4637320637702942, + "665": 0.41335898637771606, + "666": 0.3972206115722656, + "667": 0.38193708658218384, + "668": 0.41595569252967834, + "669": 0.4244782626628876, + "670": 0.4399038851261139, + "671": 0.4313145875930786, + "672": 0.4001099765300751, + "673": 0.41833269596099854, + "674": 0.44175443053245544, + "675": 0.4228600263595581, + "676": 0.42377057671546936, + "677": 0.39453262090682983, + "678": 0.39062777161598206, + "679": 0.38830944895744324, + "680": 0.44104278087615967, + "681": 0.4659619629383087, + "682": 0.4254288077354431, + "683": 0.39343419671058655, + "684": 0.3838067054748535, + "685": 0.3991822898387909, + "686": 0.42290619015693665, + "687": 0.4018757939338684, + "688": 0.3866080939769745, + "689": 0.405292809009552, + "690": 0.414725661277771, + "691": 0.4198669493198395, + "692": 0.3751433193683624, + "693": 0.3406931161880493, + "694": 0.34855344891548157, + "695": 0.3698401153087616, + "696": 0.41545191407203674, + "697": 0.45390641689300537, + "698": 0.4355085790157318, + "699": 0.42599087953567505, + "700": 0.4184563457965851, + "701": 0.39419859647750854 + }, + "loss": { + "648": 2.341552734375, + "649": 2.3889966011047363, + "650": 2.3494069576263428, + "651": 2.3306591510772705, + "652": 2.3127858638763428, + "653": 2.312307357788086, + "654": 2.296816349029541, + "655": 2.33444881439209, + "656": 2.3216490745544434, + "657": 2.321135997772217, + "658": 2.307474136352539, + "659": 2.322965145111084, + "660": 2.2960028648376465, + "661": 2.335008144378662, + "662": 2.334357500076294, + "663": 2.3267815113067627, + "664": 2.3098506927490234, + "665": 2.300386428833008, + "666": 2.3022618293762207, + "667": 2.258044719696045, + "668": 2.263434410095215, + "669": 2.3040952682495117, + "670": 2.2888686656951904, + "671": 2.2761268615722656, + "672": 2.271550178527832, + "673": 2.3023762702941895, + "674": 2.2836050987243652, + "675": 2.269228935241699, + "676": 2.3071327209472656, + "677": 2.2728633880615234, + "678": 2.274847984313965, + "679": 2.291092872619629, + "680": 2.2709689140319824, + "681": 2.2796080112457275, + "682": 2.2883238792419434, + "683": 2.294191360473633, + "684": 2.2727112770080566, + "685": 2.26954984664917, + "686": 2.2552878856658936, + "687": 2.2681663036346436, + "688": 2.259733200073242, + "689": 2.2676241397857666, + "690": 2.248312473297119, + "691": 2.2806448936462402, + "692": 2.236166000366211, + "693": 2.226914882659912, + "694": 2.247272491455078, + "695": 2.246020793914795, + "696": 2.268394947052002, + "697": 2.252779483795166, + "698": 2.252521276473999, + "699": 2.250152587890625, + "700": 2.258693218231201, + "701": 2.2442920207977295 + }, + "lr": { + "648": 1.0, + "649": 0.9938271604938271, + "650": 0.9876543209876543, + "651": 0.9814814814814815, + "652": 0.9753086419753086, + "653": 0.9691358024691358, + "654": 0.962962962962963, + "655": 0.9567901234567902, + "656": 0.9506172839506173, + "657": 0.9444444444444444, + "658": 0.9382716049382716, + "659": 0.9320987654320988, + "660": 0.9259259259259259, + "661": 0.9197530864197531, + "662": 0.9135802469135803, + "663": 0.9074074074074074, + "664": 0.9012345679012346, + "665": 0.8950617283950617, + "666": 0.8888888888888888, + "667": 0.8827160493827161, + "668": 0.8765432098765432, + "669": 0.8703703703703703, + "670": 0.8641975308641976, + "671": 0.8580246913580247, + "672": 0.8518518518518519, + "673": 0.845679012345679, + "674": 0.8395061728395061, + "675": 0.8333333333333334, + "676": 0.8271604938271605, + "677": 0.8209876543209876, + "678": 0.8148148148148149, + "679": 0.808641975308642, + "680": 0.8024691358024691, + "681": 0.7962962962962963, + "682": 0.7901234567901234, + "683": 0.7839506172839507, + "684": 0.7777777777777778, + "685": 0.7716049382716049, + "686": 0.7654320987654322, + "687": 0.7592592592592593, + "688": 0.7530864197530864, + "689": 0.7469135802469136, + "690": 0.7407407407407407, + "691": 0.7345679012345678, + "692": 0.7283950617283951, + "693": 0.7222222222222222, + "694": 0.7160493827160495, + "695": 0.7098765432098766, + "696": 0.7037037037037037, + "697": 0.6975308641975309, + "698": 0.691358024691358, + "699": 0.6851851851851851, + "700": 0.6790123456790124, + "701": 0.6728395061728395 + } + }, + "step_size_list": [ + 0.916976, + 0.902148, + 0.928972, + 0.939114, + 0.936455, + 0.927485, + 0.907783, + 0.898261, + 0.901205, + 0.899338, + 0.883837, + 0.875036, + 0.874102, + 0.872559, + 0.860937, + 0.854869, + 0.864948, + 0.866266, + 0.862614, + 0.858245, + 0.848129, + 0.841725, + 0.833739, + 0.828958, + 0.827027, + 0.819345, + 0.810436, + 0.806843, + 0.801363, + 0.798539, + 0.79314, + 0.787682, + 0.775807, + 0.767203, + 0.766183, + 0.763752, + 0.758655, + 0.751255, + 0.742885, + 0.739276, + 0.734786, + 0.72724, + 0.720331, + 0.714289, + 0.712074, + 0.70888, + 0.702453, + 0.694857, + 0.685355, + 0.67597, + 0.671804, + 0.666763, + 0.661599, + 0.657524 + ], + "train_epoch_time": 4.844761371612549, + "train_loss": 2.247865048576804, + "train_score": 0.345256456326549, + "val_loss": 2.29649863998597, + "val_score": 0.333964194105906 + }, + { + "epoch": 13, + "grad_norm": 0.21998971700668335, + "learning_rate": 0.6666666666666667, + "model_norm": 88.36693572998047, + "step_logs": { + "grad_norm": { + "702": 0.3778780400753021, + "703": 0.3608350455760956, + "704": 0.3651042878627777, + "705": 0.388094037771225, + "706": 0.3618382215499878, + "707": 0.349433958530426, + "708": 0.350179523229599, + "709": 0.3341130018234253, + "710": 0.29819419980049133, + "711": 0.2698521614074707, + "712": 0.2913244962692261, + "713": 0.31975212693214417, + "714": 0.3172418475151062, + "715": 0.29471907019615173, + "716": 0.28547587990760803, + "717": 0.26432570815086365, + "718": 0.2401735931634903, + "719": 0.26239922642707825, + "720": 0.2587120831012726, + "721": 0.27061891555786133, + "722": 0.2909615933895111, + "723": 0.3134424388408661, + "724": 0.3677830398082733, + "725": 0.32742029428482056, + "726": 0.2667600214481354, + "727": 0.2844672203063965, + "728": 0.279303640127182, + "729": 0.2534867525100708, + "730": 0.2552092969417572, + "731": 0.24614571034908295, + "732": 0.23709611594676971, + "733": 0.2705739140510559, + "734": 0.26088449358940125, + "735": 0.24297428131103516, + "736": 0.2223147451877594, + "737": 0.20578642189502716, + "738": 0.22110122442245483, + "739": 0.22143249213695526, + "740": 0.23313593864440918, + "741": 0.22729408740997314, + "742": 0.23405714333057404, + "743": 0.24651159346103668, + "744": 0.22185885906219482, + "745": 0.214141383767128, + "746": 0.22089646756649017, + "747": 0.20220467448234558, + "748": 0.21291328966617584, + "749": 0.21357682347297668, + "750": 0.21467262506484985, + "751": 0.2303503304719925, + "752": 0.21531184017658234, + "753": 0.21045315265655518, + "754": 0.20281724631786346, + "755": 0.21998971700668335 + }, + "loss": { + "702": 2.2505717277526855, + "703": 2.2411627769470215, + "704": 2.238123893737793, + "705": 2.239494562149048, + "706": 2.238781452178955, + "707": 2.2343153953552246, + "708": 2.2369303703308105, + "709": 2.2434890270233154, + "710": 2.239253282546997, + "711": 2.227365493774414, + "712": 2.2049736976623535, + "713": 2.235891342163086, + "714": 2.2205817699432373, + "715": 2.254295825958252, + "716": 2.2185144424438477, + "717": 2.1970832347869873, + "718": 2.2062389850616455, + "719": 2.2300188541412354, + "720": 2.2006869316101074, + "721": 2.2079954147338867, + "722": 2.2378220558166504, + "723": 2.2083661556243896, + "724": 2.2260518074035645, + "725": 2.2498276233673096, + "726": 2.2193684577941895, + "727": 2.2244410514831543, + "728": 2.2064828872680664, + "729": 2.218808889389038, + "730": 2.1951470375061035, + "731": 2.235578775405884, + "732": 2.2016549110412598, + "733": 2.2122249603271484, + "734": 2.2072882652282715, + "735": 2.2308173179626465, + "736": 2.1912527084350586, + "737": 2.1960291862487793, + "738": 2.213735342025757, + "739": 2.222991943359375, + "740": 2.20228910446167, + "741": 2.2345454692840576, + "742": 2.195842742919922, + "743": 2.1963882446289062, + "744": 2.1989965438842773, + "745": 2.1889209747314453, + "746": 2.2223312854766846, + "747": 2.21597957611084, + "748": 2.195054769515991, + "749": 2.1846275329589844, + "750": 2.2157058715820312, + "751": 2.199411630630493, + "752": 2.212005615234375, + "753": 2.203597068786621, + "754": 2.2048442363739014, + "755": 2.2119381427764893 + }, + "lr": { + "702": 0.6666666666666667, + "703": 0.6604938271604939, + "704": 0.654320987654321, + "705": 0.6481481481481481, + "706": 0.6419753086419753, + "707": 0.6358024691358024, + "708": 0.6296296296296297, + "709": 0.6234567901234568, + "710": 0.617283950617284, + "711": 0.6111111111111112, + "712": 0.6049382716049383, + "713": 0.5987654320987654, + "714": 0.5925925925925926, + "715": 0.5864197530864197, + "716": 0.5802469135802469, + "717": 0.5740740740740741, + "718": 0.5679012345679013, + "719": 0.5617283950617284, + "720": 0.5555555555555556, + "721": 0.5493827160493827, + "722": 0.5432098765432098, + "723": 0.537037037037037, + "724": 0.5308641975308642, + "725": 0.5246913580246914, + "726": 0.5185185185185186, + "727": 0.5123456790123457, + "728": 0.5061728395061729, + "729": 0.5, + "730": 0.49382716049382713, + "731": 0.48765432098765427, + "732": 0.4814814814814815, + "733": 0.47530864197530864, + "734": 0.4691358024691358, + "735": 0.4629629629629629, + "736": 0.45679012345679015, + "737": 0.4506172839506173, + "738": 0.4444444444444444, + "739": 0.43827160493827155, + "740": 0.4320987654320988, + "741": 0.42592592592592593, + "742": 0.41975308641975306, + "743": 0.4135802469135802, + "744": 0.40740740740740744, + "745": 0.4012345679012346, + "746": 0.3950617283950617, + "747": 0.38888888888888884, + "748": 0.3827160493827161, + "749": 0.3765432098765432, + "750": 0.37037037037037035, + "751": 0.3641975308641975, + "752": 0.3580246913580247, + "753": 0.35185185185185186, + "754": 0.345679012345679, + "755": 0.3395061728395061 + } + }, + "step_size_list": [ + 0.652859, + 0.64806, + 0.641815, + 0.634323, + 0.630146, + 0.624945, + 0.618948, + 0.613934, + 0.60981, + 0.605067, + 0.597977, + 0.590679, + 0.58474, + 0.579869, + 0.574128, + 0.568881, + 0.563716, + 0.556899, + 0.550901, + 0.544423, + 0.537685, + 0.530697, + 0.522438, + 0.518213, + 0.514244, + 0.507615, + 0.501684, + 0.496406, + 0.490236, + 0.484453, + 0.47854, + 0.4716, + 0.465767, + 0.460144, + 0.454449, + 0.448668, + 0.442274, + 0.436163, + 0.429807, + 0.423839, + 0.417567, + 0.411227, + 0.405558, + 0.399555, + 0.393356, + 0.387499, + 0.38121, + 0.375069, + 0.368949, + 0.362604, + 0.356686, + 0.350612, + 0.344568, + 0.33825 + ], + "train_epoch_time": 4.8431618213653564, + "train_loss": 2.195485944898433, + "train_score": 0.3584794207659137, + "val_loss": 2.256491680232857, + "val_score": 0.34196505412442263 + }, + { + "epoch": 14, + "grad_norm": 0.17988839745521545, + "learning_rate": 0.33333333333333337, + "model_norm": 88.37663269042969, + "step_logs": { + "grad_norm": { + "756": 0.1821930855512619, + "757": 0.19538429379463196, + "758": 0.20732107758522034, + "759": 0.20958596467971802, + "760": 0.2063555270433426, + "761": 0.20376798510551453, + "762": 0.20231837034225464, + "763": 0.21082890033721924, + "764": 0.22145602107048035, + "765": 0.2254459261894226, + "766": 0.20370836555957794, + "767": 0.19688469171524048, + "768": 0.2070452868938446, + "769": 0.20759566128253937, + "770": 0.22178387641906738, + "771": 0.19839537143707275, + "772": 0.18363282084465027, + "773": 0.18331368267536163, + "774": 0.1851062923669815, + "775": 0.19360743463039398, + "776": 0.20063072443008423, + "777": 0.1836511492729187, + "778": 0.18879574537277222, + "779": 0.20281025767326355, + "780": 0.19790515303611755, + "781": 0.2011026293039322, + "782": 0.19825921952724457, + "783": 0.1800878793001175, + "784": 0.1839653104543686, + "785": 0.20201678574085236, + "786": 0.18508686125278473, + "787": 0.18991634249687195, + "788": 0.19692765176296234, + "789": 0.18821491301059723, + "790": 0.19337418675422668, + "791": 0.19300681352615356, + "792": 0.18469208478927612, + "793": 0.18032197654247284, + "794": 0.17788352072238922, + "795": 0.1759253442287445, + "796": 0.18745002150535583, + "797": 0.18053771555423737, + "798": 0.1818985641002655, + "799": 0.18222616612911224, + "800": 0.19171017408370972, + "801": 0.1934817135334015, + "802": 0.16393093764781952, + "803": 0.1889495998620987, + "804": 0.17868955433368683, + "805": 0.1914677768945694, + "806": 0.1868775188922882, + "807": 0.18818259239196777, + "808": 0.17567041516304016, + "809": 0.17988839745521545 + }, + "loss": { + "756": 2.2010631561279297, + "757": 2.209050416946411, + "758": 2.1987504959106445, + "759": 2.207879066467285, + "760": 2.1938045024871826, + "761": 2.203763246536255, + "762": 2.1959850788116455, + "763": 2.2000350952148438, + "764": 2.1638104915618896, + "765": 2.1800477504730225, + "766": 2.1980581283569336, + "767": 2.1998472213745117, + "768": 2.2184081077575684, + "769": 2.171358108520508, + "770": 2.228346586227417, + "771": 2.169013023376465, + "772": 2.1921329498291016, + "773": 2.18513822555542, + "774": 2.1895694732666016, + "775": 2.173962354660034, + "776": 2.1969332695007324, + "777": 2.1867828369140625, + "778": 2.186495780944824, + "779": 2.2036492824554443, + "780": 2.190216302871704, + "781": 2.156318187713623, + "782": 2.1985085010528564, + "783": 2.160374164581299, + "784": 2.2032132148742676, + "785": 2.202622890472412, + "786": 2.1816070079803467, + "787": 2.1931629180908203, + "788": 2.1984469890594482, + "789": 2.152705669403076, + "790": 2.1666464805603027, + "791": 2.200831413269043, + "792": 2.2016005516052246, + "793": 2.1729722023010254, + "794": 2.1898653507232666, + "795": 2.177778720855713, + "796": 2.190587043762207, + "797": 2.1497602462768555, + "798": 2.182061195373535, + "799": 2.1854350566864014, + "800": 2.172102451324463, + "801": 2.1652748584747314, + "802": 2.194276809692383, + "803": 2.1874892711639404, + "804": 2.2078864574432373, + "805": 2.201608657836914, + "806": 2.193455696105957, + "807": 2.1880478858947754, + "808": 2.1807518005371094, + "809": 2.1754772663116455 + }, + "lr": { + "756": 0.33333333333333337, + "757": 0.3271604938271605, + "758": 0.32098765432098764, + "759": 0.31481481481481477, + "760": 0.308641975308642, + "761": 0.30246913580246915, + "762": 0.2962962962962963, + "763": 0.2901234567901234, + "764": 0.28395061728395066, + "765": 0.2777777777777778, + "766": 0.2716049382716049, + "767": 0.26543209876543206, + "768": 0.2592592592592593, + "769": 0.25308641975308643, + "770": 0.24691358024691357, + "771": 0.2407407407407407, + "772": 0.23456790123456794, + "773": 0.22839506172839508, + "774": 0.2222222222222222, + "775": 0.21604938271604934, + "776": 0.2098765432098766, + "777": 0.20370370370370372, + "778": 0.19753086419753085, + "779": 0.191358024691358, + "780": 0.18518518518518523, + "781": 0.17901234567901236, + "782": 0.1728395061728395, + "783": 0.16666666666666663, + "784": 0.16049382716049387, + "785": 0.154320987654321, + "786": 0.14814814814814814, + "787": 0.14197530864197527, + "788": 0.13580246913580252, + "789": 0.12962962962962965, + "790": 0.12345679012345678, + "791": 0.11728395061728392, + "792": 0.11111111111111116, + "793": 0.1049382716049383, + "794": 0.09876543209876543, + "795": 0.09259259259259256, + "796": 0.0864197530864198, + "797": 0.08024691358024694, + "798": 0.07407407407407407, + "799": 0.0679012345679012, + "800": 0.06172839506172845, + "801": 0.05555555555555558, + "802": 0.04938271604938271, + "803": 0.043209876543209846, + "804": 0.03703703703703709, + "805": 0.030864197530864224, + "806": 0.024691358024691357, + "807": 0.01851851851851849, + "808": 0.012345679012345734, + "809": 0.006172839506172867 + } + }, + "step_size_list": [ + 0.332498, + 0.326238, + 0.319984, + 0.313832, + 0.30772, + 0.30161, + 0.29548, + 0.289276, + 0.28304, + 0.276881, + 0.27091, + 0.264813, + 0.258611, + 0.252452, + 0.246243, + 0.240216, + 0.234145, + 0.227995, + 0.221837, + 0.215648, + 0.209474, + 0.203384, + 0.197213, + 0.191017, + 0.184879, + 0.178712, + 0.172573, + 0.166458, + 0.160296, + 0.154101, + 0.147976, + 0.14181, + 0.13564, + 0.129492, + 0.123325, + 0.117168, + 0.111016, + 0.104856, + 0.098695, + 0.0925317, + 0.0863599, + 0.0801981, + 0.0740325, + 0.0678662, + 0.0616962, + 0.0555289, + 0.0493678, + 0.0431946, + 0.0370271, + 0.0308563, + 0.0246865, + 0.0185157, + 0.0123446, + 0.00617256 + ], + "train_epoch_time": 4.844646692276001, + "train_loss": 2.182451911978263, + "train_score": 0.3627376254439525, + "val_loss": 2.247394614323683, + "val_score": 0.3452479188874723 + } + ], + "summary": { + "data_parallel": "false", + "end_time": "2025-12-03 10:40:37.758976", + "final_model_norm": 88.37663269042969, + "init_model_norm": 87.41546630859375, + "input_dim": [ + 256 + ], + "num_batches_per_epoch": 54, + "num_workers": 0, + "output_dim": [ + 256 + ], + "start_time": "2025-12-03 10:38:56.130617", + "step_scheduler_on_epoch": false + } + } +] \ No newline at end of file From 453fca1e7ba4a5e8309f890bae67605338a285d5 Mon Sep 17 00:00:00 2001 From: fabian-sp Date: Tue, 6 Jan 2026 09:40:01 +0100 Subject: [PATCH 6/6] remove tracked files --- configs/lr-stability/cifar10_resnet20-2.json | 12 - configs/lr-stability/cifar10_resnet20-3.json | 12 - configs/lr-stability/cifar10_resnet20-4.json | 13 - configs/lr-stability/cifar10_resnet20.json | 12 - configs/lr-stability/dna_logreg.json | 14 - configs/lr-stability/linreg_v1.json | 15 - configs/lr-stability/linreg_v2.json | 15 - configs/lr-stability/linreg_v3.json | 15 - configs/lr-stability/linreg_v4.json | 15 - configs/lr-stability/shakespeare-2.json | 12 - configs/lr-stability/shakespeare-3.json | 12 - configs/lr-stability/shakespeare.json | 12 - output/lr-stability/shakespeare-2.json | 43214 ----------------- output/lr-stability/shakespeare-3.json | 43214 ----------------- output/lr-stability/shakespeare.json | 33134 ------------- 15 files changed, 119721 deletions(-) delete mode 100644 configs/lr-stability/cifar10_resnet20-2.json delete mode 100644 configs/lr-stability/cifar10_resnet20-3.json delete mode 100644 configs/lr-stability/cifar10_resnet20-4.json delete mode 100644 configs/lr-stability/cifar10_resnet20.json delete mode 100644 configs/lr-stability/dna_logreg.json delete mode 100644 configs/lr-stability/linreg_v1.json delete mode 100644 configs/lr-stability/linreg_v2.json delete mode 100644 configs/lr-stability/linreg_v3.json delete mode 100644 configs/lr-stability/linreg_v4.json delete mode 100644 configs/lr-stability/shakespeare-2.json delete mode 100644 configs/lr-stability/shakespeare-3.json delete mode 100644 configs/lr-stability/shakespeare.json delete mode 100644 output/lr-stability/shakespeare-2.json delete mode 100644 output/lr-stability/shakespeare-3.json delete mode 100644 output/lr-stability/shakespeare.json diff --git a/configs/lr-stability/cifar10_resnet20-2.json b/configs/lr-stability/cifar10_resnet20-2.json deleted file mode 100644 index 4e45796..0000000 --- a/configs/lr-stability/cifar10_resnet20-2.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dataset": "cifar10", - "model": "resnet20", - "model_kwargs": {"batch_norm": true}, - "loss_func": "cross_entropy", - "score_func": "cross_entropy_accuracy", - "opt": [{"name": "prox-sps", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant"} - ], - "batch_size": 128, - "max_epoch": 20, - "n_runs": 3 - } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20-3.json b/configs/lr-stability/cifar10_resnet20-3.json deleted file mode 100644 index d285025..0000000 --- a/configs/lr-stability/cifar10_resnet20-3.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dataset": "cifar10", - "model": "resnet20", - "model_kwargs": {"batch_norm": true}, - "loss_func": "cross_entropy", - "score_func": "cross_entropy_accuracy", - "opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant", "warmup_steps": 100, "stepwise_schedule": true} - ], - "batch_size": 128, - "max_epoch": 20, - "n_runs": 3 - } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20-4.json b/configs/lr-stability/cifar10_resnet20-4.json deleted file mode 100644 index d119b3d..0000000 --- a/configs/lr-stability/cifar10_resnet20-4.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "dataset": "cifar10", - "model": "resnet20", - "model_kwargs": {"batch_norm": true}, - "loss_func": "cross_entropy", - "score_func": "cross_entropy_accuracy", - "opt": [{"name": "nle", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, - {"name": "ngn", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"} - ], - "batch_size": 128, - "max_epoch": 20, - "n_runs": 3 - } \ No newline at end of file diff --git a/configs/lr-stability/cifar10_resnet20.json b/configs/lr-stability/cifar10_resnet20.json deleted file mode 100644 index 669f19f..0000000 --- a/configs/lr-stability/cifar10_resnet20.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dataset": "cifar10", - "model": "resnet20", - "model_kwargs": {"batch_norm": true}, - "loss_func": "cross_entropy", - "score_func": "cross_entropy_accuracy", - "opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "weight_decay": 0, "lr_schedule": "constant"} - ], - "batch_size": 128, - "max_epoch": 20, - "n_runs": 3 - } \ No newline at end of file diff --git a/configs/lr-stability/dna_logreg.json b/configs/lr-stability/dna_logreg.json deleted file mode 100644 index 71e8b53..0000000 --- a/configs/lr-stability/dna_logreg.json +++ /dev/null @@ -1,14 +0,0 @@ -{ -"dataset": "dna", -"model": "linear", -"model_kwargs": {"output_size": 3}, -"loss_func": "cross_entropy", -"score_func": "cross_entropy_accuracy", -"opt": [{"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, - {"name": "prox-sps", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant"}, - {"name": "sgd", "lr": [0.01, 0.03162, 0.1, 0.3162, 1, 3.162, 10, 31.62, 100], "lr_schedule": "constant", "warmup_steps": 100, "stepwise_schedule": true} - ], -"batch_size": 16, -"max_epoch": 10, -"n_runs": 3 -} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v1.json b/configs/lr-stability/linreg_v1.json deleted file mode 100644 index b0da316..0000000 --- a/configs/lr-stability/linreg_v1.json +++ /dev/null @@ -1,15 +0,0 @@ -{ -"dataset": "synthetic_linear", -"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 0}, -"model": "linear", -"model_kwargs": {"bias": false}, -"loss_func": "squared", -"score_func": "squared", -"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} - ], -"batch_size": 5, -"max_epoch": 10, -"n_runs": 5 -} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v2.json b/configs/lr-stability/linreg_v2.json deleted file mode 100644 index 1ae5cee..0000000 --- a/configs/lr-stability/linreg_v2.json +++ /dev/null @@ -1,15 +0,0 @@ -{ -"dataset": "synthetic_linear", -"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 1.0}, -"model": "linear", -"model_kwargs": {"bias": false}, -"loss_func": "squared", -"score_func": "squared", -"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} - ], -"batch_size": 5, -"max_epoch": 10, -"n_runs": 5 -} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v3.json b/configs/lr-stability/linreg_v3.json deleted file mode 100644 index 40b1708..0000000 --- a/configs/lr-stability/linreg_v3.json +++ /dev/null @@ -1,15 +0,0 @@ -{ -"dataset": "synthetic_linear", -"dataset_kwargs": {"p": 10, "n_samples": 50}, -"model": "linear", -"model_kwargs": {"bias": false}, -"loss_func": "squared", -"score_func": "squared", -"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant", "lb": -2}, - {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} - ], -"batch_size": 5, -"max_epoch": 10, -"n_runs": 5 -} \ No newline at end of file diff --git a/configs/lr-stability/linreg_v4.json b/configs/lr-stability/linreg_v4.json deleted file mode 100644 index 821bae6..0000000 --- a/configs/lr-stability/linreg_v4.json +++ /dev/null @@ -1,15 +0,0 @@ -{ -"dataset": "synthetic_linear", -"dataset_kwargs": {"p": 10, "n_samples": 50, "noise": 1.0}, -"model": "linear", -"model_kwargs": {"bias": false}, -"loss_func": "squared", -"score_func": "squared", -"opt": [{"name": "sgd", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "prox-sps", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"}, - {"name": "spp", "lr": [0.001, 0.002, 0.005, 0.01, 0.022, 0.046, 0.1, 0.215, 0.464, 1.0, 2.154, 4.642, 10.0, 21.544, 46.416, 100.0], "lr_schedule": "constant"} - ], -"batch_size": 25, -"max_epoch": 50, -"n_runs": 5 -} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare-2.json b/configs/lr-stability/shakespeare-2.json deleted file mode 100644 index b73037e..0000000 --- a/configs/lr-stability/shakespeare-2.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "max_epoch": 15, - "model": "llama", - "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, - "opt": [{"name": "prox-sps", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], - "loss_func": "sequence_cross_entropy", - "score_func": "sequence_cross_entropy_accuracy", - "n_runs": 3 -} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare-3.json b/configs/lr-stability/shakespeare-3.json deleted file mode 100644 index ab85f9e..0000000 --- a/configs/lr-stability/shakespeare-3.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "max_epoch": 15, - "model": "llama", - "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, - "opt": [{"name": "ngn", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], - "loss_func": "sequence_cross_entropy", - "score_func": "sequence_cross_entropy_accuracy", - "n_runs": 3 -} \ No newline at end of file diff --git a/configs/lr-stability/shakespeare.json b/configs/lr-stability/shakespeare.json deleted file mode 100644 index 6677b80..0000000 --- a/configs/lr-stability/shakespeare.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "max_epoch": 15, - "model": "llama", - "model_kwargs": {"vocab_size": 92, "dim": 384, "expand": 4, "n_layers": 6, "n_heads": 6, "mlp": "mlp", "seq_len": 256}, - "opt": [{"name": "sgd", "lr": [0.1, 0.215, 0.464, 1.0], "lr_schedule": "wsd", "warmup_steps": 50, "stepwise_schedule": true}], - "loss_func": "sequence_cross_entropy", - "score_func": "sequence_cross_entropy_accuracy", - "n_runs": 3 -} \ No newline at end of file diff --git a/output/lr-stability/shakespeare-2.json b/output/lr-stability/shakespeare-2.json deleted file mode 100644 index 3dca842..0000000 --- a/output/lr-stability/shakespeare-2.json +++ /dev/null @@ -1,43214 +0,0 @@ -[ - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.595483422279358, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.43165588378906, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 8.304182052612305, - "3": 5.066382884979248, - "4": 3.986539840698242, - "5": 7.368899822235107, - "6": 22.684249877929688, - "7": 8.206059455871582, - "8": 5.990283966064453, - "9": 3.6286511421203613, - "10": 2.7827494144439697, - "11": 4.019216060638428, - "12": 7.367673873901367, - "13": 5.671923637390137, - "14": 8.214241027832031, - "15": 5.449173450469971, - "16": 53.0404167175293, - "17": 5.609607696533203, - "18": 3.5228676795959473, - "19": 11.873223304748535, - "20": 3.7461884021759033, - "21": 56.21821594238281, - "22": 4.051365852355957, - "23": 5.570032119750977, - "24": 6.279479026794434, - "25": 15.953083992004395, - "26": 5.07693338394165, - "27": 8.598682403564453, - "28": 3.695465326309204, - "29": 39.91961669921875, - "30": 5.038580894470215, - "31": 4.063226699829102, - "32": 3.440243721008301, - "33": 3.7905149459838867, - "34": 4.257300853729248, - "35": 3.8865303993225098, - "36": 6.444921493530273, - "37": 3.5574936866760254, - "38": 28.470718383789062, - "39": 2.530970811843872, - "40": 2.914734363555908, - "41": 3.5695884227752686, - "42": 3.06233549118042, - "43": 3.487231492996216, - "44": 2.910644054412842, - "45": 1.67691171169281, - "46": 1.6890980005264282, - "47": 3.128803253173828, - "48": 2.8322150707244873, - "49": 1.7586274147033691, - "50": 1.6818804740905762, - "51": 2.382829189300537, - "52": 2.2293994426727295, - "53": 1.595483422279358 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.94942569732666, - "3": 3.737607002258301, - "4": 3.612809658050537, - "5": 3.569070816040039, - "6": 4.066908836364746, - "7": 4.043568134307861, - "8": 3.549619436264038, - "9": 3.492824077606201, - "10": 3.3548102378845215, - "11": 3.344089984893799, - "12": 3.4439680576324463, - "13": 3.403204917907715, - "14": 3.536405324935913, - "15": 3.2851245403289795, - "16": 4.4294633865356445, - "17": 3.292044162750244, - "18": 3.1227591037750244, - "19": 3.1227288246154785, - "20": 3.1710028648376465, - "21": 4.861227512359619, - "22": 3.012739896774292, - "23": 3.187997341156006, - "24": 3.2275547981262207, - "25": 3.138049602508545, - "26": 3.1043057441711426, - "27": 3.3548502922058105, - "28": 2.914957046508789, - "29": 3.808320999145508, - "30": 3.0150039196014404, - "31": 3.117154121398926, - "32": 2.905519485473633, - "33": 2.892486810684204, - "34": 2.9366321563720703, - "35": 3.109389305114746, - "36": 2.8898768424987793, - "37": 2.8421175479888916, - "38": 6.3293070793151855, - "39": 2.8310492038726807, - "40": 2.8002824783325195, - "41": 2.942443370819092, - "42": 2.8423304557800293, - "43": 2.849879503250122, - "44": 2.9626104831695557, - "45": 2.724215507507324, - "46": 2.678582191467285, - "47": 2.75137996673584, - "48": 2.9589619636535645, - "49": 2.7027854919433594, - "50": 2.6485424041748047, - "51": 2.6648268699645996, - "52": 2.7782740592956543, - "53": 2.632413387298584 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 0.00839976, - 0.00857642, - 0.0572717, - 0.145612, - 0.227328, - 0.0657279, - 0.00790343, - 0.0600476, - 0.0989207, - 0.265269, - 0.433231, - 0.207012, - 0.0634451, - 0.105786, - 0.0524116, - 0.110634, - 0.00157448, - 0.104617, - 0.25162, - 0.0221512, - 0.225953, - 0.00153813, - 0.183552, - 0.102755, - 0.0818515, - 0.0123302, - 0.120437, - 0.0453742, - 0.213449, - 0.0023898, - 0.11876, - 0.188806, - 0.245497, - 0.201314, - 0.162025, - 0.20585, - 0.0695735, - 0.224571, - 0.00780835, - 0.44195, - 0.329613, - 0.230925, - 0.303088, - 0.23435, - 0.349701, - 0.968771, - 0.938848, - 0.281057, - 0.368882, - 0.873904, - 0.936304, - 0.469335, - 0.558984, - 1.03412 - ], - "train_epoch_time": 5.876681566238403, - "train_loss": 2.619171995480399, - "train_score": 0.25954425208366755, - "val_loss": 2.6478476132919537, - "val_score": 0.25241281593434434 - }, - { - "epoch": 1, - "grad_norm": 1.7122350931167603, - "learning_rate": 0.1, - "model_norm": 87.44861602783203, - "step_logs": { - "grad_norm": { - "54": 1.4994385242462158, - "55": 1.755557656288147, - "56": 2.5974295139312744, - "57": 2.6195151805877686, - "58": 2.341726779937744, - "59": 2.764500856399536, - "60": 2.719050168991089, - "61": 2.9073119163513184, - "62": 2.706298351287842, - "63": 2.1449856758117676, - "64": 1.3226447105407715, - "65": 1.3035261631011963, - "66": 1.8220620155334473, - "67": 2.0108823776245117, - "68": 2.291959524154663, - "69": 1.9728983640670776, - "70": 1.2969900369644165, - "71": 1.5286873579025269, - "72": 2.0283780097961426, - "73": 2.1360676288604736, - "74": 2.1657683849334717, - "75": 1.932334065437317, - "76": 1.7503284215927124, - "77": 2.015733242034912, - "78": 2.3730719089508057, - "79": 2.149228572845459, - "80": 1.7569128274917603, - "81": 1.89297354221344, - "82": 2.1936445236206055, - "83": 2.1091713905334473, - "84": 1.765440821647644, - "85": 1.8004003763198853, - "86": 1.8538652658462524, - "87": 1.8449907302856445, - "88": 1.8676313161849976, - "89": 1.710210919380188, - "90": 1.4306646585464478, - "91": 1.5061683654785156, - "92": 1.8027716875076294, - "93": 1.7483417987823486, - "94": 1.6187306642532349, - "95": 1.6917165517807007, - "96": 1.9544671773910522, - "97": 1.7831960916519165, - "98": 1.5010552406311035, - "99": 1.5395915508270264, - "100": 1.8150750398635864, - "101": 1.779003381729126, - "102": 1.6438370943069458, - "103": 1.6328306198120117, - "104": 1.665625810623169, - "105": 1.6750128269195557, - "106": 1.617784857749939, - "107": 1.7122350931167603 - }, - "loss": { - "54": 2.612858533859253, - "55": 2.6396708488464355, - "56": 2.6654889583587646, - "57": 2.8374953269958496, - "58": 2.677858352661133, - "59": 2.7912232875823975, - "60": 2.718667984008789, - "61": 2.8346023559570312, - "62": 2.71724271774292, - "63": 2.7834033966064453, - "64": 2.5961713790893555, - "65": 2.5818214416503906, - "66": 2.6074576377868652, - "67": 2.678248643875122, - "68": 2.6233010292053223, - "69": 2.725496768951416, - "70": 2.5729541778564453, - "71": 2.5963242053985596, - "72": 2.633251428604126, - "73": 2.642148971557617, - "74": 2.6354236602783203, - "75": 2.683126449584961, - "76": 2.57817006111145, - "77": 2.649172306060791, - "78": 2.6319797039031982, - "79": 2.7170534133911133, - "80": 2.5920491218566895, - "81": 2.6264402866363525, - "82": 2.59210205078125, - "83": 2.6878161430358887, - "84": 2.592118263244629, - "85": 2.6103262901306152, - "86": 2.588736057281494, - "87": 2.6495399475097656, - "88": 2.565886974334717, - "89": 2.646350860595703, - "90": 2.56858491897583, - "91": 2.581890106201172, - "92": 2.570685386657715, - "93": 2.620863437652588, - "94": 2.5663914680480957, - "95": 2.6002283096313477, - "96": 2.569908380508423, - "97": 2.6125473976135254, - "98": 2.5333504676818848, - "99": 2.573561191558838, - "100": 2.541639804840088, - "101": 2.618934154510498, - "102": 2.5519614219665527, - "103": 2.6009321212768555, - "104": 2.5399415493011475, - "105": 2.5924625396728516, - "106": 2.528653144836426, - "107": 2.614945411682129 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 1.16214, - 0.856485, - 0.395084, - 0.413517, - 0.488332, - 0.365226, - 0.367724, - 0.335358, - 0.371003, - 0.604961, - 1.48404, - 1.51945, - 0.7854, - 0.662335, - 0.499384, - 0.700223, - 1.52953, - 1.11102, - 0.640021, - 0.579065, - 0.561858, - 0.718583, - 0.841536, - 0.651995, - 0.46737, - 0.588211, - 0.839736, - 0.732957, - 0.538666, - 0.604193, - 0.831665, - 0.805298, - 0.753237, - 0.778363, - 0.735623, - 0.904791, - 1.25493, - 1.13813, - 0.790984, - 0.857416, - 0.979431, - 0.908566, - 0.672761, - 0.82161, - 1.12435, - 1.08573, - 0.77148, - 0.827506, - 0.944401, - 0.975544, - 0.915522, - 0.924009, - 0.966157, - 0.89194 - ], - "train_epoch_time": 5.049004793167114, - "train_loss": 2.554169839287078, - "train_score": 0.2546673241869107, - "val_loss": 2.5931542007301758, - "val_score": 0.25052920456047584 - }, - { - "epoch": 2, - "grad_norm": 1.5213649272918701, - "learning_rate": 0.1, - "model_norm": 87.4622573852539, - "step_logs": { - "grad_norm": { - "108": 1.7698161602020264, - "109": 1.6863858699798584, - "110": 1.4905215501785278, - "111": 1.484915852546692, - "112": 1.5513427257537842, - "113": 1.5944156646728516, - "114": 1.670814871788025, - "115": 1.6587823629379272, - "116": 1.5405527353286743, - "117": 1.5328104496002197, - "118": 1.5365923643112183, - "119": 1.8154313564300537, - "120": 2.2267980575561523, - "121": 1.9328863620758057, - "122": 1.616337537765503, - "123": 1.5008413791656494, - "124": 1.6445186138153076, - "125": 1.6981024742126465, - "126": 1.7186658382415771, - "127": 1.6501388549804688, - "128": 1.4911203384399414, - "129": 1.481259822845459, - "130": 1.7012332677841187, - "131": 1.6902801990509033, - "132": 1.4128209352493286, - "133": 1.392470359802246, - "134": 1.4971853494644165, - "135": 1.559562087059021, - "136": 1.5591846704483032, - "137": 1.5602431297302246, - "138": 1.621742844581604, - "139": 1.6631306409835815, - "140": 1.6960033178329468, - "141": 1.6355992555618286, - "142": 1.5226027965545654, - "143": 1.499666452407837, - "144": 1.4234119653701782, - "145": 1.4620394706726074, - "146": 1.5172392129898071, - "147": 1.505304217338562, - "148": 1.5338846445083618, - "149": 1.565070629119873, - "150": 1.707519292831421, - "151": 1.6376944780349731, - "152": 1.4706141948699951, - "153": 1.5124589204788208, - "154": 1.70602285861969, - "155": 1.7617076635360718, - "156": 1.6470904350280762, - "157": 1.5048695802688599, - "158": 1.423397183418274, - "159": 1.3689265251159668, - "160": 1.4615544080734253, - "161": 1.5213649272918701 - }, - "loss": { - "108": 2.5415701866149902, - "109": 2.580026149749756, - "110": 2.5348711013793945, - "111": 2.571901321411133, - "112": 2.5279598236083984, - "113": 2.5806150436401367, - "114": 2.53316068649292, - "115": 2.5662384033203125, - "116": 2.538095474243164, - "117": 2.5629286766052246, - "118": 2.542416572570801, - "119": 2.5777578353881836, - "120": 2.5772297382354736, - "121": 2.6121902465820312, - "122": 2.5644259452819824, - "123": 2.5671091079711914, - "124": 2.541909694671631, - "125": 2.562094211578369, - "126": 2.5294177532196045, - "127": 2.5802440643310547, - "128": 2.5231287479400635, - "129": 2.5453009605407715, - "130": 2.5176773071289062, - "131": 2.5860157012939453, - "132": 2.5463199615478516, - "133": 2.5190093517303467, - "134": 2.5388855934143066, - "135": 2.557464122772217, - "136": 2.550321578979492, - "137": 2.535327911376953, - "138": 2.535480260848999, - "139": 2.546576976776123, - "140": 2.5351390838623047, - "141": 2.5489518642425537, - "142": 2.511795997619629, - "143": 2.5451183319091797, - "144": 2.538144588470459, - "145": 2.524411201477051, - "146": 2.5095009803771973, - "147": 2.537546157836914, - "148": 2.5233800411224365, - "149": 2.5372838973999023, - "150": 2.5321202278137207, - "151": 2.5746989250183105, - "152": 2.5178050994873047, - "153": 2.534905433654785, - "154": 2.538670778274536, - "155": 2.5623373985290527, - "156": 2.530207633972168, - "157": 2.5415940284729004, - "158": 2.5066914558410645, - "159": 2.528236150741577, - "160": 2.4945034980773926, - "161": 2.5192644596099854 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 0.81142, - 0.907215, - 1.14098, - 1.16641, - 1.0504, - 1.01513, - 0.907415, - 0.932649, - 1.06944, - 1.09084, - 1.07679, - 0.782136, - 0.519746, - 0.699185, - 0.981581, - 1.13966, - 0.939902, - 0.88852, - 0.856323, - 0.947589, - 1.13479, - 1.16005, - 0.869906, - 0.905136, - 1.27567, - 1.29915, - 1.13264, - 1.05149, - 1.04906, - 1.04148, - 0.964043, - 0.92067, - 0.88135, - 0.952813, - 1.08346, - 1.13167, - 1.25272, - 1.18098, - 1.09013, - 1.11986, - 1.0725, - 1.03586, - 0.868466, - 0.959977, - 1.16419, - 1.10814, - 0.872241, - 0.825598, - 0.932656, - 1.1223, - 1.23722, - 1.34914, - 1.16776, - 1.08845 - ], - "train_epoch_time": 5.04582142829895, - "train_loss": 2.5082411634699686, - "train_score": 0.26282841634203064, - "val_loss": 2.5498671890263167, - "val_score": 0.2559916757209703 - }, - { - "epoch": 3, - "grad_norm": 1.4472090005874634, - "learning_rate": 0.1, - "model_norm": 87.4747314453125, - "step_logs": { - "grad_norm": { - "162": 1.4126492738723755, - "163": 1.2858518362045288, - "164": 1.2448664903640747, - "165": 1.3287618160247803, - "166": 1.4017678499221802, - "167": 1.5514181852340698, - "168": 1.6390444040298462, - "169": 1.5579659938812256, - "170": 1.422958493232727, - "171": 1.3877211809158325, - "172": 1.399069905281067, - "173": 1.4332078695297241, - "174": 1.4144973754882812, - "175": 1.3887548446655273, - "176": 1.3306553363800049, - "177": 1.2968372106552124, - "178": 1.2731873989105225, - "179": 1.2861305475234985, - "180": 1.5044138431549072, - "181": 1.6309152841567993, - "182": 1.5537265539169312, - "183": 1.4571703672409058, - "184": 1.4838138818740845, - "185": 1.5193510055541992, - "186": 1.525577425956726, - "187": 1.5352483987808228, - "188": 1.338010549545288, - "189": 1.2651041746139526, - "190": 1.3229663372039795, - "191": 1.3557943105697632, - "192": 1.3098673820495605, - "193": 1.3461573123931885, - "194": 1.3851524591445923, - "195": 1.4443306922912598, - "196": 1.5883374214172363, - "197": 1.6625257730484009, - "198": 1.7239654064178467, - "199": 1.640328049659729, - "200": 1.651151418685913, - "201": 1.4805493354797363, - "202": 1.3065520524978638, - "203": 1.4018487930297852, - "204": 1.5423328876495361, - "205": 1.6838067770004272, - "206": 1.9795117378234863, - "207": 1.6968928575515747, - "208": 1.1992686986923218, - "209": 1.0217435359954834, - "210": 1.064129114151001, - "211": 1.1120821237564087, - "212": 1.2089051008224487, - "213": 1.3125848770141602, - "214": 1.4519954919815063, - "215": 1.4472090005874634 - }, - "loss": { - "162": 2.5166265964508057, - "163": 2.518171787261963, - "164": 2.482665777206421, - "165": 2.5133309364318848, - "166": 2.5067520141601562, - "167": 2.525150775909424, - "168": 2.5074658393859863, - "169": 2.532771587371826, - "170": 2.507209062576294, - "171": 2.5089704990386963, - "172": 2.5103073120117188, - "173": 2.5220656394958496, - "174": 2.490757942199707, - "175": 2.4923863410949707, - "176": 2.505175828933716, - "177": 2.494217872619629, - "178": 2.4662699699401855, - "179": 2.5162346363067627, - "180": 2.5031118392944336, - "181": 2.5442981719970703, - "182": 2.502875804901123, - "183": 2.5245161056518555, - "184": 2.511025905609131, - "185": 2.5248634815216064, - "186": 2.5211057662963867, - "187": 2.5176310539245605, - "188": 2.505277633666992, - "189": 2.512939214706421, - "190": 2.5019590854644775, - "191": 2.4912538528442383, - "192": 2.486813545227051, - "193": 2.4861788749694824, - "194": 2.4946980476379395, - "195": 2.492351531982422, - "196": 2.5079643726348877, - "197": 2.5245511531829834, - "198": 2.4963221549987793, - "199": 2.5263419151306152, - "200": 2.527980327606201, - "201": 2.5093507766723633, - "202": 2.489234685897827, - "203": 2.4962615966796875, - "204": 2.505506992340088, - "205": 2.530435085296631, - "206": 2.530974864959717, - "207": 2.546389579772949, - "208": 2.4833383560180664, - "209": 2.4681429862976074, - "210": 2.463146209716797, - "211": 2.4733996391296387, - "212": 2.4631106853485107, - "213": 2.49599027633667, - "214": 2.499584674835205, - "215": 2.5120723247528076 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 1.2611, - 1.52301, - 1.60204, - 1.42349, - 1.27573, - 1.04913, - 0.93337, - 1.04347, - 1.23824, - 1.30284, - 1.28247, - 1.22783, - 1.24488, - 1.2923, - 1.41484, - 1.48308, - 1.52144, - 1.52118, - 1.10598, - 0.956545, - 1.03679, - 1.18893, - 1.14049, - 1.09376, - 1.08323, - 1.06816, - 1.39938, - 1.57011, - 1.42949, - 1.35528, - 1.4494, - 1.37196, - 1.30024, - 1.19475, - 0.994113, - 0.913371, - 0.839931, - 0.938925, - 0.927257, - 1.14476, - 1.45818, - 1.27025, - 1.05327, - 0.892505, - 0.645909, - 0.884333, - 1.72664, - 2.36421, - 2.17521, - 1.99996, - 1.68539, - 1.44873, - 1.1856, - 1.19942 - ], - "train_epoch_time": 5.043837070465088, - "train_loss": 2.4864724651812824, - "train_score": 0.26887329614145344, - "val_loss": 2.529483837320391, - "val_score": 0.25956156747212505 - }, - { - "epoch": 4, - "grad_norm": 1.6333067417144775, - "learning_rate": 0.1, - "model_norm": 87.48685455322266, - "step_logs": { - "grad_norm": { - "216": 1.4290608167648315, - "217": 1.3120534420013428, - "218": 1.2739180326461792, - "219": 1.3524606227874756, - "220": 1.4517425298690796, - "221": 1.5836673974990845, - "222": 1.5921634435653687, - "223": 1.4788835048675537, - "224": 1.2752031087875366, - "225": 1.182509422302246, - "226": 1.2438864707946777, - "227": 1.4129899740219116, - "228": 1.379360318183899, - "229": 1.234757661819458, - "230": 1.2055987119674683, - "231": 1.3568651676177979, - "232": 1.4966015815734863, - "233": 1.531385898590088, - "234": 1.359263300895691, - "235": 1.1495434045791626, - "236": 1.3062193393707275, - "237": 1.463045358657837, - "238": 1.5421591997146606, - "239": 1.5483026504516602, - "240": 1.4234495162963867, - "241": 1.3835184574127197, - "242": 1.3792067766189575, - "243": 1.365871787071228, - "244": 1.2870206832885742, - "245": 1.3198952674865723, - "246": 1.2530943155288696, - "247": 1.2332710027694702, - "248": 1.1639885902404785, - "249": 1.1980524063110352, - "250": 1.4111249446868896, - "251": 1.5256085395812988, - "252": 1.6563024520874023, - "253": 1.634125828742981, - "254": 1.647292137145996, - "255": 1.6125293970108032, - "256": 1.488387107849121, - "257": 1.505018949508667, - "258": 1.3945235013961792, - "259": 1.3868354558944702, - "260": 1.5193884372711182, - "261": 1.4111015796661377, - "262": 1.33878493309021, - "263": 1.4832243919372559, - "264": 1.6625736951828003, - "265": 1.4743620157241821, - "266": 1.3665927648544312, - "267": 1.350955605506897, - "268": 1.4537014961242676, - "269": 1.6333067417144775 - }, - "loss": { - "216": 2.498208522796631, - "217": 2.5084166526794434, - "218": 2.468519687652588, - "219": 2.4700491428375244, - "220": 2.4857678413391113, - "221": 2.507571220397949, - "222": 2.4933125972747803, - "223": 2.5257890224456787, - "224": 2.4855804443359375, - "225": 2.4715840816497803, - "226": 2.481645107269287, - "227": 2.4919357299804688, - "228": 2.4991374015808105, - "229": 2.4684383869171143, - "230": 2.467231273651123, - "231": 2.4887256622314453, - "232": 2.5055744647979736, - "233": 2.4857892990112305, - "234": 2.510507822036743, - "235": 2.4743242263793945, - "236": 2.4503369331359863, - "237": 2.500880002975464, - "238": 2.4847540855407715, - "239": 2.491593360900879, - "240": 2.477583885192871, - "241": 2.4990100860595703, - "242": 2.469034433364868, - "243": 2.484649896621704, - "244": 2.465479850769043, - "245": 2.483989953994751, - "246": 2.451329231262207, - "247": 2.469083070755005, - "248": 2.4642605781555176, - "249": 2.4573283195495605, - "250": 2.4803404808044434, - "251": 2.485412359237671, - "252": 2.482370615005493, - "253": 2.516460657119751, - "254": 2.4841811656951904, - "255": 2.5117170810699463, - "256": 2.4905197620391846, - "257": 2.499704360961914, - "258": 2.460371255874634, - "259": 2.47316837310791, - "260": 2.477318048477173, - "261": 2.4979257583618164, - "262": 2.482491970062256, - "263": 2.4682559967041016, - "264": 2.4702749252319336, - "265": 2.4952211380004883, - "266": 2.4528074264526367, - "267": 2.4531466960906982, - "268": 2.457468032836914, - "269": 2.48698091506958 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 1.22328, - 1.45712, - 1.52109, - 1.35038, - 1.17946, - 0.999828, - 0.983561, - 1.15486, - 1.52851, - 1.76753, - 1.6039, - 1.24813, - 1.31351, - 1.61904, - 1.69748, - 1.35177, - 1.11865, - 1.05997, - 1.3588, - 1.87243, - 1.43613, - 1.16836, - 1.04478, - 1.03936, - 1.22277, - 1.30556, - 1.29798, - 1.33182, - 1.48844, - 1.42584, - 1.56111, - 1.62337, - 1.81882, - 1.71203, - 1.2456, - 1.06785, - 0.904872, - 0.942365, - 0.915466, - 0.965952, - 1.12424, - 1.10358, - 1.26517, - 1.28589, - 1.07311, - 1.25448, - 1.38505, - 1.12196, - 0.893683, - 1.14789, - 1.31336, - 1.34413, - 1.16289, - 0.93226 - ], - "train_epoch_time": 5.047112941741943, - "train_loss": 2.4842419149543837, - "train_score": 0.25807142210656636, - "val_loss": 2.520354661821087, - "val_score": 0.2501973307912851 - }, - { - "epoch": 5, - "grad_norm": 1.5142675638198853, - "learning_rate": 0.1, - "model_norm": 87.50237274169922, - "step_logs": { - "grad_norm": { - "270": 1.6810544729232788, - "271": 1.5603219270706177, - "272": 1.3195146322250366, - "273": 1.1578401327133179, - "274": 1.2478570938110352, - "275": 1.3786864280700684, - "276": 1.5500452518463135, - "277": 1.421463131904602, - "278": 1.341933250427246, - "279": 1.4265083074569702, - "280": 1.3541568517684937, - "281": 1.3280987739562988, - "282": 1.4996294975280762, - "283": 1.4291025400161743, - "284": 1.2934948205947876, - "285": 1.3371226787567139, - "286": 1.463035225868225, - "287": 1.5002892017364502, - "288": 1.5061780214309692, - "289": 1.704079508781433, - "290": 1.914076566696167, - "291": 1.7685819864273071, - "292": 1.3837261199951172, - "293": 1.202583909034729, - "294": 1.2350960969924927, - "295": 1.3258447647094727, - "296": 1.4913440942764282, - "297": 1.6742933988571167, - "298": 1.4741325378417969, - "299": 1.2376271486282349, - "300": 1.3532912731170654, - "301": 1.407514214515686, - "302": 1.258879542350769, - "303": 1.1456117630004883, - "304": 1.2588986158370972, - "305": 1.4454586505889893, - "306": 1.8084183931350708, - "307": 1.835208773612976, - "308": 1.47889244556427, - "309": 1.2990059852600098, - "310": 1.4059151411056519, - "311": 1.3239109516143799, - "312": 1.2116364240646362, - "313": 1.2776561975479126, - "314": 1.3373874425888062, - "315": 1.380043625831604, - "316": 1.561672568321228, - "317": 1.5787192583084106, - "318": 1.5495904684066772, - "319": 1.6338167190551758, - "320": 1.7478995323181152, - "321": 1.904791235923767, - "322": 1.7893041372299194, - "323": 1.5142675638198853 - }, - "loss": { - "270": 2.4693410396575928, - "271": 2.474403142929077, - "272": 2.481935977935791, - "273": 2.4520368576049805, - "274": 2.4503657817840576, - "275": 2.452192783355713, - "276": 2.4742250442504883, - "277": 2.4822051525115967, - "278": 2.4493818283081055, - "279": 2.4777278900146484, - "280": 2.456301212310791, - "281": 2.4464101791381836, - "282": 2.4542148113250732, - "283": 2.4563732147216797, - "284": 2.4444618225097656, - "285": 2.4467639923095703, - "286": 2.4570093154907227, - "287": 2.4427075386047363, - "288": 2.4584643840789795, - "289": 2.4551587104797363, - "290": 2.4839529991149902, - "291": 2.4930782318115234, - "292": 2.438288688659668, - "293": 2.4505019187927246, - "294": 2.404829502105713, - "295": 2.4518871307373047, - "296": 2.4218006134033203, - "297": 2.45648193359375, - "298": 2.4494688510894775, - "299": 2.416492462158203, - "300": 2.4349756240844727, - "301": 2.4601359367370605, - "302": 2.423393726348877, - "303": 2.4048194885253906, - "304": 2.4216668605804443, - "305": 2.426604747772217, - "306": 2.4317641258239746, - "307": 2.487464189529419, - "308": 2.4466681480407715, - "309": 2.4164767265319824, - "310": 2.409547805786133, - "311": 2.440345287322998, - "312": 2.389723300933838, - "313": 2.4066061973571777, - "314": 2.394221067428589, - "315": 2.4129481315612793, - "316": 2.428553581237793, - "317": 2.4317996501922607, - "318": 2.4167392253875732, - "319": 2.428300142288208, - "320": 2.4225542545318604, - "321": 2.444823741912842, - "322": 2.4715967178344727, - "323": 2.408566474914551 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 0.873811, - 1.01635, - 1.42548, - 1.82907, - 1.57362, - 1.2901, - 1.02979, - 1.22848, - 1.36017, - 1.2176, - 1.3395, - 1.38697, - 1.0913, - 1.20273, - 1.46101, - 1.36852, - 1.14788, - 1.08523, - 1.08371, - 0.845473, - 0.677992, - 0.79705, - 1.27346, - 1.69443, - 1.57646, - 1.39481, - 1.08889, - 0.876295, - 1.12719, - 1.57763, - 1.32957, - 1.24181, - 1.52917, - 1.83235, - 1.52803, - 1.16142, - 0.743573, - 0.73856, - 1.11867, - 1.43206, - 1.21904, - 1.3923, - 1.62781, - 1.47427, - 1.3386, - 1.26696, - 0.99579, - 0.975704, - 1.00646, - 0.909695, - 0.79294, - 0.673834, - 0.771986, - 1.0504 - ], - "train_epoch_time": 5.054343223571777, - "train_loss": 2.4141224595703386, - "train_score": 0.2681223097969504, - "val_loss": 2.4734828012271537, - "val_score": 0.25829237227324914 - }, - { - "epoch": 6, - "grad_norm": 1.3155577182769775, - "learning_rate": 0.1, - "model_norm": 87.51831817626953, - "step_logs": { - "grad_norm": { - "324": 1.418066382408142, - "325": 1.26797616481781, - "326": 1.3308789730072021, - "327": 1.3658920526504517, - "328": 1.509848952293396, - "329": 1.6191020011901855, - "330": 1.5451252460479736, - "331": 1.455051064491272, - "332": 1.3848249912261963, - "333": 1.2380965948104858, - "334": 1.2768330574035645, - "335": 1.2965487241744995, - "336": 1.532166838645935, - "337": 1.5866661071777344, - "338": 1.4262930154800415, - "339": 1.3868048191070557, - "340": 1.3542104959487915, - "341": 1.3543922901153564, - "342": 1.6262602806091309, - "343": 1.951874852180481, - "344": 2.2120168209075928, - "345": 2.171908140182495, - "346": 1.7381994724273682, - "347": 1.292962908744812, - "348": 1.3616982698440552, - "349": 1.5318586826324463, - "350": 1.463059425354004, - "351": 1.53459632396698, - "352": 1.513016700744629, - "353": 1.4959349632263184, - "354": 1.5368945598602295, - "355": 1.5273686647415161, - "356": 1.4757351875305176, - "357": 1.4687684774398804, - "358": 1.3882992267608643, - "359": 1.2901830673217773, - "360": 1.3259265422821045, - "361": 1.401950478553772, - "362": 1.4129544496536255, - "363": 1.2871674299240112, - "364": 1.2704095840454102, - "365": 1.3863569498062134, - "366": 1.3463984727859497, - "367": 1.3396457433700562, - "368": 1.609025001525879, - "369": 1.774939775466919, - "370": 1.7160730361938477, - "371": 1.55253005027771, - "372": 1.293172836303711, - "373": 1.4918750524520874, - "374": 1.6320565938949585, - "375": 1.781571388244629, - "376": 1.6311960220336914, - "377": 1.3155577182769775 - }, - "loss": { - "324": 2.3906726837158203, - "325": 2.409987449645996, - "326": 2.3870949745178223, - "327": 2.3950319290161133, - "328": 2.412079334259033, - "329": 2.393211841583252, - "330": 2.3995373249053955, - "331": 2.4038217067718506, - "332": 2.3917741775512695, - "333": 2.382768154144287, - "334": 2.362889289855957, - "335": 2.37251615524292, - "336": 2.414001703262329, - "337": 2.415994167327881, - "338": 2.389451026916504, - "339": 2.3947300910949707, - "340": 2.380363941192627, - "341": 2.383960247039795, - "342": 2.397738218307495, - "343": 2.403510093688965, - "344": 2.4402947425842285, - "345": 2.438345432281494, - "346": 2.4622936248779297, - "347": 2.3993618488311768, - "348": 2.4011447429656982, - "349": 2.3996105194091797, - "350": 2.406304121017456, - "351": 2.402252674102783, - "352": 2.4081785678863525, - "353": 2.392700433731079, - "354": 2.405967950820923, - "355": 2.380117893218994, - "356": 2.390638828277588, - "357": 2.381976366043091, - "358": 2.410921573638916, - "359": 2.3644192218780518, - "360": 2.3576500415802, - "361": 2.3726725578308105, - "362": 2.379549980163574, - "363": 2.3842437267303467, - "364": 2.3507213592529297, - "365": 2.3805453777313232, - "366": 2.392122268676758, - "367": 2.371797561645508, - "368": 2.395376682281494, - "369": 2.4013423919677734, - "370": 2.392371892929077, - "371": 2.4127049446105957, - "372": 2.379133462905884, - "373": 2.364417314529419, - "374": 2.4106600284576416, - "375": 2.385195732116699, - "376": 2.4129273891448975, - "377": 2.3587918281555176 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 1.18885, - 1.49897, - 1.3477, - 1.28374, - 1.05809, - 0.91292, - 1.00508, - 1.13539, - 1.24718, - 1.55444, - 1.44936, - 1.41134, - 1.02831, - 0.959676, - 1.17457, - 1.24516, - 1.29799, - 1.2996, - 0.906612, - 0.630873, - 0.49873, - 0.516907, - 0.814968, - 1.43524, - 1.29496, - 1.02259, - 1.12416, - 1.02007, - 1.05196, - 1.06921, - 1.0186, - 1.02026, - 1.09773, - 1.10416, - 1.25088, - 1.42044, - 1.34104, - 1.20718, - 1.1919, - 1.43907, - 1.45651, - 1.23859, - 1.31958, - 1.32159, - 0.925227, - 0.762231, - 0.812376, - 1.00098, - 1.42267, - 1.06233, - 0.905035, - 0.75148, - 0.906843, - 1.36292 - ], - "train_epoch_time": 5.047745704650879, - "train_loss": 2.361743169330285, - "train_score": 0.29974892387431185, - "val_loss": 2.4082364491288617, - "val_score": 0.2869053176432061 - }, - { - "epoch": 7, - "grad_norm": 1.4832714796066284, - "learning_rate": 0.1, - "model_norm": 87.5346450805664, - "step_logs": { - "grad_norm": { - "378": 1.2507907152175903, - "379": 1.2716996669769287, - "380": 1.3078027963638306, - "381": 1.3168158531188965, - "382": 1.2987083196640015, - "383": 1.2467254400253296, - "384": 1.3580095767974854, - "385": 1.6096895933151245, - "386": 1.5116077661514282, - "387": 1.3220933675765991, - "388": 1.473404049873352, - "389": 1.6791013479232788, - "390": 1.6889094114303589, - "391": 1.5956958532333374, - "392": 1.5935845375061035, - "393": 1.4105433225631714, - "394": 1.271815299987793, - "395": 1.3040547370910645, - "396": 1.3289153575897217, - "397": 1.1971837282180786, - "398": 1.221039056777954, - "399": 1.423857569694519, - "400": 1.5197818279266357, - "401": 1.584545612335205, - "402": 1.4568161964416504, - "403": 1.227216124534607, - "404": 1.2610230445861816, - "405": 1.5278589725494385, - "406": 1.4574626684188843, - "407": 1.3358162641525269, - "408": 1.221930742263794, - "409": 1.2022416591644287, - "410": 1.3379154205322266, - "411": 1.5157263278961182, - "412": 1.5712929964065552, - "413": 1.6875874996185303, - "414": 1.6722849607467651, - "415": 1.7562932968139648, - "416": 1.8147763013839722, - "417": 1.6781888008117676, - "418": 1.631113886833191, - "419": 1.6726852655410767, - "420": 1.5651556253433228, - "421": 1.3145784139633179, - "422": 1.3216067552566528, - "423": 1.4262635707855225, - "424": 1.352217674255371, - "425": 1.2871074676513672, - "426": 1.2919652462005615, - "427": 1.3381145000457764, - "428": 1.3508021831512451, - "429": 1.5001040697097778, - "430": 1.5713080167770386, - "431": 1.4832714796066284 - }, - "loss": { - "378": 2.367905855178833, - "379": 2.3387227058410645, - "380": 2.375861883163452, - "381": 2.3376030921936035, - "382": 2.3504133224487305, - "383": 2.3421030044555664, - "384": 2.3537960052490234, - "385": 2.357940673828125, - "386": 2.388073444366455, - "387": 2.3495824337005615, - "388": 2.3642053604125977, - "389": 2.376500129699707, - "390": 2.3908321857452393, - "391": 2.3723621368408203, - "392": 2.3595778942108154, - "393": 2.3609061241149902, - "394": 2.3399276733398438, - "395": 2.3277668952941895, - "396": 2.361086845397949, - "397": 2.336249351501465, - "398": 2.362821578979492, - "399": 2.327812433242798, - "400": 2.3626058101654053, - "401": 2.3715500831604004, - "402": 2.3660147190093994, - "403": 2.3475341796875, - "404": 2.341981887817383, - "405": 2.3601129055023193, - "406": 2.3760735988616943, - "407": 2.334378242492676, - "408": 2.328312397003174, - "409": 2.341625452041626, - "410": 2.3512682914733887, - "411": 2.3703114986419678, - "412": 2.3764235973358154, - "413": 2.357820987701416, - "414": 2.3812451362609863, - "415": 2.3705861568450928, - "416": 2.388590097427368, - "417": 2.3676886558532715, - "418": 2.357398509979248, - "419": 2.3815762996673584, - "420": 2.359773635864258, - "421": 2.3335471153259277, - "422": 2.337965965270996, - "423": 2.3463830947875977, - "424": 2.3496408462524414, - "425": 2.325202465057373, - "426": 2.338893175125122, - "427": 2.323929786682129, - "428": 2.3332583904266357, - "429": 2.3421614170074463, - "430": 2.3548731803894043, - "431": 2.3263583183288574 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 1.51354, - 1.44614, - 1.38911, - 1.3481, - 1.39355, - 1.50683, - 1.27633, - 0.910015, - 1.04513, - 1.34421, - 1.08903, - 0.842915, - 0.838178, - 0.93171, - 0.929146, - 1.1866, - 1.44662, - 1.36882, - 1.33696, - 1.63004, - 1.58479, - 1.14819, - 1.02289, - 0.944545, - 1.11483, - 1.55873, - 1.47278, - 1.01104, - 1.11857, - 1.30821, - 1.55937, - 1.62007, - 1.31354, - 1.03172, - 0.962519, - 0.827901, - 0.851498, - 0.768532, - 0.725263, - 0.840704, - 0.886063, - 0.851209, - 0.963286, - 1.35034, - 1.33855, - 1.15345, - 1.28502, - 1.40356, - 1.40123, - 1.29789, - 1.27873, - 1.04082, - 0.953773, - 1.05739 - ], - "train_epoch_time": 5.046604156494141, - "train_loss": 2.3419349491168644, - "train_score": 0.30487468625756897, - "val_loss": 2.393266716178605, - "val_score": 0.2925157869655968 - }, - { - "epoch": 8, - "grad_norm": 1.6719913482666016, - "learning_rate": 0.1, - "model_norm": 87.55176544189453, - "step_logs": { - "grad_norm": { - "432": 1.3772529363632202, - "433": 1.3426146507263184, - "434": 1.3324344158172607, - "435": 1.358776569366455, - "436": 1.4715733528137207, - "437": 1.4788800477981567, - "438": 1.4976952075958252, - "439": 1.3530436754226685, - "440": 1.3663952350616455, - "441": 1.4930800199508667, - "442": 1.5838810205459595, - "443": 1.513715147972107, - "444": 1.3886741399765015, - "445": 1.199733853340149, - "446": 1.1420347690582275, - "447": 1.2655256986618042, - "448": 1.4025110006332397, - "449": 1.4412257671356201, - "450": 1.4895614385604858, - "451": 1.5109293460845947, - "452": 1.5501667261123657, - "453": 1.658917784690857, - "454": 1.5020052194595337, - "455": 1.365525722503662, - "456": 1.3858025074005127, - "457": 1.5922483205795288, - "458": 1.7014318704605103, - "459": 1.5323066711425781, - "460": 1.4663453102111816, - "461": 1.5507745742797852, - "462": 1.418809175491333, - "463": 1.3079020977020264, - "464": 1.497633457183838, - "465": 1.5724527835845947, - "466": 1.3144352436065674, - "467": 1.224091649055481, - "468": 1.2142049074172974, - "469": 1.1949164867401123, - "470": 1.262269139289856, - "471": 1.2704116106033325, - "472": 1.2816797494888306, - "473": 1.4210776090621948, - "474": 1.5883044004440308, - "475": 1.6863433122634888, - "476": 1.7087546586990356, - "477": 1.6457250118255615, - "478": 1.5868438482284546, - "479": 1.602194905281067, - "480": 1.4961035251617432, - "481": 1.38497793674469, - "482": 1.2938809394836426, - "483": 1.3164377212524414, - "484": 1.5357096195220947, - "485": 1.6719913482666016 - }, - "loss": { - "432": 2.3569703102111816, - "433": 2.3265621662139893, - "434": 2.3311655521392822, - "435": 2.325826406478882, - "436": 2.348071813583374, - "437": 2.3353023529052734, - "438": 2.349766492843628, - "439": 2.329619884490967, - "440": 2.3471851348876953, - "441": 2.3441779613494873, - "442": 2.3317179679870605, - "443": 2.3459043502807617, - "444": 2.3380584716796875, - "445": 2.334829092025757, - "446": 2.304365634918213, - "447": 2.2944769859313965, - "448": 2.3185274600982666, - "449": 2.3153786659240723, - "450": 2.341874122619629, - "451": 2.3294148445129395, - "452": 2.323920488357544, - "453": 2.332111120223999, - "454": 2.3540799617767334, - "455": 2.3185930252075195, - "456": 2.3187055587768555, - "457": 2.327409267425537, - "458": 2.3606953620910645, - "459": 2.3353662490844727, - "460": 2.321321964263916, - "461": 2.330463409423828, - "462": 2.33009672164917, - "463": 2.3038291931152344, - "464": 2.3188774585723877, - "465": 2.350034236907959, - "466": 2.32249116897583, - "467": 2.310570240020752, - "468": 2.298391580581665, - "469": 2.300656318664551, - "470": 2.294245719909668, - "471": 2.3013916015625, - "472": 2.311711311340332, - "473": 2.305185079574585, - "474": 2.307091236114502, - "475": 2.3272156715393066, - "476": 2.310476303100586, - "477": 2.3332691192626953, - "478": 2.333348274230957, - "479": 2.329441547393799, - "480": 2.333315372467041, - "481": 2.306332588195801, - "482": 2.2903270721435547, - "483": 2.2886452674865723, - "484": 2.3130598068237305, - "485": 2.3527469635009766 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 1.24259, - 1.29066, - 1.31305, - 1.25974, - 1.0843, - 1.06777, - 1.04756, - 1.27251, - 1.25717, - 1.05154, - 0.92946, - 1.02382, - 1.21242, - 1.62213, - 1.76682, - 1.43266, - 1.17869, - 1.1147, - 1.05547, - 1.02037, - 0.967085, - 0.847422, - 1.04347, - 1.24344, - 1.20738, - 0.918018, - 0.815475, - 0.994635, - 1.0796, - 0.969047, - 1.15751, - 1.34679, - 1.03387, - 0.950427, - 1.34424, - 1.54202, - 1.55898, - 1.6113, - 1.43991, - 1.42594, - 1.40726, - 1.14148, - 0.914529, - 0.81836, - 0.791302, - 0.86149, - 0.92664, - 0.907447, - 1.04244, - 1.20236, - 1.36807, - 1.32062, - 0.980773, - 0.841603 - ], - "train_epoch_time": 5.05733060836792, - "train_loss": 2.3190040145064015, - "train_score": 0.31665172162151745, - "val_loss": 2.3817042644754753, - "val_score": 0.30194729457082214 - }, - { - "epoch": 9, - "grad_norm": 1.7779604196548462, - "learning_rate": 0.1, - "model_norm": 87.56884765625, - "step_logs": { - "grad_norm": { - "486": 1.6742181777954102, - "487": 1.551944375038147, - "488": 1.386225938796997, - "489": 1.5135105848312378, - "490": 1.717846393585205, - "491": 1.7236398458480835, - "492": 1.6709500551223755, - "493": 1.867740511894226, - "494": 1.5722646713256836, - "495": 1.3661384582519531, - "496": 1.3567614555358887, - "497": 1.3679317235946655, - "498": 1.327094316482544, - "499": 1.2675182819366455, - "500": 1.2265138626098633, - "501": 1.1087000370025635, - "502": 1.055065393447876, - "503": 1.16732919216156, - "504": 1.2666935920715332, - "505": 1.3211218118667603, - "506": 1.470011591911316, - "507": 1.7048187255859375, - "508": 1.79426109790802, - "509": 1.7555835247039795, - "510": 1.5852285623550415, - "511": 1.4790658950805664, - "512": 1.4900202751159668, - "513": 1.4202228784561157, - "514": 1.2604990005493164, - "515": 1.227232575416565, - "516": 1.250272274017334, - "517": 1.4230231046676636, - "518": 1.522752285003662, - "519": 1.4509660005569458, - "520": 1.2020130157470703, - "521": 1.1757957935333252, - "522": 1.2957830429077148, - "523": 1.5180374383926392, - "524": 1.6535571813583374, - "525": 1.6018004417419434, - "526": 1.5107072591781616, - "527": 1.3575783967971802, - "528": 1.324666976928711, - "529": 1.362179160118103, - "530": 1.474269986152649, - "531": 1.5422247648239136, - "532": 1.6697173118591309, - "533": 1.58110773563385, - "534": 1.3566840887069702, - "535": 1.3498913049697876, - "536": 1.3571470975875854, - "537": 1.4225178956985474, - "538": 1.533928632736206, - "539": 1.7779604196548462 - }, - "loss": { - "486": 2.3220930099487305, - "487": 2.3261220455169678, - "488": 2.2855758666992188, - "489": 2.3221473693847656, - "490": 2.2996368408203125, - "491": 2.3574090003967285, - "492": 2.3189697265625, - "493": 2.3327951431274414, - "494": 2.3433547019958496, - "495": 2.310523509979248, - "496": 2.2948732376098633, - "497": 2.293727159500122, - "498": 2.2975926399230957, - "499": 2.256014347076416, - "500": 2.2843923568725586, - "501": 2.2881460189819336, - "502": 2.28273868560791, - "503": 2.289323568344116, - "504": 2.3028831481933594, - "505": 2.2738089561462402, - "506": 2.2794981002807617, - "507": 2.314923048019409, - "508": 2.317366600036621, - "509": 2.3298819065093994, - "510": 2.3180510997772217, - "511": 2.290679454803467, - "512": 2.293121337890625, - "513": 2.3143627643585205, - "514": 2.290820837020874, - "515": 2.276301860809326, - "516": 2.2668962478637695, - "517": 2.2852418422698975, - "518": 2.2983531951904297, - "519": 2.2913625240325928, - "520": 2.2787485122680664, - "521": 2.263439178466797, - "522": 2.278364896774292, - "523": 2.3045449256896973, - "524": 2.3066561222076416, - "525": 2.3127849102020264, - "526": 2.2955591678619385, - "527": 2.2579898834228516, - "528": 2.2588822841644287, - "529": 2.2573554515838623, - "530": 2.2974653244018555, - "531": 2.284247636795044, - "532": 2.291372776031494, - "533": 2.3105831146240234, - "534": 2.2505717277526855, - "535": 2.242361307144165, - "536": 2.255277156829834, - "537": 2.252377986907959, - "538": 2.2567806243896484, - "539": 2.271329402923584 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 0.828429, - 0.965784, - 1.1894, - 1.01372, - 0.779275, - 0.793491, - 0.830554, - 0.668719, - 0.947953, - 1.238, - 1.24667, - 1.22578, - 1.30458, - 1.40421, - 1.51854, - 1.86147, - 2.05068, - 1.68004, - 1.43525, - 1.30277, - 1.05487, - 0.79649, - 0.719819, - 0.755946, - 0.922442, - 1.0471, - 1.03286, - 1.14741, - 1.4418, - 1.51139, - 1.45018, - 1.12852, - 0.991193, - 1.08838, - 1.57717, - 1.63721, - 1.35693, - 1.00005, - 0.843615, - 0.901402, - 1.00584, - 1.22516, - 1.2873, - 1.21655, - 1.05705, - 0.96039, - 0.821883, - 0.92427, - 1.22274, - 1.23057, - 1.22446, - 1.11308, - 0.959133, - 0.718515 - ], - "train_epoch_time": 5.098841667175293, - "train_loss": 2.3024649360087546, - "train_score": 0.3245034524067232, - "val_loss": 2.348097736060825, - "val_score": 0.3150114812615545 - }, - { - "epoch": 10, - "grad_norm": 1.2640491724014282, - "learning_rate": 0.1, - "model_norm": 87.58625030517578, - "step_logs": { - "grad_norm": { - "540": 1.7134689092636108, - "541": 1.5779328346252441, - "542": 1.5871375799179077, - "543": 1.4161053895950317, - "544": 1.2711663246154785, - "545": 1.3576077222824097, - "546": 1.3114019632339478, - "547": 1.1410677433013916, - "548": 1.0767924785614014, - "549": 1.204289197921753, - "550": 1.4401222467422485, - "551": 1.4469940662384033, - "552": 1.3848143815994263, - "553": 1.467239260673523, - "554": 1.4632148742675781, - "555": 1.3870761394500732, - "556": 1.4207103252410889, - "557": 1.5865219831466675, - "558": 1.5462971925735474, - "559": 1.417084813117981, - "560": 1.3681401014328003, - "561": 1.322740912437439, - "562": 1.2615020275115967, - "563": 1.2015020847320557, - "564": 1.1711989641189575, - "565": 1.1907838582992554, - "566": 1.1135379076004028, - "567": 1.1052764654159546, - "568": 1.1534289121627808, - "569": 1.1689847707748413, - "570": 1.2904889583587646, - "571": 1.5788869857788086, - "572": 1.697857141494751, - "573": 1.7383028268814087, - "574": 1.7290221452713013, - "575": 1.6544092893600464, - "576": 1.5269207954406738, - "577": 1.4095512628555298, - "578": 1.3658093214035034, - "579": 1.2597167491912842, - "580": 1.1534322500228882, - "581": 1.2306861877441406, - "582": 1.3668746948242188, - "583": 1.5056084394454956, - "584": 1.6548144817352295, - "585": 1.8572112321853638, - "586": 2.060096025466919, - "587": 1.9171427488327026, - "588": 1.455045461654663, - "589": 1.3886091709136963, - "590": 1.4992473125457764, - "591": 1.5363121032714844, - "592": 1.393326997756958, - "593": 1.2640491724014282 - }, - "loss": { - "540": 2.3114218711853027, - "541": 2.2731072902679443, - "542": 2.2986507415771484, - "543": 2.294313907623291, - "544": 2.2365550994873047, - "545": 2.2500340938568115, - "546": 2.2500264644622803, - "547": 2.2545166015625, - "548": 2.240654468536377, - "549": 2.243361473083496, - "550": 2.2672383785247803, - "551": 2.2800679206848145, - "552": 2.245500087738037, - "553": 2.255904197692871, - "554": 2.256373405456543, - "555": 2.280869245529175, - "556": 2.254581928253174, - "557": 2.263603925704956, - "558": 2.2469727993011475, - "559": 2.2748565673828125, - "560": 2.2153480052948, - "561": 2.2607178688049316, - "562": 2.2670421600341797, - "563": 2.2362453937530518, - "564": 2.261608123779297, - "565": 2.207308053970337, - "566": 2.2216320037841797, - "567": 2.210115671157837, - "568": 2.247307538986206, - "569": 2.2463788986206055, - "570": 2.2509500980377197, - "571": 2.2585365772247314, - "572": 2.283242702484131, - "573": 2.280989408493042, - "574": 2.264427900314331, - "575": 2.2549333572387695, - "576": 2.250190019607544, - "577": 2.2448570728302, - "578": 2.2494397163391113, - "579": 2.249887228012085, - "580": 2.2311456203460693, - "581": 2.236873149871826, - "582": 2.2451956272125244, - "583": 2.229813814163208, - "584": 2.268385648727417, - "585": 2.271548271179199, - "586": 2.278242588043213, - "587": 2.2822768688201904, - "588": 2.2455201148986816, - "589": 2.219609260559082, - "590": 2.250032424926758, - "591": 2.2523858547210693, - "592": 2.2375988960266113, - "593": 2.234337329864502 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 0.787275, - 0.912941, - 0.912523, - 1.14409, - 1.38412, - 1.22079, - 1.30833, - 1.73153, - 1.93246, - 1.54681, - 1.0932, - 1.08897, - 1.17093, - 1.0479, - 1.05389, - 1.1855, - 1.117, - 0.899308, - 0.939749, - 1.13282, - 1.18353, - 1.2921, - 1.42457, - 1.54907, - 1.64875, - 1.55667, - 1.79169, - 1.80914, - 1.6892, - 1.64386, - 1.35163, - 0.905993, - 0.792045, - 0.75487, - 0.757456, - 0.823849, - 0.965131, - 1.12987, - 1.20585, - 1.4178, - 1.67704, - 1.47689, - 1.2017, - 0.983659, - 0.828358, - 0.658566, - 0.536815, - 0.620954, - 1.06063, - 1.15111, - 1.00102, - 0.954298, - 1.15259, - 1.39837 - ], - "train_epoch_time": 5.047824859619141, - "train_loss": 2.220233416454693, - "train_score": 0.35017485649377067, - "val_loss": 2.2958824054245013, - "val_score": 0.3326770601672228 - }, - { - "epoch": 11, - "grad_norm": 1.2157100439071655, - "learning_rate": 0.1, - "model_norm": 87.6042251586914, - "step_logs": { - "grad_norm": { - "594": 1.1669530868530273, - "595": 1.2132084369659424, - "596": 1.4693739414215088, - "597": 1.7073791027069092, - "598": 1.698878288269043, - "599": 1.7962965965270996, - "600": 1.5512479543685913, - "601": 1.4652150869369507, - "602": 1.3944478034973145, - "603": 1.3317171335220337, - "604": 1.302470326423645, - "605": 1.1761752367019653, - "606": 1.1240900754928589, - "607": 1.187212347984314, - "608": 1.229611873626709, - "609": 1.2905144691467285, - "610": 1.4038149118423462, - "611": 1.422003984451294, - "612": 1.4970314502716064, - "613": 1.5171924829483032, - "614": 1.5920166969299316, - "615": 1.599853515625, - "616": 1.410097599029541, - "617": 1.705039143562317, - "618": 1.4403878450393677, - "619": 1.7139389514923096, - "620": 1.5842852592468262, - "621": 1.6414768695831299, - "622": 1.5656919479370117, - "623": 1.4700489044189453, - "624": 1.4448580741882324, - "625": 1.521384358406067, - "626": 1.4248954057693481, - "627": 1.273108720779419, - "628": 1.279323935508728, - "629": 1.4342466592788696, - "630": 1.5914961099624634, - "631": 1.575094223022461, - "632": 1.3863567113876343, - "633": 1.3225921392440796, - "634": 1.4696089029312134, - "635": 1.6384886503219604, - "636": 1.785634160041809, - "637": 1.6300214529037476, - "638": 1.462430715560913, - "639": 1.4831770658493042, - "640": 1.5735154151916504, - "641": 1.5286848545074463, - "642": 1.3503004312515259, - "643": 1.3330632448196411, - "644": 1.3279651403427124, - "645": 1.2344977855682373, - "646": 1.231083869934082, - "647": 1.2157100439071655 - }, - "loss": { - "594": 2.2216835021972656, - "595": 2.244797706604004, - "596": 2.2370944023132324, - "597": 2.2341275215148926, - "598": 2.2558789253234863, - "599": 2.246788263320923, - "600": 2.2335166931152344, - "601": 2.2481918334960938, - "602": 2.242051124572754, - "603": 2.238752841949463, - "604": 2.2127814292907715, - "605": 2.2252867221832275, - "606": 2.207686424255371, - "607": 2.207956314086914, - "608": 2.1954174041748047, - "609": 2.2380852699279785, - "610": 2.2355079650878906, - "611": 2.2108030319213867, - "612": 2.2386536598205566, - "613": 2.2283706665039062, - "614": 2.238961935043335, - "615": 2.2507331371307373, - "616": 2.2310800552368164, - "617": 2.255988597869873, - "618": 2.275700092315674, - "619": 2.253037452697754, - "620": 2.243696689605713, - "621": 2.2591986656188965, - "622": 2.222571849822998, - "623": 2.2285008430480957, - "624": 2.2326738834381104, - "625": 2.210934638977051, - "626": 2.229423999786377, - "627": 2.23610520362854, - "628": 2.2118873596191406, - "629": 2.2106199264526367, - "630": 2.204601764678955, - "631": 2.231132745742798, - "632": 2.221668243408203, - "633": 2.21366810798645, - "634": 2.193549871444702, - "635": 2.2152392864227295, - "636": 2.242152690887451, - "637": 2.2621583938598633, - "638": 2.2313241958618164, - "639": 2.194629669189453, - "640": 2.2009198665618896, - "641": 2.233835220336914, - "642": 2.2121715545654297, - "643": 2.2223803997039795, - "644": 2.2018537521362305, - "645": 2.206470489501953, - "646": 2.201155662536621, - "647": 2.1892902851104736 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 1.63146, - 1.52513, - 1.03614, - 0.766387, - 0.781612, - 0.696315, - 0.928168, - 1.0472, - 1.15303, - 1.26236, - 1.30438, - 1.60858, - 1.74717, - 1.56651, - 1.45205, - 1.34385, - 1.13437, - 1.09332, - 0.998907, - 0.968068, - 0.883388, - 0.879354, - 1.12206, - 0.776012, - 1.09687, - 0.766969, - 0.893917, - 0.838465, - 0.906657, - 1.03121, - 1.06949, - 0.955208, - 1.09806, - 1.37963, - 1.35146, - 1.07465, - 0.8704, - 0.899316, - 1.15592, - 1.2655, - 1.01565, - 0.825152, - 0.703202, - 0.851405, - 1.04331, - 0.997643, - 0.888919, - 0.955906, - 1.21327, - 1.2506, - 1.24858, - 1.44783, - 1.45236, - 1.4813 - ], - "train_epoch_time": 5.062178134918213, - "train_loss": 2.188803861130943, - "train_score": 0.3473177456017717, - "val_loss": 2.2665565969202195, - "val_score": 0.3304750283946947 - }, - { - "epoch": 12, - "grad_norm": 1.1594904661178589, - "learning_rate": 0.1, - "model_norm": 87.6198959350586, - "step_logs": { - "grad_norm": { - "648": 1.1954418420791626, - "649": 1.1974676847457886, - "650": 1.2672324180603027, - "651": 1.452003002166748, - "652": 1.528839349746704, - "653": 1.503833293914795, - "654": 1.5454744100570679, - "655": 1.5231748819351196, - "656": 1.417120337486267, - "657": 1.2715767621994019, - "658": 1.2945997714996338, - "659": 1.2057448625564575, - "660": 1.1349579095840454, - "661": 1.118137001991272, - "662": 1.064670205116272, - "663": 1.0222800970077515, - "664": 1.0659438371658325, - "665": 1.051184058189392, - "666": 1.158028244972229, - "667": 1.17327082157135, - "668": 1.1828001737594604, - "669": 1.123181939125061, - "670": 1.0461944341659546, - "671": 1.0050735473632812, - "672": 1.0115554332733154, - "673": 1.070014238357544, - "674": 1.1404701471328735, - "675": 1.2399085760116577, - "676": 1.3631823062896729, - "677": 1.2542368173599243, - "678": 1.2008265256881714, - "679": 1.0959028005599976, - "680": 1.0375603437423706, - "681": 1.1151578426361084, - "682": 1.1901311874389648, - "683": 1.1519356966018677, - "684": 1.092047095298767, - "685": 1.0978142023086548, - "686": 0.99715656042099, - "687": 0.9034143090248108, - "688": 0.8573057651519775, - "689": 0.8090256452560425, - "690": 0.8250359892845154, - "691": 0.796980619430542, - "692": 0.7854346036911011, - "693": 0.815854549407959, - "694": 0.8617132306098938, - "695": 0.9406847357749939, - "696": 0.93160080909729, - "697": 0.9361287355422974, - "698": 1.0740296840667725, - "699": 1.254116415977478, - "700": 1.2265839576721191, - "701": 1.1594904661178589 - }, - "loss": { - "648": 2.1869421005249023, - "649": 2.1919777393341064, - "650": 2.1624321937561035, - "651": 2.1949124336242676, - "652": 2.224238634109497, - "653": 2.1967856884002686, - "654": 2.2175872325897217, - "655": 2.2128512859344482, - "656": 2.214641571044922, - "657": 2.1855058670043945, - "658": 2.190960645675659, - "659": 2.1876306533813477, - "660": 2.177907943725586, - "661": 2.151695966720581, - "662": 2.190953254699707, - "663": 2.1881825923919678, - "664": 2.1756930351257324, - "665": 2.18461012840271, - "666": 2.154822587966919, - "667": 2.1482014656066895, - "668": 2.170921564102173, - "669": 2.170084238052368, - "670": 2.162811040878296, - "671": 2.172851800918579, - "672": 2.166306257247925, - "673": 2.1446595191955566, - "674": 2.167536497116089, - "675": 2.1664373874664307, - "676": 2.1408629417419434, - "677": 2.1656198501586914, - "678": 2.1529040336608887, - "679": 2.173119068145752, - "680": 2.1585640907287598, - "681": 2.1659741401672363, - "682": 2.1546504497528076, - "683": 2.1651697158813477, - "684": 2.1620402336120605, - "685": 2.1537861824035645, - "686": 2.170220375061035, - "687": 2.1818923950195312, - "688": 2.1448683738708496, - "689": 2.1345367431640625, - "690": 2.1399693489074707, - "691": 2.1359193325042725, - "692": 2.115656852722168, - "693": 2.1095523834228516, - "694": 2.1351966857910156, - "695": 2.152808904647827, - "696": 2.1223111152648926, - "697": 2.1618027687072754, - "698": 2.1327104568481445, - "699": 2.1687190532684326, - "700": 2.1600100994110107, - "701": 2.135938882827759 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 1.53031, - 1.52865, - 1.34657, - 1.04108, - 0.951607, - 0.971378, - 0.928447, - 0.95379, - 1.10278, - 1.35166, - 1.30726, - 1.50475, - 1.69075, - 1.72104, - 1.93287, - 2.09384, - 1.91482, - 1.97704, - 1.60684, - 1.56055, - 1.55175, - 1.72019, - 1.97603, - 2.15097, - 2.1171, - 1.87318, - 1.66647, - 1.40918, - 1.15208, - 1.37665, - 1.49301, - 1.80942, - 2.00511, - 1.74173, - 1.5212, - 1.63168, - 1.81293, - 1.78708, - 2.18262, - 2.67337, - 2.91829, - 3.26121, - 3.14385, - 3.36271, - 3.42946, - 3.16931, - 2.87549, - 2.43286, - 2.4454, - 2.46686, - 1.84884, - 1.37888, - 1.43569, - 1.58875 - ], - "train_epoch_time": 5.051527261734009, - "train_loss": 2.1430730223826733, - "train_score": 0.36499618906714826, - "val_loss": 2.2215975239684744, - "val_score": 0.3424852897940219 - }, - { - "epoch": 13, - "grad_norm": 0.7096759080886841, - "learning_rate": 0.06666666666666668, - "model_norm": 87.62943267822266, - "step_logs": { - "grad_norm": { - "702": 1.2094677686691284, - "703": 1.1364346742630005, - "704": 1.054935336112976, - "705": 0.9208025336265564, - "706": 0.8918678760528564, - "707": 0.8889166712760925, - "708": 0.8920423984527588, - "709": 0.8592656254768372, - "710": 0.7808614373207092, - "711": 0.7903451323509216, - "712": 0.8282138705253601, - "713": 0.8917585611343384, - "714": 0.9138843417167664, - "715": 0.898725688457489, - "716": 0.8300848603248596, - "717": 0.7455387711524963, - "718": 0.7375110983848572, - "719": 0.7463124394416809, - "720": 0.7465780973434448, - "721": 0.7408370971679688, - "722": 0.7487491965293884, - "723": 0.7014108896255493, - "724": 0.6839889287948608, - "725": 0.7828773856163025, - "726": 0.8136268854141235, - "727": 0.8110736012458801, - "728": 0.8075604438781738, - "729": 0.731393039226532, - "730": 0.6665154695510864, - "731": 0.7339186072349548, - "732": 0.7479880452156067, - "733": 0.7132483720779419, - "734": 0.6741827130317688, - "735": 0.6816652417182922, - "736": 0.7633786797523499, - "737": 0.766368567943573, - "738": 0.7119523882865906, - "739": 0.7511018514633179, - "740": 0.693392813205719, - "741": 0.7348403930664062, - "742": 0.690436601638794, - "743": 0.7857136726379395, - "744": 0.767687201499939, - "745": 0.7049455046653748, - "746": 0.6908838152885437, - "747": 0.6771897673606873, - "748": 0.6792553067207336, - "749": 0.7423776984214783, - "750": 0.7747229337692261, - "751": 0.6995111703872681, - "752": 0.7536569833755493, - "753": 0.687478244304657, - "754": 0.7189297080039978, - "755": 0.7096759080886841 - }, - "loss": { - "702": 2.13319993019104, - "703": 2.1362335681915283, - "704": 2.120169162750244, - "705": 2.117563009262085, - "706": 2.1312122344970703, - "707": 2.130305767059326, - "708": 2.1443395614624023, - "709": 2.1255943775177, - "710": 2.127607822418213, - "711": 2.1300694942474365, - "712": 2.1319546699523926, - "713": 2.136672019958496, - "714": 2.121074676513672, - "715": 2.1448912620544434, - "716": 2.113769769668579, - "717": 2.138568878173828, - "718": 2.1325201988220215, - "719": 2.112884998321533, - "720": 2.1137192249298096, - "721": 2.1203198432922363, - "722": 2.1200571060180664, - "723": 2.1013803482055664, - "724": 2.113459587097168, - "725": 2.1090354919433594, - "726": 2.1204957962036133, - "727": 2.0943808555603027, - "728": 2.110152006149292, - "729": 2.123112678527832, - "730": 2.1332902908325195, - "731": 2.102656841278076, - "732": 2.1249969005584717, - "733": 2.1087183952331543, - "734": 2.1010513305664062, - "735": 2.106625556945801, - "736": 2.110645294189453, - "737": 2.0891237258911133, - "738": 2.1045916080474854, - "739": 2.1199705600738525, - "740": 2.1246423721313477, - "741": 2.1044230461120605, - "742": 2.1334028244018555, - "743": 2.130838632583618, - "744": 2.113445281982422, - "745": 2.128753900527954, - "746": 2.1046688556671143, - "747": 2.112100124359131, - "748": 2.101102590560913, - "749": 2.069340467453003, - "750": 2.1018152236938477, - "751": 2.093327522277832, - "752": 2.091395854949951, - "753": 2.0998992919921875, - "754": 2.0963640213012695, - "755": 2.092761754989624 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 1.45829, - 1.65409, - 1.9051, - 2.49749, - 2.67933, - 2.696, - 2.69478, - 2.87889, - 3.48934, - 3.41005, - 3.10809, - 2.68685, - 2.53965, - 2.65553, - 3.0677, - 3.84754, - 3.92063, - 3.79345, - 3.79225, - 3.86328, - 3.78159, - 4.2713, - 4.51748, - 3.44109, - 3.20322, - 3.18372, - 3.23567, - 3.96891, - 4.80208, - 3.90367, - 3.79812, - 4.14512, - 4.62255, - 4.53362, - 3.62189, - 3.55704, - 4.15208, - 3.75779, - 4.41903, - 3.89715, - 4.47533, - 3.45161, - 3.5861, - 4.28365, - 4.40934, - 4.60568, - 4.55388, - 3.75476, - 3.50188, - 4.27807, - 3.68204, - 4.44304, - 4.05596, - 4.15527 - ], - "train_epoch_time": 5.051677703857422, - "train_loss": 2.0992124385094884, - "train_score": 0.3778447811709586, - "val_loss": 2.1831022269690217, - "val_score": 0.35468839736811464 - }, - { - "epoch": 14, - "grad_norm": 0.6314717531204224, - "learning_rate": 0.03333333333333334, - "model_norm": 87.63260650634766, - "step_logs": { - "grad_norm": { - "756": 0.7444124221801758, - "757": 0.7044227123260498, - "758": 0.7111073136329651, - "759": 0.741810142993927, - "760": 0.7313619256019592, - "761": 0.6684687733650208, - "762": 0.6533973217010498, - "763": 0.6640459299087524, - "764": 0.6890078186988831, - "765": 0.6936907768249512, - "766": 0.7096317410469055, - "767": 0.6938508749008179, - "768": 0.6939355134963989, - "769": 0.6502628922462463, - "770": 0.6904476881027222, - "771": 0.6724308133125305, - "772": 0.652132511138916, - "773": 0.6908398270606995, - "774": 0.6512293815612793, - "775": 0.6589061617851257, - "776": 0.6848368048667908, - "777": 0.6390787363052368, - "778": 0.6640979051589966, - "779": 0.6511648297309875, - "780": 0.636833906173706, - "781": 0.6551579833030701, - "782": 0.7036217451095581, - "783": 0.7764425277709961, - "784": 0.6543644666671753, - "785": 0.6530728340148926, - "786": 0.670935332775116, - "787": 0.7032603025436401, - "788": 0.7079858183860779, - "789": 0.6747470498085022, - "790": 0.6547266244888306, - "791": 0.6820287108421326, - "792": 0.6060270667076111, - "793": 0.6199682354927063, - "794": 0.6568408608436584, - "795": 0.6298273801803589, - "796": 0.6749716401100159, - "797": 0.6366844773292542, - "798": 0.6207833290100098, - "799": 0.639532744884491, - "800": 0.6093708872795105, - "801": 0.6194463968276978, - "802": 0.662527859210968, - "803": 0.6145942211151123, - "804": 0.6294696927070618, - "805": 0.6803239583969116, - "806": 0.6261860728263855, - "807": 0.6561301946640015, - "808": 0.7099471688270569, - "809": 0.6314717531204224 - }, - "loss": { - "756": 2.1088690757751465, - "757": 2.098982810974121, - "758": 2.1028637886047363, - "759": 2.0999045372009277, - "760": 2.0710790157318115, - "761": 2.104261875152588, - "762": 2.098921775817871, - "763": 2.088793992996216, - "764": 2.090606451034546, - "765": 2.090880870819092, - "766": 2.114882469177246, - "767": 2.099087715148926, - "768": 2.1193289756774902, - "769": 2.0847418308258057, - "770": 2.095512628555298, - "771": 2.078007221221924, - "772": 2.1021549701690674, - "773": 2.0957417488098145, - "774": 2.113266706466675, - "775": 2.106503963470459, - "776": 2.082190990447998, - "777": 2.084826946258545, - "778": 2.110208034515381, - "779": 2.0971906185150146, - "780": 2.101323127746582, - "781": 2.0823447704315186, - "782": 2.0737085342407227, - "783": 2.098522663116455, - "784": 2.0923144817352295, - "785": 2.0959861278533936, - "786": 2.0784354209899902, - "787": 2.103172779083252, - "788": 2.088852882385254, - "789": 2.114295482635498, - "790": 2.075995922088623, - "791": 2.1067326068878174, - "792": 2.0907301902770996, - "793": 2.0980305671691895, - "794": 2.1044297218322754, - "795": 2.087343454360962, - "796": 2.0867247581481934, - "797": 2.097341537475586, - "798": 2.0897085666656494, - "799": 2.0791001319885254, - "800": 2.0794973373413086, - "801": 2.085522174835205, - "802": 2.0800888538360596, - "803": 2.0732412338256836, - "804": 2.072136163711548, - "805": 2.094399929046631, - "806": 2.0907092094421387, - "807": 2.0761852264404297, - "808": 2.116187334060669, - "809": 2.0969619750976562 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 3.80559, - 4.23002, - 4.15854, - 3.81605, - 3.87197, - 4.7091, - 4.91634, - 4.73696, - 4.40376, - 4.34508, - 4.19972, - 4.36012, - 4.40109, - 4.93031, - 4.39571, - 4.5957, - 4.94303, - 4.3912, - 4.98295, - 4.85194, - 4.43963, - 5.10459, - 4.78477, - 4.94602, - 5.18132, - 4.85133, - 4.1886, - 3.48093, - 4.88638, - 4.91434, - 4.61716, - 4.25248, - 4.16734, - 4.64391, - 4.84291, - 4.52902, - 5.69264, - 5.4585, - 4.87769, - 5.262, - 4.5803, - 5.17393, - 5.42258, - 5.08335, - 5.60009, - 5.4351, - 4.73886, - 5.48874, - 5.2296, - 4.5251, - 5.33196, - 4.82265, - 4.19858, - 5.25875 - ], - "train_epoch_time": 5.051954984664917, - "train_loss": 2.087881578844965, - "train_score": 0.3797267306740349, - "val_loss": 2.1743666903980836, - "val_score": 0.3561280135719702 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:18:26.655239", - "final_model_norm": 87.63260650634766, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:16:40.770252", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 3.3889477252960205, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.43351745605469, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 8.455106735229492, - "3": 5.430062770843506, - "4": 5.6468892097473145, - "5": 11.033295631408691, - "6": 19.403236389160156, - "7": 7.91170072555542, - "8": 4.659134864807129, - "9": 2.318803071975708, - "10": 2.1720690727233887, - "11": 2.752659559249878, - "12": 4.2507452964782715, - "13": 13.20004653930664, - "14": 4.406890869140625, - "15": 6.960628509521484, - "16": 6.359367370605469, - "17": 6.250153541564941, - "18": 3.660008192062378, - "19": 33.08381271362305, - "20": 2.868354320526123, - "21": 4.112018585205078, - "22": 4.233462333679199, - "23": 4.344981670379639, - "24": 3.540527820587158, - "25": 5.1248674392700195, - "26": 4.448975086212158, - "27": 3.6585779190063477, - "28": 3.3421261310577393, - "29": 3.0488529205322266, - "30": 4.774571418762207, - "31": 19.267662048339844, - "32": 3.362060546875, - "33": 3.615204095840454, - "34": 3.1254024505615234, - "35": 2.9841437339782715, - "36": 3.054182529449463, - "37": 4.397572994232178, - "38": 5.369297027587891, - "39": 3.1309306621551514, - "40": 5.369064807891846, - "41": 7.794357776641846, - "42": 19.277755737304688, - "43": 5.582124710083008, - "44": 5.446364879608154, - "45": 3.250910520553589, - "46": 8.72555923461914, - "47": 5.285445690155029, - "48": 8.959678649902344, - "49": 4.935024261474609, - "50": 24.596059799194336, - "51": 4.8176774978637695, - "52": 3.8231101036071777, - "53": 3.3889477252960205 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.944894313812256, - "3": 3.726104974746704, - "4": 3.616518020629883, - "5": 3.6710801124572754, - "6": 4.058044910430908, - "7": 4.158349990844727, - "8": 3.6257386207580566, - "9": 3.4210009574890137, - "10": 3.3945889472961426, - "11": 3.3422651290893555, - "12": 3.3065052032470703, - "13": 3.434197425842285, - "14": 3.3901124000549316, - "15": 3.5200142860412598, - "16": 3.3997273445129395, - "17": 3.6144275665283203, - "18": 3.1550843715667725, - "19": 3.332406520843506, - "20": 3.0696306228637695, - "21": 3.039764404296875, - "22": 3.055429458618164, - "23": 3.0484390258789062, - "24": 2.9547481536865234, - "25": 3.0867815017700195, - "26": 3.014432430267334, - "27": 2.9839138984680176, - "28": 2.8958330154418945, - "29": 2.840064287185669, - "30": 2.9247920513153076, - "31": 3.5178093910217285, - "32": 3.006455421447754, - "33": 2.860466480255127, - "34": 2.837097644805908, - "35": 2.829009532928467, - "36": 2.790703773498535, - "37": 2.9146840572357178, - "38": 3.127567768096924, - "39": 2.8455801010131836, - "40": 2.8222756385803223, - "41": 3.1355457305908203, - "42": 3.0576977729797363, - "43": 2.990561008453369, - "44": 3.3500051498413086, - "45": 2.903428077697754, - "46": 3.255727767944336, - "47": 3.073944330215454, - "48": 2.9995102882385254, - "49": 2.9204652309417725, - "50": 3.68436336517334, - "51": 3.002871036529541, - "52": 3.006086826324463, - "53": 2.839353561401367 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 0.00869338, - 0.00874607, - 0.055182, - 0.12637, - 0.113415, - 0.0301567, - 0.0107788, - 0.0664326, - 0.167027, - 0.636247, - 0.719515, - 0.441099, - 0.182995, - 0.0197094, - 0.174562, - 0.072652, - 0.0840652, - 0.0925248, - 0.23553, - 0.00304458, - 0.373096, - 0.179775, - 0.170483, - 0.161474, - 0.235713, - 0.117528, - 0.152295, - 0.222926, - 0.259255, - 0.305531, - 0.1283, - 0.00947576, - 0.265977, - 0.218862, - 0.290444, - 0.317684, - 0.299174, - 0.150718, - 0.108486, - 0.290285, - 0.0979044, - 0.0516122, - 0.00822776, - 0.095974, - 0.112936, - 0.274727, - 0.0427623, - 0.110035, - 0.037365, - 0.119915, - 0.0060902, - 0.129378, - 0.205669, - 0.247223 - ], - "train_epoch_time": 5.052546262741089, - "train_loss": 3.368078115099301, - "train_score": 0.15117131457523772, - "val_loss": 3.394641490809269, - "val_score": 0.14839265246615754 - }, - { - "epoch": 1, - "grad_norm": 1.6649152040481567, - "learning_rate": 0.1, - "model_norm": 87.44978332519531, - "step_logs": { - "grad_norm": { - "54": 11.882963180541992, - "55": 2.3635265827178955, - "56": 3.352724552154541, - "57": 2.6107516288757324, - "58": 3.1675150394439697, - "59": 3.0631000995635986, - "60": 2.564509391784668, - "61": 1.6012704372406006, - "62": 1.8905109167099, - "63": 2.189606189727783, - "64": 1.955898404121399, - "65": 1.822391152381897, - "66": 1.8813598155975342, - "67": 2.1241745948791504, - "68": 2.038508176803589, - "69": 1.5413856506347656, - "70": 1.5947394371032715, - "71": 2.029998540878296, - "72": 1.9531116485595703, - "73": 1.586624026298523, - "74": 1.6178563833236694, - "75": 1.7283015251159668, - "76": 1.7703524827957153, - "77": 1.7480460405349731, - "78": 1.7916656732559204, - "79": 1.9708962440490723, - "80": 1.925018310546875, - "81": 1.6343408823013306, - "82": 1.7145962715148926, - "83": 2.0812294483184814, - "84": 1.965393304824829, - "85": 1.6621005535125732, - "86": 1.767539620399475, - "87": 2.3451972007751465, - "88": 2.0563714504241943, - "89": 1.2139837741851807, - "90": 1.1802303791046143, - "91": 1.2762067317962646, - "92": 1.412876844406128, - "93": 1.8772350549697876, - "94": 1.8925275802612305, - "95": 1.7011109590530396, - "96": 1.6600311994552612, - "97": 1.7145100831985474, - "98": 1.7258946895599365, - "99": 1.5846753120422363, - "100": 1.5328458547592163, - "101": 1.586608648300171, - "102": 1.581928014755249, - "103": 1.6588969230651855, - "104": 1.6137290000915527, - "105": 1.4103758335113525, - "106": 1.4387884140014648, - "107": 1.6649152040481567 - }, - "loss": { - "54": 3.3600575923919678, - "55": 2.787196636199951, - "56": 2.8264527320861816, - "57": 2.7755112648010254, - "58": 2.874067544937134, - "59": 2.8506340980529785, - "60": 2.9195547103881836, - "61": 2.649099588394165, - "62": 2.6556055545806885, - "63": 2.7002651691436768, - "64": 2.716719150543213, - "65": 2.64172625541687, - "66": 2.6891369819641113, - "67": 2.6354784965515137, - "68": 2.7284579277038574, - "69": 2.621030569076538, - "70": 2.6266555786132812, - "71": 2.6157021522521973, - "72": 2.7143189907073975, - "73": 2.600590229034424, - "74": 2.6325554847717285, - "75": 2.605961799621582, - "76": 2.655390739440918, - "77": 2.609612464904785, - "78": 2.632941722869873, - "79": 2.6077609062194824, - "80": 2.647109031677246, - "81": 2.595229387283325, - "82": 2.611328363418579, - "83": 2.5804758071899414, - "84": 2.6764211654663086, - "85": 2.5902743339538574, - "86": 2.5982279777526855, - "87": 2.5912327766418457, - "88": 2.712203025817871, - "89": 2.575115203857422, - "90": 2.5741593837738037, - "91": 2.5489249229431152, - "92": 2.585357427597046, - "93": 2.562504529953003, - "94": 2.632828712463379, - "95": 2.5821690559387207, - "96": 2.612212657928467, - "97": 2.5670647621154785, - "98": 2.6118991374969482, - "99": 2.574159622192383, - "100": 2.596236228942871, - "101": 2.5502476692199707, - "102": 2.600957155227661, - "103": 2.537538528442383, - "104": 2.5901052951812744, - "105": 2.5361320972442627, - "106": 2.5423049926757812, - "107": 2.5615177154541016 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 0.0237956, - 0.498938, - 0.251447, - 0.407204, - 0.286457, - 0.303822, - 0.443923, - 1.03316, - 0.743028, - 0.563215, - 0.710153, - 0.795435, - 0.759747, - 0.584089, - 0.656587, - 1.10319, - 1.03282, - 0.634741, - 0.711552, - 1.03306, - 1.00577, - 0.872427, - 0.847245, - 0.854024, - 0.820214, - 0.671336, - 0.714335, - 0.971607, - 0.888255, - 0.595744, - 0.692876, - 0.937629, - 0.831647, - 0.471138, - 0.641385, - 1.74731, - 1.848, - 1.565, - 1.29513, - 0.727156, - 0.735086, - 0.892317, - 0.947929, - 0.873286, - 0.876855, - 1.02507, - 1.10496, - 1.01308, - 1.03935, - 0.922091, - 0.994618, - 1.27498, - 1.2281, - 0.924088 - ], - "train_epoch_time": 5.04816198348999, - "train_loss": 2.599024853165899, - "train_score": 0.23535464490674318, - "val_loss": 2.630504760621746, - "val_score": 0.23474275241215659 - }, - { - "epoch": 2, - "grad_norm": 1.493432641029358, - "learning_rate": 0.1, - "model_norm": 87.46326446533203, - "step_logs": { - "grad_norm": { - "108": 1.6845800876617432, - "109": 1.596843957901001, - "110": 1.5800604820251465, - "111": 1.4663677215576172, - "112": 1.5330389738082886, - "113": 1.560696005821228, - "114": 1.5556598901748657, - "115": 1.6265424489974976, - "116": 1.7033133506774902, - "117": 1.711039662361145, - "118": 1.5589275360107422, - "119": 1.459154486656189, - "120": 1.4164886474609375, - "121": 1.3624179363250732, - "122": 1.3997383117675781, - "123": 1.482944369316101, - "124": 1.611213207244873, - "125": 1.8654340505599976, - "126": 1.717869520187378, - "127": 1.3610656261444092, - "128": 1.362554669380188, - "129": 1.6764695644378662, - "130": 1.6157361268997192, - "131": 1.4396878480911255, - "132": 1.4864356517791748, - "133": 1.642707347869873, - "134": 1.6450635194778442, - "135": 1.452309250831604, - "136": 1.4403904676437378, - "137": 1.431042194366455, - "138": 1.5069602727890015, - "139": 1.6157925128936768, - "140": 1.6178302764892578, - "141": 1.5775846242904663, - "142": 1.5009082555770874, - "143": 1.485425591468811, - "144": 1.4340190887451172, - "145": 1.4824427366256714, - "146": 1.5407404899597168, - "147": 1.4523173570632935, - "148": 1.5248172283172607, - "149": 1.560773253440857, - "150": 1.471592664718628, - "151": 1.4706635475158691, - "152": 1.4568729400634766, - "153": 1.3535856008529663, - "154": 1.322677493095398, - "155": 1.349790334701538, - "156": 1.3448057174682617, - "157": 1.32919180393219, - "158": 1.3414663076400757, - "159": 1.343133568763733, - "160": 1.4307098388671875, - "161": 1.493432641029358 - }, - "loss": { - "108": 2.600191831588745, - "109": 2.528883934020996, - "110": 2.593515396118164, - "111": 2.5468149185180664, - "112": 2.5517120361328125, - "113": 2.543083667755127, - "114": 2.568270206451416, - "115": 2.5522754192352295, - "116": 2.6112096309661865, - "117": 2.55145263671875, - "118": 2.5874886512756348, - "119": 2.5259652137756348, - "120": 2.557525157928467, - "121": 2.518737316131592, - "122": 2.5307493209838867, - "123": 2.5311412811279297, - "124": 2.5617401599884033, - "125": 2.5553529262542725, - "126": 2.605602741241455, - "127": 2.537452459335327, - "128": 2.544696569442749, - "129": 2.5528926849365234, - "130": 2.570498466491699, - "131": 2.5278353691101074, - "132": 2.5573782920837402, - "133": 2.52195405960083, - "134": 2.5916879177093506, - "135": 2.496354579925537, - "136": 2.5397682189941406, - "137": 2.5125160217285156, - "138": 2.5369157791137695, - "139": 2.5224549770355225, - "140": 2.5586395263671875, - "141": 2.536864757537842, - "142": 2.5578291416168213, - "143": 2.523402214050293, - "144": 2.537787914276123, - "145": 2.5420727729797363, - "146": 2.55967378616333, - "147": 2.5217766761779785, - "148": 2.528733253479004, - "149": 2.524456024169922, - "150": 2.5312998294830322, - "151": 2.5099291801452637, - "152": 2.5288877487182617, - "153": 2.5043282508850098, - "154": 2.5190539360046387, - "155": 2.5095953941345215, - "156": 2.5225343704223633, - "157": 2.4985921382904053, - "158": 2.5087904930114746, - "159": 2.5019383430480957, - "160": 2.5126709938049316, - "161": 2.5212783813476562 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 0.916267, - 0.991754, - 1.03882, - 1.18444, - 1.08574, - 1.04406, - 1.06123, - 0.96471, - 0.900021, - 0.8715, - 1.0647, - 1.18638, - 1.27466, - 1.35694, - 1.29168, - 1.15098, - 0.9868, - 0.73433, - 0.882933, - 1.36975, - 1.37065, - 0.908325, - 0.984638, - 1.21959, - 1.15745, - 0.934581, - 0.957673, - 1.18355, - 1.22415, - 1.22689, - 1.11713, - 0.966167, - 0.977559, - 1.01932, - 1.13544, - 1.14363, - 1.23409, - 1.15673, - 1.07827, - 1.19559, - 1.0876, - 1.03631, - 1.16888, - 1.16047, - 1.19148, - 1.36685, - 1.43989, - 1.37743, - 1.39482, - 1.41423, - 1.39414, - 1.38688, - 1.22753, - 1.13045 - ], - "train_epoch_time": 5.050227165222168, - "train_loss": 2.5330812643042937, - "train_score": 0.2363858500546328, - "val_loss": 2.5737051487516456, - "val_score": 0.22898877029714573 - }, - { - "epoch": 3, - "grad_norm": 1.526887059211731, - "learning_rate": 0.1, - "model_norm": 87.47559356689453, - "step_logs": { - "grad_norm": { - "162": 1.4928721189498901, - "163": 1.6865094900131226, - "164": 1.6986087560653687, - "165": 1.6533052921295166, - "166": 1.6001272201538086, - "167": 1.5447994470596313, - "168": 1.5828136205673218, - "169": 1.5642145872116089, - "170": 1.431638240814209, - "171": 1.2382491827011108, - "172": 1.29545259475708, - "173": 1.488620400428772, - "174": 1.5724915266036987, - "175": 1.5411988496780396, - "176": 1.4260880947113037, - "177": 1.2863078117370605, - "178": 1.2801792621612549, - "179": 1.3829270601272583, - "180": 1.461946725845337, - "181": 1.4035640954971313, - "182": 1.366930365562439, - "183": 1.2010037899017334, - "184": 1.1213728189468384, - "185": 1.3109997510910034, - "186": 1.372214674949646, - "187": 1.362729787826538, - "188": 1.3166824579238892, - "189": 1.3974947929382324, - "190": 1.4236501455307007, - "191": 1.3842368125915527, - "192": 1.383528709411621, - "193": 1.5255012512207031, - "194": 1.6087610721588135, - "195": 1.5967577695846558, - "196": 1.585250735282898, - "197": 1.5104517936706543, - "198": 1.5963188409805298, - "199": 1.6635619401931763, - "200": 1.4449334144592285, - "201": 1.2055463790893555, - "202": 1.1725956201553345, - "203": 1.214180827140808, - "204": 1.2591506242752075, - "205": 1.374233365058899, - "206": 1.441994071006775, - "207": 1.5264184474945068, - "208": 1.4113317728042603, - "209": 1.140454888343811, - "210": 1.1167337894439697, - "211": 1.2823407649993896, - "212": 1.3605154752731323, - "213": 1.3126513957977295, - "214": 1.3711744546890259, - "215": 1.526887059211731 - }, - "loss": { - "162": 2.5369386672973633, - "163": 2.509779691696167, - "164": 2.5565848350524902, - "165": 2.538588047027588, - "166": 2.545473098754883, - "167": 2.494863510131836, - "168": 2.5543622970581055, - "169": 2.534714937210083, - "170": 2.5358211994171143, - "171": 2.4962592124938965, - "172": 2.494605779647827, - "173": 2.525576591491699, - "174": 2.5370254516601562, - "175": 2.520183563232422, - "176": 2.5178821086883545, - "177": 2.511479377746582, - "178": 2.5065903663635254, - "179": 2.4960975646972656, - "180": 2.5319511890411377, - "181": 2.5154550075531006, - "182": 2.518453359603882, - "183": 2.4957737922668457, - "184": 2.5026257038116455, - "185": 2.494650363922119, - "186": 2.50592041015625, - "187": 2.4931392669677734, - "188": 2.492976665496826, - "189": 2.5015175342559814, - "190": 2.510263442993164, - "191": 2.481764316558838, - "192": 2.512326240539551, - "193": 2.488492965698242, - "194": 2.530038356781006, - "195": 2.5199599266052246, - "196": 2.5205793380737305, - "197": 2.498992443084717, - "198": 2.5292038917541504, - "199": 2.522573947906494, - "200": 2.525585174560547, - "201": 2.487456798553467, - "202": 2.4869112968444824, - "203": 2.471940040588379, - "204": 2.478116512298584, - "205": 2.479897975921631, - "206": 2.490063190460205, - "207": 2.503345012664795, - "208": 2.514010429382324, - "209": 2.4574787616729736, - "210": 2.4901504516601562, - "211": 2.480341672897339, - "212": 2.493208169937134, - "213": 2.4661519527435303, - "214": 2.4971795082092285, - "215": 2.495734930038452 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 1.13832, - 0.882385, - 0.886081, - 0.928723, - 0.994167, - 1.04545, - 1.01958, - 1.03594, - 1.23723, - 1.62807, - 1.48648, - 1.13971, - 1.026, - 1.061, - 1.23806, - 1.51789, - 1.52947, - 1.30516, - 1.18466, - 1.27689, - 1.34785, - 1.73028, - 1.9902, - 1.45146, - 1.33083, - 1.34254, - 1.43799, - 1.28086, - 1.23855, - 1.29521, - 1.3125, - 1.06933, - 0.977561, - 0.988361, - 1.00301, - 1.09535, - 0.992532, - 0.911519, - 1.20967, - 1.71154, - 1.80869, - 1.67676, - 1.56303, - 1.31315, - 1.19752, - 1.07442, - 1.26214, - 1.88944, - 1.99676, - 1.50836, - 1.34695, - 1.43127, - 1.3282, - 1.07049 - ], - "train_epoch_time": 5.052087783813477, - "train_loss": 2.5195413763930157, - "train_score": 0.2547043131105869, - "val_loss": 2.5706858183294705, - "val_score": 0.24922413160825568 - }, - { - "epoch": 4, - "grad_norm": 1.2100497484207153, - "learning_rate": 0.1, - "model_norm": 87.48690032958984, - "step_logs": { - "grad_norm": { - "216": 1.5042321681976318, - "217": 1.2972551584243774, - "218": 1.3525184392929077, - "219": 1.4106248617172241, - "220": 1.4683302640914917, - "221": 1.4328974485397339, - "222": 1.5292645692825317, - "223": 1.516752004623413, - "224": 1.3745100498199463, - "225": 1.2629231214523315, - "226": 1.2997370958328247, - "227": 1.344958782196045, - "228": 1.2913954257965088, - "229": 1.1746699810028076, - "230": 1.083894968032837, - "231": 1.1090645790100098, - "232": 1.175661563873291, - "233": 1.2521255016326904, - "234": 1.3336749076843262, - "235": 1.327859878540039, - "236": 1.335525631904602, - "237": 1.4305250644683838, - "238": 1.5687737464904785, - "239": 1.4777119159698486, - "240": 1.6281108856201172, - "241": 1.5883123874664307, - "242": 1.418677568435669, - "243": 1.3770877122879028, - "244": 1.4308468103408813, - "245": 1.418418049812317, - "246": 1.4686918258666992, - "247": 1.4832983016967773, - "248": 1.4652273654937744, - "249": 1.502508282661438, - "250": 1.5151818990707397, - "251": 1.3689600229263306, - "252": 1.2781827449798584, - "253": 1.311661958694458, - "254": 1.373867392539978, - "255": 1.3660823106765747, - "256": 1.3258785009384155, - "257": 1.326951503753662, - "258": 1.3120416402816772, - "259": 1.2513842582702637, - "260": 1.1746522188186646, - "261": 1.2064586877822876, - "262": 1.2606614828109741, - "263": 1.3701711893081665, - "264": 1.5060062408447266, - "265": 1.5568857192993164, - "266": 1.4231266975402832, - "267": 1.240964651107788, - "268": 1.1753987073898315, - "269": 1.2100497484207153 - }, - "loss": { - "216": 2.5180835723876953, - "217": 2.473853588104248, - "218": 2.50290584564209, - "219": 2.495795726776123, - "220": 2.4973506927490234, - "221": 2.5057730674743652, - "222": 2.494767189025879, - "223": 2.513709545135498, - "224": 2.510648250579834, - "225": 2.4886891841888428, - "226": 2.497945547103882, - "227": 2.4738125801086426, - "228": 2.4851808547973633, - "229": 2.4756364822387695, - "230": 2.4734315872192383, - "231": 2.4554872512817383, - "232": 2.4761457443237305, - "233": 2.491023063659668, - "234": 2.4589333534240723, - "235": 2.485600233078003, - "236": 2.488257884979248, - "237": 2.487308979034424, - "238": 2.492017984390259, - "239": 2.5216212272644043, - "240": 2.491151809692383, - "241": 2.5172672271728516, - "242": 2.5080184936523438, - "243": 2.49701189994812, - "244": 2.490177631378174, - "245": 2.506624698638916, - "246": 2.466916561126709, - "247": 2.5057616233825684, - "248": 2.4754409790039062, - "249": 2.4973416328430176, - "250": 2.477790117263794, - "251": 2.482135772705078, - "252": 2.455598831176758, - "253": 2.4782023429870605, - "254": 2.465925693511963, - "255": 2.499709129333496, - "256": 2.4612679481506348, - "257": 2.4860377311706543, - "258": 2.4684667587280273, - "259": 2.4743924140930176, - "260": 2.4582266807556152, - "261": 2.480653762817383, - "262": 2.472898006439209, - "263": 2.4647326469421387, - "264": 2.477093458175659, - "265": 2.497713804244995, - "266": 2.4905996322631836, - "267": 2.4573888778686523, - "268": 2.4505364894866943, - "269": 2.465961217880249 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 1.11286, - 1.47002, - 1.36823, - 1.25426, - 1.15833, - 1.22043, - 1.06676, - 1.09266, - 1.32889, - 1.56033, - 1.47867, - 1.36757, - 1.49018, - 1.79414, - 2.10536, - 1.99629, - 1.79148, - 1.58885, - 1.38244, - 1.4097, - 1.39505, - 1.21545, - 1.01258, - 1.15478, - 0.939793, - 0.997832, - 1.24613, - 1.31673, - 1.21631, - 1.24589, - 1.14365, - 1.13889, - 1.15304, - 1.10623, - 1.07928, - 1.32448, - 1.50304, - 1.44043, - 1.30644, - 1.33948, - 1.40008, - 1.41188, - 1.43394, - 1.58011, - 1.78157, - 1.70428, - 1.556, - 1.31287, - 1.09217, - 1.03046, - 1.22975, - 1.59571, - 1.77374, - 1.68415 - ], - "train_epoch_time": 5.048539161682129, - "train_loss": 2.4629252667748602, - "train_score": 0.27318081959248275, - "val_loss": 2.5018012871287856, - "val_score": 0.26599723774721645 - }, - { - "epoch": 5, - "grad_norm": 1.275873064994812, - "learning_rate": 0.1, - "model_norm": 87.50006103515625, - "step_logs": { - "grad_norm": { - "270": 1.242241382598877, - "271": 1.3752840757369995, - "272": 1.509659767150879, - "273": 1.4997674226760864, - "274": 1.4664418697357178, - "275": 1.4549927711486816, - "276": 1.433301568031311, - "277": 1.4563589096069336, - "278": 1.4142285585403442, - "279": 1.3457815647125244, - "280": 1.1438502073287964, - "281": 1.0439789295196533, - "282": 1.152848243713379, - "283": 1.298545002937317, - "284": 1.3708072900772095, - "285": 1.3351082801818848, - "286": 1.4092931747436523, - "287": 1.5157380104064941, - "288": 1.655364990234375, - "289": 1.6383436918258667, - "290": 1.5117796659469604, - "291": 1.3852317333221436, - "292": 1.2005242109298706, - "293": 1.1524837017059326, - "294": 1.2188464403152466, - "295": 1.2545958757400513, - "296": 1.2708994150161743, - "297": 1.2899991273880005, - "298": 1.3007780313491821, - "299": 1.1948150396347046, - "300": 1.29708993434906, - "301": 1.3692450523376465, - "302": 1.3729547262191772, - "303": 1.4372953176498413, - "304": 1.5518672466278076, - "305": 1.8208012580871582, - "306": 1.7197481393814087, - "307": 1.5619542598724365, - "308": 1.5854787826538086, - "309": 1.5662997961044312, - "310": 1.4740266799926758, - "311": 1.2492971420288086, - "312": 0.9861946105957031, - "313": 1.0496667623519897, - "314": 1.251468539237976, - "315": 1.2535101175308228, - "316": 1.201544165611267, - "317": 1.3078744411468506, - "318": 1.4728519916534424, - "319": 1.5280863046646118, - "320": 1.4423651695251465, - "321": 1.2795406579971313, - "322": 1.2070748805999756, - "323": 1.275873064994812 - }, - "loss": { - "270": 2.451322555541992, - "271": 2.473684787750244, - "272": 2.4785008430480957, - "273": 2.5072813034057617, - "274": 2.485354423522949, - "275": 2.477545738220215, - "276": 2.471564769744873, - "277": 2.4904720783233643, - "278": 2.466644525527954, - "279": 2.4801747798919678, - "280": 2.4546728134155273, - "281": 2.459286689758301, - "282": 2.462512969970703, - "283": 2.4804182052612305, - "284": 2.4487881660461426, - "285": 2.471494197845459, - "286": 2.458620548248291, - "287": 2.4713315963745117, - "288": 2.4676902294158936, - "289": 2.4956986904144287, - "290": 2.4918088912963867, - "291": 2.4724583625793457, - "292": 2.4272124767303467, - "293": 2.4410529136657715, - "294": 2.443024158477783, - "295": 2.470226287841797, - "296": 2.4550609588623047, - "297": 2.455300807952881, - "298": 2.4781670570373535, - "299": 2.4526126384735107, - "300": 2.4411096572875977, - "301": 2.4632813930511475, - "302": 2.444310426712036, - "303": 2.4847426414489746, - "304": 2.4452319145202637, - "305": 2.4813785552978516, - "306": 2.465850830078125, - "307": 2.494922637939453, - "308": 2.47973370552063, - "309": 2.4533133506774902, - "310": 2.4604482650756836, - "311": 2.457676649093628, - "312": 2.421933650970459, - "313": 2.424926996231079, - "314": 2.4268150329589844, - "315": 2.4415690898895264, - "316": 2.435175895690918, - "317": 2.447998046875, - "318": 2.4521968364715576, - "319": 2.4638612270355225, - "320": 2.4494247436523438, - "321": 2.4374911785125732, - "322": 2.437119722366333, - "323": 2.4203295707702637 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 1.5885, - 1.30785, - 1.0875, - 1.11469, - 1.15574, - 1.17031, - 1.20309, - 1.17421, - 1.2333, - 1.36941, - 1.8761, - 2.25645, - 1.85282, - 1.47099, - 1.30316, - 1.38652, - 1.23791, - 1.07568, - 0.90054, - 0.929784, - 1.09028, - 1.2885, - 1.68409, - 1.83784, - 1.64448, - 1.56938, - 1.51999, - 1.47545, - 1.46462, - 1.71802, - 1.45093, - 1.31387, - 1.29671, - 1.20279, - 1.01534, - 0.748459, - 0.833752, - 1.02263, - 0.986471, - 1.00001, - 1.13241, - 1.57468, - 2.49022, - 2.20088, - 1.54952, - 1.55387, - 1.68675, - 1.43113, - 1.13041, - 1.05517, - 1.17737, - 1.48879, - 1.67266, - 1.48682 - ], - "train_epoch_time": 5.05258846282959, - "train_loss": 2.425375849068421, - "train_score": 0.2787907998650796, - "val_loss": 2.4786855774823615, - "val_score": 0.26686280158302406 - }, - { - "epoch": 6, - "grad_norm": 1.60768723487854, - "learning_rate": 0.1, - "model_norm": 87.51543426513672, - "step_logs": { - "grad_norm": { - "324": 1.4600036144256592, - "325": 1.5363696813583374, - "326": 1.5483111143112183, - "327": 1.5883113145828247, - "328": 1.6253679990768433, - "329": 1.6698472499847412, - "330": 1.6124595403671265, - "331": 1.4245474338531494, - "332": 1.2198418378829956, - "333": 1.1659995317459106, - "334": 1.1940242052078247, - "335": 1.3308666944503784, - "336": 1.2919392585754395, - "337": 1.1751962900161743, - "338": 1.243245005607605, - "339": 1.2223283052444458, - "340": 1.3887358903884888, - "341": 1.4822484254837036, - "342": 1.6142373085021973, - "343": 1.8940600156784058, - "344": 2.068099021911621, - "345": 1.7159664630889893, - "346": 1.5897043943405151, - "347": 1.515467882156372, - "348": 1.5037363767623901, - "349": 1.5573487281799316, - "350": 1.685118556022644, - "351": 1.511350154876709, - "352": 1.276816725730896, - "353": 1.2557131052017212, - "354": 1.4487239122390747, - "355": 1.4267280101776123, - "356": 1.3650164604187012, - "357": 1.3793189525604248, - "358": 1.3184151649475098, - "359": 1.4550317525863647, - "360": 1.5756475925445557, - "361": 1.5412311553955078, - "362": 1.458289384841919, - "363": 1.3948017358779907, - "364": 1.2699570655822754, - "365": 1.1540223360061646, - "366": 1.098054051399231, - "367": 1.1575748920440674, - "368": 1.4290499687194824, - "369": 1.671949028968811, - "370": 1.7307555675506592, - "371": 1.6433302164077759, - "372": 1.419509768486023, - "373": 1.2741972208023071, - "374": 1.2830681800842285, - "375": 1.5328922271728516, - "376": 1.5815271139144897, - "377": 1.60768723487854 - }, - "loss": { - "324": 2.423346996307373, - "325": 2.444814682006836, - "326": 2.4383111000061035, - "327": 2.457697868347168, - "328": 2.4347856044769287, - "329": 2.452908992767334, - "330": 2.4509775638580322, - "331": 2.4557056427001953, - "332": 2.405529737472534, - "333": 2.404137372970581, - "334": 2.4043030738830566, - "335": 2.3914694786071777, - "336": 2.403027296066284, - "337": 2.39839506149292, - "338": 2.395979404449463, - "339": 2.422151803970337, - "340": 2.4026143550872803, - "341": 2.438060760498047, - "342": 2.4172914028167725, - "343": 2.4507336616516113, - "344": 2.477540969848633, - "345": 2.4538772106170654, - "346": 2.4470860958099365, - "347": 2.412900447845459, - "348": 2.4140400886535645, - "349": 2.4156644344329834, - "350": 2.4223508834838867, - "351": 2.434015989303589, - "352": 2.38643741607666, - "353": 2.3736932277679443, - "354": 2.421555757522583, - "355": 2.3984429836273193, - "356": 2.381211757659912, - "357": 2.3999085426330566, - "358": 2.3930954933166504, - "359": 2.4029130935668945, - "360": 2.4074549674987793, - "361": 2.4009592533111572, - "362": 2.3978631496429443, - "363": 2.4047818183898926, - "364": 2.3720412254333496, - "365": 2.385714530944824, - "366": 2.3702468872070312, - "367": 2.3664233684539795, - "368": 2.3798351287841797, - "369": 2.4168667793273926, - "370": 2.4074058532714844, - "371": 2.407222270965576, - "372": 2.39224910736084, - "373": 2.3683271408081055, - "374": 2.352229595184326, - "375": 2.413966655731201, - "376": 2.403733968734741, - "377": 2.4112908840179443 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 1.13686, - 1.03575, - 1.01712, - 0.97422, - 0.921632, - 0.879687, - 0.942674, - 1.2101, - 1.6166, - 1.76833, - 1.68641, - 1.35019, - 1.43971, - 1.7366, - 1.55014, - 1.62116, - 1.24579, - 1.10969, - 0.927672, - 0.683138, - 0.579266, - 0.833365, - 0.968315, - 1.05062, - 1.06758, - 0.996013, - 0.853053, - 1.0656, - 1.46384, - 1.50537, - 1.15378, - 1.17828, - 1.27797, - 1.26144, - 1.37675, - 1.13499, - 0.969706, - 1.01076, - 1.12755, - 1.23609, - 1.47077, - 1.79139, - 1.96583, - 1.76601, - 1.16534, - 0.864583, - 0.80367, - 0.891387, - 1.18722, - 1.45871, - 1.42883, - 1.02733, - 0.961021, - 0.932925 - ], - "train_epoch_time": 5.051176071166992, - "train_loss": 2.406868515739824, - "train_score": 0.2787560528114844, - "val_loss": 2.466258319456186, - "val_score": 0.2689168343724668 - }, - { - "epoch": 7, - "grad_norm": 1.1687053442001343, - "learning_rate": 0.1, - "model_norm": 87.53131866455078, - "step_logs": { - "grad_norm": { - "378": 1.60160231590271, - "379": 1.6076996326446533, - "380": 1.6012232303619385, - "381": 1.4979603290557861, - "382": 1.4272422790527344, - "383": 1.2978098392486572, - "384": 1.2874720096588135, - "385": 1.3287444114685059, - "386": 1.381339192390442, - "387": 1.3700239658355713, - "388": 1.3750993013381958, - "389": 1.3893704414367676, - "390": 1.3712728023529053, - "391": 1.4219294786453247, - "392": 1.346638560295105, - "393": 1.288305401802063, - "394": 1.3512741327285767, - "395": 1.440542221069336, - "396": 1.5063265562057495, - "397": 1.649722933769226, - "398": 1.6636254787445068, - "399": 1.612720251083374, - "400": 1.5331531763076782, - "401": 1.2922505140304565, - "402": 1.2506208419799805, - "403": 1.429261565208435, - "404": 1.4418500661849976, - "405": 1.4607679843902588, - "406": 1.4172638654708862, - "407": 1.2512582540512085, - "408": 1.151175856590271, - "409": 1.2085741758346558, - "410": 1.3249741792678833, - "411": 1.4612982273101807, - "412": 1.584967017173767, - "413": 1.5188218355178833, - "414": 1.4100773334503174, - "415": 1.2858773469924927, - "416": 1.2695493698120117, - "417": 1.3973702192306519, - "418": 1.4315433502197266, - "419": 1.4916226863861084, - "420": 1.4846605062484741, - "421": 1.4671449661254883, - "422": 1.4627171754837036, - "423": 1.3842188119888306, - "424": 1.4306995868682861, - "425": 1.3572522401809692, - "426": 1.4067387580871582, - "427": 1.6193788051605225, - "428": 1.754942774772644, - "429": 1.572987675666809, - "430": 1.3550280332565308, - "431": 1.1687053442001343 - }, - "loss": { - "378": 2.411616563796997, - "379": 2.387085437774658, - "380": 2.4100770950317383, - "381": 2.393401622772217, - "382": 2.412733793258667, - "383": 2.3630266189575195, - "384": 2.376993417739868, - "385": 2.3643651008605957, - "386": 2.385039806365967, - "387": 2.3838796615600586, - "388": 2.37530255317688, - "389": 2.382737159729004, - "390": 2.3571596145629883, - "391": 2.377121925354004, - "392": 2.389420509338379, - "393": 2.354663372039795, - "394": 2.387359857559204, - "395": 2.3569040298461914, - "396": 2.397303342819214, - "397": 2.3658015727996826, - "398": 2.425973653793335, - "399": 2.39426851272583, - "400": 2.388237953186035, - "401": 2.3718059062957764, - "402": 2.3537840843200684, - "403": 2.337611675262451, - "404": 2.3659653663635254, - "405": 2.358236074447632, - "406": 2.3582913875579834, - "407": 2.34114933013916, - "408": 2.3399524688720703, - "409": 2.3467700481414795, - "410": 2.363800525665283, - "411": 2.3622684478759766, - "412": 2.374744415283203, - "413": 2.36867618560791, - "414": 2.374596118927002, - "415": 2.3485848903656006, - "416": 2.338998794555664, - "417": 2.3492870330810547, - "418": 2.3476479053497314, - "419": 2.353184938430786, - "420": 2.3702774047851562, - "421": 2.3458361625671387, - "422": 2.3727898597717285, - "423": 2.3481180667877197, - "424": 2.3533499240875244, - "425": 2.3392038345336914, - "426": 2.361246109008789, - "427": 2.372911214828491, - "428": 2.3703529834747314, - "429": 2.3798115253448486, - "430": 2.3412435054779053, - "431": 2.3429765701293945 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 0.940154, - 0.923545, - 0.939999, - 1.06663, - 1.18444, - 1.40296, - 1.43401, - 1.33916, - 1.24996, - 1.27007, - 1.25618, - 1.23435, - 1.25355, - 1.1757, - 1.31762, - 1.4187, - 1.30747, - 1.13577, - 1.05654, - 0.869273, - 0.876547, - 0.920566, - 1.01603, - 1.42032, - 1.50493, - 1.14432, - 1.13807, - 1.10516, - 1.17408, - 1.49532, - 1.76573, - 1.60666, - 1.34647, - 1.10625, - 0.945315, - 1.02681, - 1.19427, - 1.42039, - 1.45121, - 1.20313, - 1.14558, - 1.05764, - 1.07534, - 1.08981, - 1.10902, - 1.22549, - 1.14971, - 1.26983, - 1.1932, - 0.904867, - 0.769639, - 0.961816, - 1.27512, - 1.71537 - ], - "train_epoch_time": 5.059226751327515, - "train_loss": 2.335863566638065, - "train_score": 0.3121558914225618, - "val_loss": 2.3935280300582726, - "val_score": 0.29677633511760065 - }, - { - "epoch": 8, - "grad_norm": 1.1332361698150635, - "learning_rate": 0.1, - "model_norm": 87.5473861694336, - "step_logs": { - "grad_norm": { - "432": 1.137690544128418, - "433": 1.1460480690002441, - "434": 1.2171761989593506, - "435": 1.2076777219772339, - "436": 1.2529245615005493, - "437": 1.402597427368164, - "438": 1.3453058004379272, - "439": 1.212056279182434, - "440": 1.2251746654510498, - "441": 1.2845358848571777, - "442": 1.362449049949646, - "443": 1.6387406587600708, - "444": 1.5799988508224487, - "445": 1.5100924968719482, - "446": 1.6327098608016968, - "447": 1.7873666286468506, - "448": 1.6572623252868652, - "449": 1.3638532161712646, - "450": 1.3508301973342896, - "451": 1.4135549068450928, - "452": 1.4002708196640015, - "453": 1.3939937353134155, - "454": 1.3698304891586304, - "455": 1.2942852973937988, - "456": 1.2993489503860474, - "457": 1.3878233432769775, - "458": 1.3831275701522827, - "459": 1.3291853666305542, - "460": 1.3543205261230469, - "461": 1.428424596786499, - "462": 1.6272356510162354, - "463": 1.7715330123901367, - "464": 1.6866225004196167, - "465": 1.4478378295898438, - "466": 1.2568714618682861, - "467": 1.1030749082565308, - "468": 1.0275148153305054, - "469": 1.0894169807434082, - "470": 1.1351925134658813, - "471": 1.2910327911376953, - "472": 1.4991974830627441, - "473": 1.5773869752883911, - "474": 1.5351835489273071, - "475": 1.4473085403442383, - "476": 1.3482539653778076, - "477": 1.56797194480896, - "478": 1.9412206411361694, - "479": 1.932516098022461, - "480": 1.9513517618179321, - "481": 1.7118409872055054, - "482": 1.7004340887069702, - "483": 1.3866249322891235, - "484": 1.1696233749389648, - "485": 1.1332361698150635 - }, - "loss": { - "432": 2.340517520904541, - "433": 2.3275694847106934, - "434": 2.3364553451538086, - "435": 2.325854539871216, - "436": 2.3437535762786865, - "437": 2.327770233154297, - "438": 2.33870792388916, - "439": 2.305976390838623, - "440": 2.33780574798584, - "441": 2.3219358921051025, - "442": 2.3375911712646484, - "443": 2.3292489051818848, - "444": 2.3892416954040527, - "445": 2.310730457305908, - "446": 2.355114459991455, - "447": 2.349273920059204, - "448": 2.3818507194519043, - "449": 2.348038673400879, - "450": 2.35707426071167, - "451": 2.330489158630371, - "452": 2.3492016792297363, - "453": 2.3373923301696777, - "454": 2.3339121341705322, - "455": 2.3187499046325684, - "456": 2.3144209384918213, - "457": 2.306708335876465, - "458": 2.32818603515625, - "459": 2.320002555847168, - "460": 2.3211567401885986, - "461": 2.336489677429199, - "462": 2.349205493927002, - "463": 2.3457818031311035, - "464": 2.341017961502075, - "465": 2.3531289100646973, - "466": 2.312844753265381, - "467": 2.3081393241882324, - "468": 2.3177995681762695, - "469": 2.3095102310180664, - "470": 2.3065314292907715, - "471": 2.339859962463379, - "472": 2.336761951446533, - "473": 2.347179412841797, - "474": 2.3455986976623535, - "475": 2.3234448432922363, - "476": 2.316662073135376, - "477": 2.340973377227783, - "478": 2.368338108062744, - "479": 2.338229179382324, - "480": 2.3880410194396973, - "481": 2.3419253826141357, - "482": 2.35868763923645, - "483": 2.355233907699585, - "484": 2.3115785121917725, - "485": 2.320404052734375 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 1.80827, - 1.77214, - 1.57707, - 1.59471, - 1.49301, - 1.18324, - 1.29221, - 1.56967, - 1.55744, - 1.40721, - 1.2593, - 0.867352, - 0.957076, - 1.01331, - 0.883474, - 0.735371, - 0.867225, - 1.26232, - 1.29173, - 1.16633, - 1.19811, - 1.20285, - 1.2438, - 1.38418, - 1.37085, - 1.19763, - 1.21701, - 1.31316, - 1.2655, - 1.14512, - 0.887197, - 0.747462, - 0.822941, - 1.12255, - 1.46408, - 1.89693, - 2.19533, - 1.94595, - 1.78987, - 1.40383, - 1.03967, - 0.943343, - 0.995252, - 1.1092, - 1.27444, - 0.952182, - 0.628484, - 0.626096, - 0.627149, - 0.799183, - 0.815738, - 1.22494, - 1.68973, - 1.80685 - ], - "train_epoch_time": 5.050874948501587, - "train_loss": 2.2988176273991763, - "train_score": 0.3208314652251376, - "val_loss": 2.3519796161783, - "val_score": 0.3054813069604705 - }, - { - "epoch": 9, - "grad_norm": 1.2005212306976318, - "learning_rate": 0.1, - "model_norm": 87.5645980834961, - "step_logs": { - "grad_norm": { - "486": 1.1214721202850342, - "487": 1.1258738040924072, - "488": 1.1950767040252686, - "489": 1.3669465780258179, - "490": 1.3901455402374268, - "491": 1.4446160793304443, - "492": 1.5292778015136719, - "493": 1.4820761680603027, - "494": 1.525681972503662, - "495": 1.5819711685180664, - "496": 1.6606371402740479, - "497": 1.845616340637207, - "498": 1.8000943660736084, - "499": 1.6151983737945557, - "500": 1.476933479309082, - "501": 1.3950945138931274, - "502": 1.2935055494308472, - "503": 1.2673839330673218, - "504": 1.3930432796478271, - "505": 1.5287995338439941, - "506": 1.5310273170471191, - "507": 1.36247980594635, - "508": 1.43450927734375, - "509": 1.5258667469024658, - "510": 1.4105826616287231, - "511": 1.3734098672866821, - "512": 1.4327290058135986, - "513": 1.5748199224472046, - "514": 1.6368809938430786, - "515": 1.5906060934066772, - "516": 1.4321568012237549, - "517": 1.4684892892837524, - "518": 1.6119998693466187, - "519": 2.138305187225342, - "520": 3.083324432373047, - "521": 2.522343873977661, - "522": 1.7955716848373413, - "523": 1.698992371559143, - "524": 1.6268047094345093, - "525": 1.5548611879348755, - "526": 1.5203980207443237, - "527": 1.4332115650177002, - "528": 1.3108603954315186, - "529": 1.2389236688613892, - "530": 1.1775331497192383, - "531": 1.1402958631515503, - "532": 1.1709182262420654, - "533": 1.1948164701461792, - "534": 1.3235933780670166, - "535": 1.3287609815597534, - "536": 1.1896299123764038, - "537": 1.1644668579101562, - "538": 1.1485133171081543, - "539": 1.2005212306976318 - }, - "loss": { - "486": 2.306220531463623, - "487": 2.2960259914398193, - "488": 2.3082311153411865, - "489": 2.3108201026916504, - "490": 2.3137893676757812, - "491": 2.3080358505249023, - "492": 2.3295693397521973, - "493": 2.3241429328918457, - "494": 2.3161849975585938, - "495": 2.3040428161621094, - "496": 2.326857805252075, - "497": 2.3326635360717773, - "498": 2.339661121368408, - "499": 2.3283119201660156, - "500": 2.321657180786133, - "501": 2.2988147735595703, - "502": 2.280843734741211, - "503": 2.2978503704071045, - "504": 2.2929351329803467, - "505": 2.3016517162323, - "506": 2.3221685886383057, - "507": 2.281674861907959, - "508": 2.2965335845947266, - "509": 2.30495548248291, - "510": 2.286433696746826, - "511": 2.297111988067627, - "512": 2.2914953231811523, - "513": 2.3124561309814453, - "514": 2.3285834789276123, - "515": 2.30527400970459, - "516": 2.286728858947754, - "517": 2.2645764350891113, - "518": 2.301481246948242, - "519": 2.3149728775024414, - "520": 2.3718090057373047, - "521": 2.4531021118164062, - "522": 2.3675405979156494, - "523": 2.3290956020355225, - "524": 2.339332103729248, - "525": 2.3267135620117188, - "526": 2.2976150512695312, - "527": 2.2909488677978516, - "528": 2.300706386566162, - "529": 2.2797560691833496, - "530": 2.287539482116699, - "531": 2.2514655590057373, - "532": 2.250145435333252, - "533": 2.248020648956299, - "534": 2.2692179679870605, - "535": 2.2780089378356934, - "536": 2.278042793273926, - "537": 2.2823715209960938, - "538": 2.2705466747283936, - "539": 2.252415180206299 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 1.83368, - 1.81133, - 1.61617, - 1.2367, - 1.1973, - 1.10596, - 0.9961, - 1.05809, - 0.995051, - 0.920648, - 0.843763, - 0.684809, - 0.722042, - 0.892461, - 1.06433, - 1.18113, - 1.3632, - 1.43056, - 1.18158, - 0.984778, - 0.990667, - 1.22912, - 1.116, - 0.989987, - 1.14911, - 1.21782, - 1.11633, - 0.93242, - 0.869076, - 0.911166, - 1.11489, - 1.05014, - 0.885681, - 0.506298, - 0.249483, - 0.385573, - 0.734331, - 0.806872, - 0.883936, - 0.962409, - 0.993946, - 1.11531, - 1.3389, - 1.48525, - 1.64977, - 1.73153, - 1.64118, - 1.5747, - 1.29529, - 1.29021, - 1.60967, - 1.68319, - 1.72131, - 1.56282 - ], - "train_epoch_time": 5.052049875259399, - "train_loss": 2.265104422849767, - "train_score": 0.3293994351644256, - "val_loss": 2.322902155250961, - "val_score": 0.31362568141398556 - }, - { - "epoch": 10, - "grad_norm": 1.6595216989517212, - "learning_rate": 0.1, - "model_norm": 87.58207702636719, - "step_logs": { - "grad_norm": { - "540": 1.2803752422332764, - "541": 1.3663889169692993, - "542": 1.4194055795669556, - "543": 1.4473828077316284, - "544": 1.4625210762023926, - "545": 1.3707042932510376, - "546": 1.3922961950302124, - "547": 1.317380428314209, - "548": 1.3131691217422485, - "549": 1.1916605234146118, - "550": 1.1941823959350586, - "551": 1.1862330436706543, - "552": 1.2014741897583008, - "553": 1.2803758382797241, - "554": 1.159397840499878, - "555": 1.0783162117004395, - "556": 1.096177577972412, - "557": 1.158869743347168, - "558": 1.2966809272766113, - "559": 1.3969601392745972, - "560": 1.4888845682144165, - "561": 1.499085783958435, - "562": 1.4343029260635376, - "563": 1.3455581665039062, - "564": 1.2944754362106323, - "565": 1.2354768514633179, - "566": 1.2829288244247437, - "567": 1.3580394983291626, - "568": 1.3430910110473633, - "569": 1.4303832054138184, - "570": 1.8011568784713745, - "571": 1.9026520252227783, - "572": 1.9378623962402344, - "573": 1.789549469947815, - "574": 1.6741491556167603, - "575": 1.739890456199646, - "576": 1.5230759382247925, - "577": 1.397168517112732, - "578": 1.2681032419204712, - "579": 1.2249735593795776, - "580": 1.165692687034607, - "581": 1.1072674989700317, - "582": 1.1658586263656616, - "583": 1.1926738023757935, - "584": 1.202600121498108, - "585": 1.3199353218078613, - "586": 1.439717173576355, - "587": 1.5407600402832031, - "588": 1.529766321182251, - "589": 1.4613008499145508, - "590": 1.5500118732452393, - "591": 1.7532572746276855, - "592": 1.8600577116012573, - "593": 1.6595216989517212 - }, - "loss": { - "540": 2.270373582839966, - "541": 2.2558369636535645, - "542": 2.2684831619262695, - "543": 2.2699015140533447, - "544": 2.2554569244384766, - "545": 2.2837038040161133, - "546": 2.273789644241333, - "547": 2.267402172088623, - "548": 2.245145320892334, - "549": 2.261744737625122, - "550": 2.2455899715423584, - "551": 2.2805604934692383, - "552": 2.222210168838501, - "553": 2.269192695617676, - "554": 2.252525806427002, - "555": 2.2254936695098877, - "556": 2.2567338943481445, - "557": 2.2426486015319824, - "558": 2.2250969409942627, - "559": 2.248084783554077, - "560": 2.26503324508667, - "561": 2.269549608230591, - "562": 2.265213966369629, - "563": 2.2614564895629883, - "564": 2.245302677154541, - "565": 2.2372922897338867, - "566": 2.250276803970337, - "567": 2.260746479034424, - "568": 2.25730037689209, - "569": 2.250591516494751, - "570": 2.276700019836426, - "571": 2.3106374740600586, - "572": 2.295170307159424, - "573": 2.2932000160217285, - "574": 2.2557008266448975, - "575": 2.2844595909118652, - "576": 2.2920069694519043, - "577": 2.2681570053100586, - "578": 2.243772268295288, - "579": 2.2578420639038086, - "580": 2.2256407737731934, - "581": 2.266733169555664, - "582": 2.230048418045044, - "583": 2.222313165664673, - "584": 2.2402119636535645, - "585": 2.248361587524414, - "586": 2.268207550048828, - "587": 2.231321096420288, - "588": 2.2259740829467773, - "589": 2.27091121673584, - "590": 2.2529563903808594, - "591": 2.266594648361206, - "592": 2.276758909225464, - "593": 2.2648444175720215 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 1.38491, - 1.20826, - 1.12596, - 1.08353, - 1.05446, - 1.21549, - 1.17297, - 1.30649, - 1.30198, - 1.59272, - 1.57467, - 1.6207, - 1.53942, - 1.38419, - 1.67573, - 1.91397, - 1.8781, - 1.66991, - 1.32337, - 1.15198, - 1.02177, - 1.00992, - 1.1011, - 1.24906, - 1.33995, - 1.46573, - 1.3672, - 1.22582, - 1.25135, - 1.1, - 0.701783, - 0.638283, - 0.61118, - 0.716068, - 0.80481, - 0.75464, - 0.988036, - 1.16192, - 1.39531, - 1.50467, - 1.6379, - 1.84882, - 1.64067, - 1.56229, - 1.54898, - 1.29051, - 1.09428, - 0.939923, - 0.951196, - 1.06346, - 0.937741, - 0.737365, - 0.658058, - 0.82238 - ], - "train_epoch_time": 5.051379680633545, - "train_loss": 2.2414143720350443, - "train_score": 0.34328035323144374, - "val_loss": 2.300314697414534, - "val_score": 0.3277707022243745 - }, - { - "epoch": 11, - "grad_norm": 1.422560214996338, - "learning_rate": 0.1, - "model_norm": 87.59950256347656, - "step_logs": { - "grad_norm": { - "594": 1.4724085330963135, - "595": 1.5659208297729492, - "596": 1.5948623418807983, - "597": 1.5096204280853271, - "598": 1.476780652999878, - "599": 1.4445205926895142, - "600": 1.2030155658721924, - "601": 1.1663322448730469, - "602": 1.2035514116287231, - "603": 1.3046060800552368, - "604": 1.2823153734207153, - "605": 1.29511296749115, - "606": 1.437174916267395, - "607": 1.6487631797790527, - "608": 1.6355232000350952, - "609": 1.4788060188293457, - "610": 1.3755265474319458, - "611": 1.2401158809661865, - "612": 1.067068338394165, - "613": 1.0521490573883057, - "614": 1.1348017454147339, - "615": 1.1846015453338623, - "616": 1.2649641036987305, - "617": 1.2051887512207031, - "618": 1.0959563255310059, - "619": 1.1193002462387085, - "620": 1.0330448150634766, - "621": 0.9784910082817078, - "622": 0.9489515423774719, - "623": 0.9668910503387451, - "624": 1.2256289720535278, - "625": 1.2776364088058472, - "626": 1.3848919868469238, - "627": 1.5670509338378906, - "628": 1.620158076286316, - "629": 1.6570059061050415, - "630": 1.6332999467849731, - "631": 1.572061538696289, - "632": 1.6720561981201172, - "633": 1.7698657512664795, - "634": 1.7282272577285767, - "635": 1.7018442153930664, - "636": 1.6266013383865356, - "637": 1.6040335893630981, - "638": 1.6183538436889648, - "639": 1.5059492588043213, - "640": 1.2513744831085205, - "641": 1.1860790252685547, - "642": 1.1543378829956055, - "643": 1.2302004098892212, - "644": 1.3721766471862793, - "645": 1.4554718732833862, - "646": 1.433977484703064, - "647": 1.422560214996338 - }, - "loss": { - "594": 2.259866237640381, - "595": 2.2519421577453613, - "596": 2.2438693046569824, - "597": 2.2767887115478516, - "598": 2.218242645263672, - "599": 2.2393274307250977, - "600": 2.238276720046997, - "601": 2.220353603363037, - "602": 2.2341716289520264, - "603": 2.227184772491455, - "604": 2.2201545238494873, - "605": 2.226320266723633, - "606": 2.253817558288574, - "607": 2.2502970695495605, - "608": 2.259983539581299, - "609": 2.2395195960998535, - "610": 2.216625928878784, - "611": 2.2334721088409424, - "612": 2.201089382171631, - "613": 2.207909345626831, - "614": 2.2100014686584473, - "615": 2.1870317459106445, - "616": 2.2290401458740234, - "617": 2.194772243499756, - "618": 2.193455219268799, - "619": 2.2254507541656494, - "620": 2.201402187347412, - "621": 2.2043111324310303, - "622": 2.204188346862793, - "623": 2.2055840492248535, - "624": 2.2074508666992188, - "625": 2.2152113914489746, - "626": 2.2013015747070312, - "627": 2.248645305633545, - "628": 2.228558301925659, - "629": 2.235501766204834, - "630": 2.21940279006958, - "631": 2.2460076808929443, - "632": 2.223534107208252, - "633": 2.2548344135284424, - "634": 2.2394323348999023, - "635": 2.25168776512146, - "636": 2.2220373153686523, - "637": 2.2316551208496094, - "638": 2.232147216796875, - "639": 2.23519229888916, - "640": 2.220662832260132, - "641": 2.211493730545044, - "642": 2.212052822113037, - "643": 2.176553964614868, - "644": 2.201409339904785, - "645": 2.223477840423584, - "646": 2.2088513374328613, - "647": 2.213635206222534 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 1.04238, - 0.91837, - 0.882168, - 0.99905, - 1.01713, - 1.07317, - 1.54658, - 1.63222, - 1.54237, - 1.30857, - 1.35019, - 1.32731, - 1.09119, - 0.827796, - 0.844874, - 1.02408, - 1.17153, - 1.4523, - 1.9331, - 1.99447, - 1.71614, - 1.55851, - 1.39303, - 1.51105, - 1.82617, - 1.77633, - 2.06282, - 2.30229, - 2.44771, - 2.35922, - 1.46951, - 1.35706, - 1.14775, - 0.915703, - 0.849003, - 0.814192, - 0.831963, - 0.908808, - 0.79532, - 0.719837, - 0.749784, - 0.777443, - 0.839826, - 0.867362, - 0.852267, - 0.985585, - 1.4181, - 1.57202, - 1.66008, - 1.43819, - 1.16918, - 1.0496, - 1.07419, - 1.09387 - ], - "train_epoch_time": 5.051513433456421, - "train_loss": 2.2047455189046765, - "train_score": 0.35434899569583933, - "val_loss": 2.2707991964097136, - "val_score": 0.33801395748020174 - }, - { - "epoch": 12, - "grad_norm": 0.9680524468421936, - "learning_rate": 0.1, - "model_norm": 87.61489868164062, - "step_logs": { - "grad_norm": { - "648": 1.3333641290664673, - "649": 1.31859290599823, - "650": 1.3806748390197754, - "651": 1.347794532775879, - "652": 1.4184019565582275, - "653": 1.4079928398132324, - "654": 1.4346829652786255, - "655": 1.4215614795684814, - "656": 1.3713256120681763, - "657": 1.4001219272613525, - "658": 1.3395192623138428, - "659": 1.3574950695037842, - "660": 1.399519681930542, - "661": 1.3149547576904297, - "662": 1.1317532062530518, - "663": 0.9868398308753967, - "664": 0.9376863837242126, - "665": 0.9813666343688965, - "666": 1.069417119026184, - "667": 1.1566966772079468, - "668": 1.1301541328430176, - "669": 1.096773624420166, - "670": 1.13601553440094, - "671": 1.1344237327575684, - "672": 1.1862298250198364, - "673": 1.1706833839416504, - "674": 1.3028088808059692, - "675": 1.3327603340148926, - "676": 1.2247729301452637, - "677": 1.130468487739563, - "678": 1.0298511981964111, - "679": 1.0488574504852295, - "680": 1.07109797000885, - "681": 1.0068838596343994, - "682": 0.9685542583465576, - "683": 0.9551768898963928, - "684": 0.9691235423088074, - "685": 0.9821853637695312, - "686": 0.8839348554611206, - "687": 0.8862894773483276, - "688": 0.8559242486953735, - "689": 0.8531018495559692, - "690": 0.8814327120780945, - "691": 0.9617823958396912, - "692": 0.9558363556861877, - "693": 0.9620358943939209, - "694": 0.8950363993644714, - "695": 0.8203489780426025, - "696": 0.8070682287216187, - "697": 0.859437882900238, - "698": 0.9139648675918579, - "699": 0.855360746383667, - "700": 0.8960896730422974, - "701": 0.9680524468421936 - }, - "loss": { - "648": 2.224252462387085, - "649": 2.2232770919799805, - "650": 2.2095859050750732, - "651": 2.213632583618164, - "652": 2.1954970359802246, - "653": 2.1968834400177, - "654": 2.2026238441467285, - "655": 2.212841033935547, - "656": 2.196122884750366, - "657": 2.197126626968384, - "658": 2.2139625549316406, - "659": 2.180508852005005, - "660": 2.183412551879883, - "661": 2.1939024925231934, - "662": 2.178495407104492, - "663": 2.1660470962524414, - "664": 2.1710925102233887, - "665": 2.1895346641540527, - "666": 2.170623302459717, - "667": 2.1945128440856934, - "668": 2.1607513427734375, - "669": 2.1708621978759766, - "670": 2.1882219314575195, - "671": 2.169952869415283, - "672": 2.147392511367798, - "673": 2.1595332622528076, - "674": 2.1798410415649414, - "675": 2.178086042404175, - "676": 2.1641979217529297, - "677": 2.1711418628692627, - "678": 2.172391891479492, - "679": 2.133408546447754, - "680": 2.143892765045166, - "681": 2.1644768714904785, - "682": 2.1919074058532715, - "683": 2.1479907035827637, - "684": 2.1551153659820557, - "685": 2.15683650970459, - "686": 2.1568961143493652, - "687": 2.158215045928955, - "688": 2.151196241378784, - "689": 2.1430177688598633, - "690": 2.1212120056152344, - "691": 2.1417641639709473, - "692": 2.1535301208496094, - "693": 2.1516737937927246, - "694": 2.1559958457946777, - "695": 2.1714184284210205, - "696": 2.147042751312256, - "697": 2.1262757778167725, - "698": 2.1543636322021484, - "699": 2.1321985721588135, - "700": 2.1494712829589844, - "701": 2.149899959564209 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 1.25108, - 1.27871, - 1.15912, - 1.21859, - 1.09127, - 1.10817, - 1.07011, - 1.09501, - 1.16782, - 1.12079, - 1.23388, - 1.18326, - 1.11475, - 1.26881, - 1.7008, - 2.2242, - 2.46924, - 2.27347, - 1.89797, - 1.64021, - 1.69172, - 1.80467, - 1.6956, - 1.68616, - 1.52607, - 1.57573, - 1.28429, - 1.22623, - 1.44273, - 1.69891, - 2.04828, - 1.93928, - 1.86872, - 2.13498, - 2.33655, - 2.35432, - 2.29463, - 2.23579, - 2.76051, - 2.74754, - 2.93636, - 2.94458, - 2.73027, - 2.31536, - 2.35713, - 2.32484, - 2.69133, - 3.22661, - 3.29625, - 2.87866, - 2.57905, - 2.91426, - 2.67688, - 2.29414 - ], - "train_epoch_time": 5.059179306030273, - "train_loss": 2.141586204375564, - "train_score": 0.36812791427546626, - "val_loss": 2.216165391777468, - "val_score": 0.34919453190744404 - }, - { - "epoch": 13, - "grad_norm": 0.6713171005249023, - "learning_rate": 0.06666666666666668, - "model_norm": 87.6242904663086, - "step_logs": { - "grad_norm": { - "702": 0.9447621703147888, - "703": 0.8605120182037354, - "704": 0.8270865082740784, - "705": 0.8139187097549438, - "706": 0.9207656383514404, - "707": 0.9539266228675842, - "708": 0.9187578558921814, - "709": 0.8544454574584961, - "710": 0.8573761582374573, - "711": 0.8325833678245544, - "712": 0.7487674951553345, - "713": 0.8300611972808838, - "714": 0.9403583407402039, - "715": 0.9269595742225647, - "716": 0.8547358512878418, - "717": 0.8651770353317261, - "718": 0.9049345254898071, - "719": 0.8973135948181152, - "720": 0.9019340872764587, - "721": 0.8226966857910156, - "722": 0.7691552042961121, - "723": 0.6991496086120605, - "724": 0.6866421699523926, - "725": 0.7393158078193665, - "726": 0.79970782995224, - "727": 0.7658272981643677, - "728": 0.7023969888687134, - "729": 0.6750680804252625, - "730": 0.7801414728164673, - "731": 0.7106881737709045, - "732": 0.703877329826355, - "733": 0.6623641848564148, - "734": 0.6847352385520935, - "735": 0.7203131318092346, - "736": 0.6406399607658386, - "737": 0.6447098851203918, - "738": 0.6694806218147278, - "739": 0.6632204651832581, - "740": 0.7323748469352722, - "741": 0.8258577585220337, - "742": 0.7254605293273926, - "743": 0.6315357685089111, - "744": 0.7073687314987183, - "745": 0.7020779848098755, - "746": 0.7511247992515564, - "747": 0.7816352844238281, - "748": 0.7458645105361938, - "749": 0.7709537148475647, - "750": 0.7125895023345947, - "751": 0.6858197450637817, - "752": 0.747973620891571, - "753": 0.7492133378982544, - "754": 0.716821014881134, - "755": 0.6713171005249023 - }, - "loss": { - "702": 2.1394238471984863, - "703": 2.135296583175659, - "704": 2.1354780197143555, - "705": 2.1325507164001465, - "706": 2.143461227416992, - "707": 2.122426986694336, - "708": 2.127103805541992, - "709": 2.1159729957580566, - "710": 2.1261091232299805, - "711": 2.1214585304260254, - "712": 2.105083703994751, - "713": 2.128053903579712, - "714": 2.1445674896240234, - "715": 2.128133773803711, - "716": 2.1315219402313232, - "717": 2.137157678604126, - "718": 2.13249135017395, - "719": 2.1486611366271973, - "720": 2.133594512939453, - "721": 2.119948148727417, - "722": 2.126617431640625, - "723": 2.13726806640625, - "724": 2.1391525268554688, - "725": 2.1157310009002686, - "726": 2.1103405952453613, - "727": 2.1100518703460693, - "728": 2.140583038330078, - "729": 2.1129374504089355, - "730": 2.1272308826446533, - "731": 2.1331515312194824, - "732": 2.1278982162475586, - "733": 2.1136536598205566, - "734": 2.1283087730407715, - "735": 2.1350760459899902, - "736": 2.106642246246338, - "737": 2.11907958984375, - "738": 2.1068787574768066, - "739": 2.1340057849884033, - "740": 2.1070449352264404, - "741": 2.104229688644409, - "742": 2.123256206512451, - "743": 2.114095449447632, - "744": 2.12141752243042, - "745": 2.108335494995117, - "746": 2.1152632236480713, - "747": 2.104945182800293, - "748": 2.1202139854431152, - "749": 2.120616912841797, - "750": 2.1229805946350098, - "751": 2.098238229751587, - "752": 2.1216821670532227, - "753": 2.111076831817627, - "754": 2.0979056358337402, - "755": 2.1012048721313477 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 2.39691, - 2.88366, - 3.12171, - 3.21912, - 2.52823, - 2.3324, - 2.51992, - 2.89829, - 2.8923, - 3.06041, - 3.7547, - 3.08861, - 2.42523, - 2.47672, - 2.9176, - 2.85513, - 2.60407, - 2.66858, - 2.62278, - 3.13217, - 3.59469, - 4.37239, - 4.53713, - 3.8708, - 3.29982, - 3.59775, - 4.33877, - 4.63651, - 3.49517, - 4.22341, - 4.29494, - 4.8177, - 4.5393, - 4.11501, - 5.1329, - 5.09822, - 4.70071, - 4.85154, - 3.92832, - 3.0852, - 4.03436, - 5.30064, - 4.23969, - 4.27729, - 3.74921, - 3.44534, - 3.81118, - 3.56784, - 4.18088, - 4.46103, - 3.79234, - 3.76091, - 4.08286, - 4.66244 - ], - "train_epoch_time": 5.051976680755615, - "train_loss": 2.105823877928418, - "train_score": 0.37626434707402157, - "val_loss": 2.1842726755087467, - "val_score": 0.3553656000220543 - }, - { - "epoch": 14, - "grad_norm": 0.6120442152023315, - "learning_rate": 0.03333333333333334, - "model_norm": 87.62741088867188, - "step_logs": { - "grad_norm": { - "756": 0.6779073476791382, - "757": 0.6146882176399231, - "758": 0.7082776427268982, - "759": 0.6646072268486023, - "760": 0.6687437891960144, - "761": 0.6980213522911072, - "762": 0.6405911445617676, - "763": 0.6580644845962524, - "764": 0.666760265827179, - "765": 0.6627838611602783, - "766": 0.6748723387718201, - "767": 0.6789063215255737, - "768": 0.6050003170967102, - "769": 0.6512197256088257, - "770": 0.6254962086677551, - "771": 0.6485661268234253, - "772": 0.6217379570007324, - "773": 0.620040774345398, - "774": 0.6436275243759155, - "775": 0.6680901646614075, - "776": 0.6732286214828491, - "777": 0.6128937005996704, - "778": 0.6669917702674866, - "779": 0.6437169909477234, - "780": 0.6341109871864319, - "781": 0.6595199704170227, - "782": 0.6319490075111389, - "783": 0.6454424262046814, - "784": 0.6392427682876587, - "785": 0.6286539435386658, - "786": 0.7049348950386047, - "787": 0.572549045085907, - "788": 0.6379544734954834, - "789": 0.6582005620002747, - "790": 0.6561388373374939, - "791": 0.7041144967079163, - "792": 0.6080366373062134, - "793": 0.6152358055114746, - "794": 0.6456167101860046, - "795": 0.625890851020813, - "796": 0.6409896016120911, - "797": 0.6351111531257629, - "798": 0.5750425457954407, - "799": 0.5945930480957031, - "800": 0.6238871216773987, - "801": 0.6249281167984009, - "802": 0.6484242081642151, - "803": 0.5904596447944641, - "804": 0.6446259617805481, - "805": 0.6036942005157471, - "806": 0.600235104560852, - "807": 0.6659184098243713, - "808": 0.6049234867095947, - "809": 0.6120442152023315 - }, - "loss": { - "756": 2.091010332107544, - "757": 2.098677635192871, - "758": 2.1143531799316406, - "759": 2.0961642265319824, - "760": 2.0748157501220703, - "761": 2.1173954010009766, - "762": 2.1137185096740723, - "763": 2.102023124694824, - "764": 2.1141042709350586, - "765": 2.094511032104492, - "766": 2.1117911338806152, - "767": 2.1226460933685303, - "768": 2.097991466522217, - "769": 2.0794951915740967, - "770": 2.1216061115264893, - "771": 2.1137049198150635, - "772": 2.1160500049591064, - "773": 2.096975803375244, - "774": 2.1074235439300537, - "775": 2.1048173904418945, - "776": 2.102797746658325, - "777": 2.1090340614318848, - "778": 2.083104133605957, - "779": 2.1024649143218994, - "780": 2.0781493186950684, - "781": 2.08461332321167, - "782": 2.1110751628875732, - "783": 2.1057887077331543, - "784": 2.119384765625, - "785": 2.0936455726623535, - "786": 2.097327709197998, - "787": 2.0956673622131348, - "788": 2.131375789642334, - "789": 2.0970778465270996, - "790": 2.095752716064453, - "791": 2.110517978668213, - "792": 2.1039462089538574, - "793": 2.096714496612549, - "794": 2.0943994522094727, - "795": 2.1116466522216797, - "796": 2.0844802856445312, - "797": 2.1108970642089844, - "798": 2.096395492553711, - "799": 2.086451768875122, - "800": 2.106167793273926, - "801": 2.1052422523498535, - "802": 2.077186107635498, - "803": 2.081002712249756, - "804": 2.0980730056762695, - "805": 2.093629837036133, - "806": 2.09326171875, - "807": 2.1027448177337646, - "808": 2.077929735183716, - "809": 2.0949196815490723 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 4.55004, - 5.55438, - 4.21474, - 4.74564, - 4.63938, - 4.34575, - 5.15093, - 4.85401, - 4.7554, - 4.76803, - 4.63669, - 4.6053, - 5.73182, - 4.90346, - 5.4227, - 5.025, - 5.47408, - 5.45447, - 5.08724, - 4.71568, - 4.63951, - 5.61453, - 4.68242, - 5.07386, - 5.16828, - 4.79258, - 5.28615, - 5.05475, - 5.18654, - 5.29761, - 4.22054, - 6.39289, - 5.23698, - 4.84059, - 4.86798, - 4.25699, - 5.69082, - 5.53932, - 5.0247, - 5.39044, - 5.07336, - 5.2332, - 6.33977, - 5.90159, - 5.41104, - 5.39066, - 4.94034, - 5.96887, - 5.049, - 5.74468, - 5.81006, - 4.74181, - 5.67845, - 5.59245 - ], - "train_epoch_time": 5.0515358448028564, - "train_loss": 2.0949813161382034, - "train_score": 0.3791775018361583, - "val_loss": 2.1772965982505017, - "val_score": 0.35670206624796413 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:20:11.537013", - "final_model_norm": 87.62741088867188, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:18:26.809531", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 5.927224159240723, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.43559265136719, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 8.492406845092773, - "3": 5.133657455444336, - "4": 4.355436325073242, - "5": 8.350513458251953, - "6": 20.943735122680664, - "7": 8.886734962463379, - "8": 4.939404487609863, - "9": 2.7346272468566895, - "10": 2.2990474700927734, - "11": 2.869755983352661, - "12": 5.0551557540893555, - "13": 6.891150951385498, - "14": 39.990028381347656, - "15": 6.289729595184326, - "16": 7.449644088745117, - "17": 5.954482555389404, - "18": 6.2183146476745605, - "19": 11.386857032775879, - "20": 6.120622158050537, - "21": 46.00760269165039, - "22": 5.337105751037598, - "23": 4.709524631500244, - "24": 4.025487899780273, - "25": 6.538298606872559, - "26": 3.3507795333862305, - "27": 36.01987075805664, - "28": 2.523289680480957, - "29": 2.9157509803771973, - "30": 4.439497947692871, - "31": 6.468553066253662, - "32": 4.940361022949219, - "33": 3.9475128650665283, - "34": 3.7959046363830566, - "35": 2.856990098953247, - "36": 2.2356903553009033, - "37": 3.854083299636841, - "38": 2.8693065643310547, - "39": 4.858524322509766, - "40": 3.169224500656128, - "41": 13.64751148223877, - "42": 2.795562267303467, - "43": 6.008693218231201, - "44": 3.218686819076538, - "45": 5.200477123260498, - "46": 4.667453765869141, - "47": 3.983552932739258, - "48": 7.1761393547058105, - "49": 4.028877258300781, - "50": 4.500582695007324, - "51": 4.383876800537109, - "52": 4.444519996643066, - "53": 5.927224159240723 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.9338831901550293, - "3": 3.7350575923919678, - "4": 3.591062068939209, - "5": 3.5876054763793945, - "6": 4.030679702758789, - "7": 4.174951553344727, - "8": 3.606139898300171, - "9": 3.4460549354553223, - "10": 3.3512208461761475, - "11": 3.330306053161621, - "12": 3.2731595039367676, - "13": 3.400935649871826, - "14": 4.000199317932129, - "15": 3.4472057819366455, - "16": 3.329242706298828, - "17": 3.1616313457489014, - "18": 3.22605037689209, - "19": 3.235077142715454, - "20": 3.2256669998168945, - "21": 4.876416206359863, - "22": 3.360476016998291, - "23": 3.0198841094970703, - "24": 2.9926400184631348, - "25": 3.08091402053833, - "26": 2.98525333404541, - "27": 3.5717737674713135, - "28": 2.849923849105835, - "29": 2.8644518852233887, - "30": 2.9232075214385986, - "31": 3.096769332885742, - "32": 3.514759063720703, - "33": 3.031528949737549, - "34": 2.908228874206543, - "35": 2.935027599334717, - "36": 2.764258861541748, - "37": 2.795379638671875, - "38": 2.8013205528259277, - "39": 2.8269448280334473, - "40": 2.781714916229248, - "41": 3.175449848175049, - "42": 2.7635140419006348, - "43": 3.0239179134368896, - "44": 2.9823057651519775, - "45": 2.937479019165039, - "46": 3.2030391693115234, - "47": 2.9475035667419434, - "48": 3.015583038330078, - "49": 2.857910394668579, - "50": 2.9124820232391357, - "51": 2.893673896789551, - "52": 3.0640993118286133, - "53": 2.9861316680908203 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 0.00874362, - 0.00824185, - 0.0545456, - 0.141724, - 0.189304, - 0.0514492, - 0.00918904, - 0.0528648, - 0.147806, - 0.460814, - 0.634026, - 0.404384, - 0.128085, - 0.0716168, - 0.00250137, - 0.0871371, - 0.0599894, - 0.0891709, - 0.0834307, - 0.0249504, - 0.086105, - 0.00230378, - 0.117975, - 0.136156, - 0.184679, - 0.0720693, - 0.265882, - 0.00275296, - 0.447609, - 0.336931, - 0.148317, - 0.0740107, - 0.144005, - 0.194543, - 0.201836, - 0.359579, - 0.553039, - 0.188191, - 0.340258, - 0.119759, - 0.276953, - 0.017049, - 0.353609, - 0.0837548, - 0.287869, - 0.108615, - 0.147029, - 0.185743, - 0.0585584, - 0.176068, - 0.143789, - 0.150568, - 0.155115, - 0.0849975 - ], - "train_epoch_time": 5.055506944656372, - "train_loss": 2.971780118709657, - "train_score": 0.15369776722091855, - "val_loss": 2.9890767000572827, - "val_score": 0.15274289626752194 - }, - { - "epoch": 1, - "grad_norm": 1.6106148958206177, - "learning_rate": 0.1, - "model_norm": 87.45330047607422, - "step_logs": { - "grad_norm": { - "54": 5.978774070739746, - "55": 8.950082778930664, - "56": 6.119593143463135, - "57": 31.461462020874023, - "58": 3.1882057189941406, - "59": 10.551898956298828, - "60": 5.144927978515625, - "61": 2.8061091899871826, - "62": 3.760117530822754, - "63": 2.740030527114868, - "64": 4.894859790802002, - "65": 2.3066086769104004, - "66": 2.3835325241088867, - "67": 3.767838478088379, - "68": 2.745457649230957, - "69": 1.503357172012329, - "70": 1.1944791078567505, - "71": 1.1905282735824585, - "72": 1.7554534673690796, - "73": 2.110347032546997, - "74": 2.3435840606689453, - "75": 1.9726920127868652, - "76": 1.3262838125228882, - "77": 1.469195008277893, - "78": 2.046281337738037, - "79": 1.8338699340820312, - "80": 1.2676805257797241, - "81": 1.3730958700180054, - "82": 1.7932828664779663, - "83": 1.8086676597595215, - "84": 1.7238720655441284, - "85": 1.694278359413147, - "86": 1.600956916809082, - "87": 1.5541541576385498, - "88": 1.527895212173462, - "89": 1.6551250219345093, - "90": 1.901789903640747, - "91": 1.7149564027786255, - "92": 1.185032844543457, - "93": 1.265830636024475, - "94": 1.6216756105422974, - "95": 1.6631293296813965, - "96": 1.5494980812072754, - "97": 1.5495703220367432, - "98": 1.5657968521118164, - "99": 1.5824859142303467, - "100": 1.6045254468917847, - "101": 1.5318145751953125, - "102": 1.490466833114624, - "103": 1.5522249937057495, - "104": 1.713820219039917, - "105": 1.7040716409683228, - "106": 1.5766328573226929, - "107": 1.6106148958206177 - }, - "loss": { - "54": 2.9796619415283203, - "55": 2.897850275039673, - "56": 2.892820358276367, - "57": 4.205759048461914, - "58": 2.8588154315948486, - "59": 3.3858137130737305, - "60": 3.0331780910491943, - "61": 2.7692489624023438, - "62": 2.8786139488220215, - "63": 2.830838203430176, - "64": 2.910327196121216, - "65": 2.757439613342285, - "66": 2.7358312606811523, - "67": 2.7778244018554688, - "68": 3.032517433166504, - "69": 2.68621563911438, - "70": 2.6283602714538574, - "71": 2.619724750518799, - "72": 2.624297857284546, - "73": 2.730600118637085, - "74": 2.6886730194091797, - "75": 2.7313616275787354, - "76": 2.595308303833008, - "77": 2.627058744430542, - "78": 2.6370882987976074, - "79": 2.7046689987182617, - "80": 2.594330310821533, - "81": 2.586346387863159, - "82": 2.5819172859191895, - "83": 2.667757511138916, - "84": 2.5907230377197266, - "85": 2.648146629333496, - "86": 2.579831600189209, - "87": 2.5950562953948975, - "88": 2.568941116333008, - "89": 2.617931842803955, - "90": 2.585576295852661, - "91": 2.656446933746338, - "92": 2.5517587661743164, - "93": 2.565455436706543, - "94": 2.5689311027526855, - "95": 2.629897117614746, - "96": 2.5709469318389893, - "97": 2.6209893226623535, - "98": 2.556056261062622, - "99": 2.613426923751831, - "100": 2.5697927474975586, - "101": 2.60725474357605, - "102": 2.5579676628112793, - "103": 2.5875186920166016, - "104": 2.575045108795166, - "105": 2.615037441253662, - "106": 2.555068016052246, - "107": 2.5922436714172363 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 0.0833571, - 0.0361761, - 0.0772461, - 0.004249, - 0.281251, - 0.030409, - 0.114588, - 0.351684, - 0.203601, - 0.377055, - 0.121468, - 0.518273, - 0.481556, - 0.195668, - 0.402322, - 1.18855, - 1.84216, - 1.84832, - 0.851598, - 0.613127, - 0.489527, - 0.701876, - 1.47542, - 1.21706, - 0.629787, - 0.804224, - 1.61438, - 1.37178, - 0.802869, - 0.815509, - 0.871788, - 0.922513, - 1.00654, - 1.07438, - 1.10044, - 0.955645, - 0.714879, - 0.903223, - 1.8171, - 1.60108, - 0.976842, - 0.950795, - 1.07081, - 1.09155, - 1.04256, - 1.04359, - 0.998171, - 1.11115, - 1.15146, - 1.07393, - 0.876707, - 0.900538, - 1.02788, - 0.999292 - ], - "train_epoch_time": 5.0519633293151855, - "train_loss": 2.555980904988271, - "train_score": 0.2495438038404908, - "val_loss": 2.5955280745754834, - "val_score": 0.24849759639335967 - }, - { - "epoch": 2, - "grad_norm": 1.3384978771209717, - "learning_rate": 0.1, - "model_norm": 87.46625518798828, - "step_logs": { - "grad_norm": { - "108": 1.5474275350570679, - "109": 1.5940730571746826, - "110": 1.7816921472549438, - "111": 1.648228645324707, - "112": 1.3583422899246216, - "113": 1.363898515701294, - "114": 1.4790979623794556, - "115": 1.4925323724746704, - "116": 1.4969450235366821, - "117": 1.5330485105514526, - "118": 1.5277963876724243, - "119": 1.5661312341690063, - "120": 1.6411337852478027, - "121": 1.6089191436767578, - "122": 1.3647021055221558, - "123": 1.322136402130127, - "124": 1.4278115034103394, - "125": 1.4608854055404663, - "126": 1.5177714824676514, - "127": 1.5881707668304443, - "128": 1.5485810041427612, - "129": 1.5805672407150269, - "130": 1.6098755598068237, - "131": 1.533129096031189, - "132": 1.513983130455017, - "133": 1.4128550291061401, - "134": 1.3621541261672974, - "135": 1.3466095924377441, - "136": 1.4730033874511719, - "137": 1.386151909828186, - "138": 1.2447234392166138, - "139": 1.310735821723938, - "140": 1.3264137506484985, - "141": 1.370408296585083, - "142": 1.5405223369598389, - "143": 1.518472671508789, - "144": 1.3180512189865112, - "145": 1.2883540391921997, - "146": 1.3945871591567993, - "147": 1.470998764038086, - "148": 1.3696165084838867, - "149": 1.3303577899932861, - "150": 1.3642216920852661, - "151": 1.3836241960525513, - "152": 1.44769287109375, - "153": 1.5620477199554443, - "154": 1.7564959526062012, - "155": 1.6865432262420654, - "156": 1.4339799880981445, - "157": 1.3522312641143799, - "158": 1.279317021369934, - "159": 1.318213939666748, - "160": 1.3564062118530273, - "161": 1.3384978771209717 - }, - "loss": { - "108": 2.5644514560699463, - "109": 2.57401704788208, - "110": 2.565141201019287, - "111": 2.620950698852539, - "112": 2.5296711921691895, - "113": 2.5376832485198975, - "114": 2.560171604156494, - "115": 2.5790293216705322, - "116": 2.5469746589660645, - "117": 2.5625815391540527, - "118": 2.5406293869018555, - "119": 2.569234848022461, - "120": 2.5673959255218506, - "121": 2.5761876106262207, - "122": 2.5377085208892822, - "123": 2.5510940551757812, - "124": 2.55973482131958, - "125": 2.5625646114349365, - "126": 2.5147266387939453, - "127": 2.5600316524505615, - "128": 2.5553152561187744, - "129": 2.5767405033111572, - "130": 2.54809308052063, - "131": 2.5585923194885254, - "132": 2.5383718013763428, - "133": 2.5417728424072266, - "134": 2.5236687660217285, - "135": 2.540006160736084, - "136": 2.521879196166992, - "137": 2.5519471168518066, - "138": 2.5191805362701416, - "139": 2.539604663848877, - "140": 2.506265640258789, - "141": 2.5389111042022705, - "142": 2.5203967094421387, - "143": 2.5624523162841797, - "144": 2.5144500732421875, - "145": 2.5246286392211914, - "146": 2.5040059089660645, - "147": 2.523301839828491, - "148": 2.515552043914795, - "149": 2.525379180908203, - "150": 2.5191526412963867, - "151": 2.538222551345825, - "152": 2.502185583114624, - "153": 2.5259652137756348, - "154": 2.5255136489868164, - "155": 2.5799806118011475, - "156": 2.5219311714172363, - "157": 2.5506556034088135, - "158": 2.503695011138916, - "159": 2.5317506790161133, - "160": 2.5217232704162598, - "161": 2.523049831390381 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 1.07096, - 1.01297, - 0.808064, - 0.96477, - 1.37103, - 1.36419, - 1.17024, - 1.15773, - 1.13661, - 1.09035, - 1.08845, - 1.04748, - 0.953246, - 0.995197, - 1.36259, - 1.4594, - 1.25561, - 1.20072, - 1.09164, - 1.01496, - 1.06556, - 1.03144, - 0.983175, - 1.08854, - 1.10742, - 1.27333, - 1.36013, - 1.40072, - 1.1623, - 1.32816, - 1.62597, - 1.47821, - 1.42452, - 1.35191, - 1.06202, - 1.11133, - 1.44737, - 1.52099, - 1.28749, - 1.16612, - 1.34102, - 1.42689, - 1.35358, - 1.32585, - 1.1939, - 1.03523, - 0.818569, - 0.90703, - 1.22644, - 1.39492, - 1.52977, - 1.45697, - 1.37062, - 1.40828 - ], - "train_epoch_time": 5.0541157722473145, - "train_loss": 2.5136015885871656, - "train_score": 0.2566680864333422, - "val_loss": 2.5533794010000306, - "val_score": 0.25084313989780527 - }, - { - "epoch": 3, - "grad_norm": 1.318820595741272, - "learning_rate": 0.1, - "model_norm": 87.47798919677734, - "step_logs": { - "grad_norm": { - "162": 1.4114265441894531, - "163": 1.431295394897461, - "164": 1.4903470277786255, - "165": 1.4201840162277222, - "166": 1.419381022453308, - "167": 1.39986252784729, - "168": 1.2418795824050903, - "169": 1.1801707744598389, - "170": 1.2034727334976196, - "171": 1.2583473920822144, - "172": 1.3439244031906128, - "173": 1.396019697189331, - "174": 1.415901780128479, - "175": 1.3842941522598267, - "176": 1.349034070968628, - "177": 1.3424577713012695, - "178": 1.1667481660842896, - "179": 1.13761305809021, - "180": 1.2138445377349854, - "181": 1.2609201669692993, - "182": 1.2920323610305786, - "183": 1.2840451002120972, - "184": 1.3944884538650513, - "185": 1.389801263809204, - "186": 1.3639730215072632, - "187": 1.3643674850463867, - "188": 1.5542376041412354, - "189": 1.5981364250183105, - "190": 1.5491358041763306, - "191": 1.4451212882995605, - "192": 1.3491425514221191, - "193": 1.3006073236465454, - "194": 1.2778058052062988, - "195": 1.2574074268341064, - "196": 1.152113914489746, - "197": 1.2072381973266602, - "198": 1.4360460042953491, - "199": 1.5577796697616577, - "200": 1.5604770183563232, - "201": 1.447811245918274, - "202": 1.309539794921875, - "203": 1.2651509046554565, - "204": 1.1666126251220703, - "205": 1.3112585544586182, - "206": 1.5309338569641113, - "207": 1.518886923789978, - "208": 1.3946483135223389, - "209": 1.3189250230789185, - "210": 1.3046302795410156, - "211": 1.2808794975280762, - "212": 1.4370882511138916, - "213": 1.337239384651184, - "214": 1.225730538368225, - "215": 1.318820595741272 - }, - "loss": { - "162": 2.5070159435272217, - "163": 2.5475645065307617, - "164": 2.5105767250061035, - "165": 2.527508020401001, - "166": 2.5137524604797363, - "167": 2.559257984161377, - "168": 2.4882638454437256, - "169": 2.4969942569732666, - "170": 2.4993886947631836, - "171": 2.506685495376587, - "172": 2.500074863433838, - "173": 2.499678134918213, - "174": 2.511322021484375, - "175": 2.5015060901641846, - "176": 2.4881248474121094, - "177": 2.5125815868377686, - "178": 2.5039944648742676, - "179": 2.4961695671081543, - "180": 2.4842424392700195, - "181": 2.5197134017944336, - "182": 2.491075038909912, - "183": 2.5007705688476562, - "184": 2.501986503601074, - "185": 2.518996000289917, - "186": 2.5041511058807373, - "187": 2.5261926651000977, - "188": 2.5192923545837402, - "189": 2.541409492492676, - "190": 2.513819456100464, - "191": 2.5296738147735596, - "192": 2.4996180534362793, - "193": 2.5108487606048584, - "194": 2.475506067276001, - "195": 2.5060291290283203, - "196": 2.459573745727539, - "197": 2.503289222717285, - "198": 2.4924795627593994, - "199": 2.5321359634399414, - "200": 2.499049663543701, - "201": 2.5179004669189453, - "202": 2.4973392486572266, - "203": 2.5076847076416016, - "204": 2.4686331748962402, - "205": 2.4993233680725098, - "206": 2.507267475128174, - "207": 2.5081958770751953, - "208": 2.492157459259033, - "209": 2.5198802947998047, - "210": 2.4981138706207275, - "211": 2.4846696853637695, - "212": 2.4921669960021973, - "213": 2.51159930229187, - "214": 2.4895572662353516, - "215": 2.504476547241211 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 1.25846, - 1.24356, - 1.13031, - 1.25315, - 1.24774, - 1.306, - 1.61338, - 1.79278, - 1.72568, - 1.58307, - 1.38421, - 1.28263, - 1.25267, - 1.3054, - 1.36718, - 1.39418, - 1.83941, - 1.92879, - 1.68604, - 1.58481, - 1.49224, - 1.51675, - 1.28663, - 1.30413, - 1.34601, - 1.35707, - 1.0429, - 0.995055, - 1.0475, - 1.21131, - 1.37328, - 1.48432, - 1.51612, - 1.58502, - 1.85297, - 1.71761, - 1.20863, - 1.04346, - 1.02627, - 1.2012, - 1.45626, - 1.56671, - 1.81386, - 1.4536, - 1.06976, - 1.0872, - 1.28129, - 1.44857, - 1.4677, - 1.51444, - 1.20673, - 1.40453, - 1.65704, - 1.43994 - ], - "train_epoch_time": 5.051695108413696, - "train_loss": 2.4899268717471634, - "train_score": 0.27132801283551766, - "val_loss": 2.526738029670496, - "val_score": 0.2624049220580595 - }, - { - "epoch": 4, - "grad_norm": 1.3548485040664673, - "learning_rate": 0.1, - "model_norm": 87.48912048339844, - "step_logs": { - "grad_norm": { - "216": 1.348342776298523, - "217": 1.3550546169281006, - "218": 1.3296412229537964, - "219": 1.3579684495925903, - "220": 1.3660697937011719, - "221": 1.3053972721099854, - "222": 1.208400845527649, - "223": 1.1640582084655762, - "224": 1.1284453868865967, - "225": 1.1163370609283447, - "226": 1.2030351161956787, - "227": 1.3001850843429565, - "228": 1.265457034111023, - "229": 1.3263202905654907, - "230": 1.6208581924438477, - "231": 1.526879072189331, - "232": 1.2974154949188232, - "233": 1.3816595077514648, - "234": 1.4321775436401367, - "235": 1.4069297313690186, - "236": 1.4668422937393188, - "237": 1.3747762441635132, - "238": 1.439998984336853, - "239": 1.4145033359527588, - "240": 1.3636399507522583, - "241": 1.2812285423278809, - "242": 1.1984456777572632, - "243": 1.2026156187057495, - "244": 1.1779249906539917, - "245": 1.1976126432418823, - "246": 1.271930456161499, - "247": 1.385982871055603, - "248": 1.379332423210144, - "249": 1.3337682485580444, - "250": 1.4559179544448853, - "251": 1.5996768474578857, - "252": 1.3911381959915161, - "253": 1.236518383026123, - "254": 1.2514238357543945, - "255": 1.2472258806228638, - "256": 1.1549932956695557, - "257": 1.204041838645935, - "258": 1.369576096534729, - "259": 1.5055357217788696, - "260": 1.5250625610351562, - "261": 1.3742485046386719, - "262": 1.2223076820373535, - "263": 1.14608633518219, - "264": 1.0943989753723145, - "265": 1.1048413515090942, - "266": 1.308950662612915, - "267": 1.4192947149276733, - "268": 1.4301488399505615, - "269": 1.3548485040664673 - }, - "loss": { - "216": 2.4909420013427734, - "217": 2.4906444549560547, - "218": 2.466233253479004, - "219": 2.5231754779815674, - "220": 2.504706382751465, - "221": 2.4931962490081787, - "222": 2.495638370513916, - "223": 2.4765968322753906, - "224": 2.489243984222412, - "225": 2.465637683868408, - "226": 2.4790308475494385, - "227": 2.4879531860351562, - "228": 2.487992525100708, - "229": 2.4812231063842773, - "230": 2.496302604675293, - "231": 2.5248308181762695, - "232": 2.4637451171875, - "233": 2.4943437576293945, - "234": 2.497357130050659, - "235": 2.4934983253479004, - "236": 2.506168842315674, - "237": 2.507537603378296, - "238": 2.4810452461242676, - "239": 2.504035472869873, - "240": 2.488614082336426, - "241": 2.5019989013671875, - "242": 2.461118698120117, - "243": 2.4807281494140625, - "244": 2.4645473957061768, - "245": 2.4701766967773438, - "246": 2.4564895629882812, - "247": 2.483874559402466, - "248": 2.5018720626831055, - "249": 2.481210470199585, - "250": 2.483489513397217, - "251": 2.5111019611358643, - "252": 2.515650987625122, - "253": 2.4815587997436523, - "254": 2.4624993801116943, - "255": 2.4724130630493164, - "256": 2.4616270065307617, - "257": 2.4670495986938477, - "258": 2.475165605545044, - "259": 2.4850661754608154, - "260": 2.49428391456604, - "261": 2.486996650695801, - "262": 2.4579572677612305, - "263": 2.4820985794067383, - "264": 2.478109359741211, - "265": 2.466869592666626, - "266": 2.4649291038513184, - "267": 2.488013744354248, - "268": 2.4809932708740234, - "269": 2.484416961669922 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 1.37013, - 1.35643, - 1.39497, - 1.36826, - 1.34218, - 1.46309, - 1.70907, - 1.8277, - 1.95482, - 1.97851, - 1.71287, - 1.47174, - 1.55365, - 1.41049, - 0.950183, - 1.08299, - 1.46365, - 1.30663, - 1.21755, - 1.25969, - 1.16478, - 1.32673, - 1.19649, - 1.2515, - 1.33831, - 1.52417, - 1.71355, - 1.71524, - 1.77624, - 1.72225, - 1.51841, - 1.29305, - 1.315, - 1.39477, - 1.17162, - 0.981296, - 1.2999, - 1.62302, - 1.57242, - 1.58939, - 1.84528, - 1.70175, - 1.31957, - 1.09637, - 1.07243, - 1.31688, - 1.64518, - 1.88966, - 2.06904, - 2.02091, - 1.43866, - 1.23512, - 1.21301, - 1.35345 - ], - "train_epoch_time": 5.052002429962158, - "train_loss": 2.471717495555686, - "train_score": 0.2667839848071636, - "val_loss": 2.5183303096127427, - "val_score": 0.26122990770673915 - }, - { - "epoch": 5, - "grad_norm": 1.5109161138534546, - "learning_rate": 0.1, - "model_norm": 87.50033569335938, - "step_logs": { - "grad_norm": { - "270": 1.4047431945800781, - "271": 1.421698808670044, - "272": 1.3978400230407715, - "273": 1.3299293518066406, - "274": 1.2401758432388306, - "275": 1.2176589965820312, - "276": 1.1978715658187866, - "277": 1.1949207782745361, - "278": 1.2177835702896118, - "279": 1.224126935005188, - "280": 1.2096213102340698, - "281": 1.2558765411376953, - "282": 1.443474531173706, - "283": 1.596191167831421, - "284": 1.6108492612838745, - "285": 1.6240370273590088, - "286": 1.6148786544799805, - "287": 1.5551954507827759, - "288": 1.4496687650680542, - "289": 1.3272839784622192, - "290": 1.3802237510681152, - "291": 1.3543483018875122, - "292": 1.245140790939331, - "293": 1.219671607017517, - "294": 1.2365336418151855, - "295": 1.2203431129455566, - "296": 1.2545716762542725, - "297": 1.3500263690948486, - "298": 1.4065240621566772, - "299": 1.3695147037506104, - "300": 1.2958126068115234, - "301": 1.3573617935180664, - "302": 1.5311113595962524, - "303": 1.4070990085601807, - "304": 1.1375900506973267, - "305": 1.159070372581482, - "306": 1.2499953508377075, - "307": 1.332653522491455, - "308": 1.3756132125854492, - "309": 1.2655593156814575, - "310": 1.1920593976974487, - "311": 1.2005290985107422, - "312": 1.196609616279602, - "313": 1.2147624492645264, - "314": 1.1494425535202026, - "315": 1.172121524810791, - "316": 1.1625635623931885, - "317": 1.2386637926101685, - "318": 1.3741620779037476, - "319": 1.376841425895691, - "320": 1.3388792276382446, - "321": 1.3464971780776978, - "322": 1.405653715133667, - "323": 1.5109161138534546 - }, - "loss": { - "270": 2.478456974029541, - "271": 2.4920506477355957, - "272": 2.472493886947632, - "273": 2.4852397441864014, - "274": 2.4534354209899902, - "275": 2.4587340354919434, - "276": 2.4528369903564453, - "277": 2.4688453674316406, - "278": 2.445171356201172, - "279": 2.4824700355529785, - "280": 2.448643684387207, - "281": 2.4714701175689697, - "282": 2.4722397327423096, - "283": 2.516361713409424, - "284": 2.478762149810791, - "285": 2.497800827026367, - "286": 2.5022873878479004, - "287": 2.509681463241577, - "288": 2.484309673309326, - "289": 2.4764108657836914, - "290": 2.4673147201538086, - "291": 2.472403049468994, - "292": 2.444923162460327, - "293": 2.458810329437256, - "294": 2.4640769958496094, - "295": 2.4554147720336914, - "296": 2.461606740951538, - "297": 2.4729299545288086, - "298": 2.4781930446624756, - "299": 2.473634719848633, - "300": 2.457024335861206, - "301": 2.4785900115966797, - "302": 2.4860496520996094, - "303": 2.472198724746704, - "304": 2.4644036293029785, - "305": 2.466243267059326, - "306": 2.452704429626465, - "307": 2.4463019371032715, - "308": 2.481018543243408, - "309": 2.4876208305358887, - "310": 2.466614007949829, - "311": 2.4680709838867188, - "312": 2.450718641281128, - "313": 2.460909843444824, - "314": 2.448789596557617, - "315": 2.443652629852295, - "316": 2.418773651123047, - "317": 2.4618258476257324, - "318": 2.452504873275757, - "319": 2.447181224822998, - "320": 2.476144313812256, - "321": 2.4466404914855957, - "322": 2.4622063636779785, - "323": 2.4617767333984375 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 1.25599, - 1.23294, - 1.26538, - 1.40511, - 1.59517, - 1.65829, - 1.70942, - 1.72908, - 1.6488, - 1.65665, - 1.6735, - 1.56697, - 1.18651, - 0.98765, - 0.955268, - 0.947035, - 0.959527, - 1.03764, - 1.18214, - 1.40571, - 1.29517, - 1.3479, - 1.57699, - 1.65287, - 1.61154, - 1.64877, - 1.56397, - 1.35684, - 1.25268, - 1.31887, - 1.46327, - 1.34528, - 1.06046, - 1.24863, - 1.90432, - 1.83576, - 1.56974, - 1.37745, - 1.3111, - 1.55317, - 1.73582, - 1.71243, - 1.71155, - 1.66768, - 1.85343, - 1.77867, - 1.78962, - 1.60454, - 1.29878, - 1.29092, - 1.38132, - 1.34946, - 1.24614, - 1.07837 - ], - "train_epoch_time": 5.051944971084595, - "train_loss": 2.4745563034347686, - "train_score": 0.26685347926496944, - "val_loss": 2.5326947599273053, - "val_score": 0.25766001770061137 - }, - { - "epoch": 6, - "grad_norm": 1.759065866470337, - "learning_rate": 0.1, - "model_norm": 87.5138931274414, - "step_logs": { - "grad_norm": { - "324": 1.623439908027649, - "325": 1.6806143522262573, - "326": 1.5055813789367676, - "327": 1.2876757383346558, - "328": 1.254889726638794, - "329": 1.409358024597168, - "330": 1.5430922508239746, - "331": 1.5045151710510254, - "332": 1.4431153535842896, - "333": 1.3577274084091187, - "334": 1.2942644357681274, - "335": 1.1906518936157227, - "336": 1.1726138591766357, - "337": 1.2196074724197388, - "338": 1.188008427619934, - "339": 1.0564650297164917, - "340": 1.066809058189392, - "341": 1.1904737949371338, - "342": 1.3305294513702393, - "343": 1.4903757572174072, - "344": 1.6333520412445068, - "345": 1.7529473304748535, - "346": 1.5258636474609375, - "347": 1.3201848268508911, - "348": 1.2253371477127075, - "349": 1.1936882734298706, - "350": 1.2745351791381836, - "351": 1.310713291168213, - "352": 1.3653395175933838, - "353": 1.4823520183563232, - "354": 1.567481279373169, - "355": 1.689394474029541, - "356": 1.49756920337677, - "357": 1.1672707796096802, - "358": 1.0936285257339478, - "359": 1.1354784965515137, - "360": 1.26737380027771, - "361": 1.3135985136032104, - "362": 1.3856745958328247, - "363": 1.3981680870056152, - "364": 1.369124174118042, - "365": 1.3923665285110474, - "366": 1.3907634019851685, - "367": 1.3212506771087646, - "368": 1.3154053688049316, - "369": 1.2992113828659058, - "370": 1.2521928548812866, - "371": 1.3133469820022583, - "372": 1.432778239250183, - "373": 1.5911734104156494, - "374": 1.5572890043258667, - "375": 1.640194296836853, - "376": 1.7942968606948853, - "377": 1.759065866470337 - }, - "loss": { - "324": 2.4846949577331543, - "325": 2.508121967315674, - "326": 2.4766106605529785, - "327": 2.4612317085266113, - "328": 2.45953369140625, - "329": 2.449069023132324, - "330": 2.4828591346740723, - "331": 2.4601805210113525, - "332": 2.455899238586426, - "333": 2.4449198246002197, - "334": 2.4369330406188965, - "335": 2.459473133087158, - "336": 2.4395594596862793, - "337": 2.4353997707366943, - "338": 2.478641986846924, - "339": 2.430203914642334, - "340": 2.434865951538086, - "341": 2.433072566986084, - "342": 2.4332549571990967, - "343": 2.4297852516174316, - "344": 2.465857982635498, - "345": 2.45650577545166, - "346": 2.4539458751678467, - "347": 2.442265033721924, - "348": 2.4369921684265137, - "349": 2.4326696395874023, - "350": 2.422942638397217, - "351": 2.448277235031128, - "352": 2.436768054962158, - "353": 2.440791606903076, - "354": 2.457829236984253, - "355": 2.453620433807373, - "356": 2.4883389472961426, - "357": 2.4340109825134277, - "358": 2.428116798400879, - "359": 2.4015393257141113, - "360": 2.41034197807312, - "361": 2.417464256286621, - "362": 2.421893358230591, - "363": 2.431382656097412, - "364": 2.4245219230651855, - "365": 2.4390602111816406, - "366": 2.4091711044311523, - "367": 2.437689781188965, - "368": 2.424776315689087, - "369": 2.430070638656616, - "370": 2.4288299083709717, - "371": 2.420199155807495, - "372": 2.4276294708251953, - "373": 2.4442105293273926, - "374": 2.4462759494781494, - "375": 2.421290636062622, - "376": 2.4667458534240723, - "377": 2.4498980045318604 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 0.942759, - 0.887999, - 1.09257, - 1.48436, - 1.56186, - 1.23299, - 1.04272, - 1.08686, - 1.17926, - 1.32629, - 1.45478, - 1.73489, - 1.7742, - 1.63731, - 1.7562, - 2.17737, - 2.13945, - 1.71678, - 1.37448, - 1.0939, - 0.92429, - 0.799429, - 1.05398, - 1.40127, - 1.62309, - 1.70727, - 1.49156, - 1.4251, - 1.30717, - 1.11078, - 1.00034, - 0.859697, - 1.10952, - 1.7864, - 2.03016, - 1.86265, - 1.50061, - 1.40099, - 1.26134, - 1.24375, - 1.29342, - 1.2581, - 1.24555, - 1.39639, - 1.40137, - 1.43966, - 1.54901, - 1.40311, - 1.18256, - 0.965392, - 1.00871, - 0.900029, - 0.766189, - 0.791742 - ], - "train_epoch_time": 5.054595947265625, - "train_loss": 2.4428781078398827, - "train_score": 0.2725889974293784, - "val_loss": 2.4942171108298408, - "val_score": 0.2624138918385577 - }, - { - "epoch": 7, - "grad_norm": 1.4262630939483643, - "learning_rate": 0.1, - "model_norm": 87.52951049804688, - "step_logs": { - "grad_norm": { - "378": 1.617846965789795, - "379": 1.411563754081726, - "380": 1.2483372688293457, - "381": 1.1806268692016602, - "382": 1.2389273643493652, - "383": 1.4030169248580933, - "384": 1.276199221611023, - "385": 1.108875036239624, - "386": 1.008466124534607, - "387": 0.9886185526847839, - "388": 1.0921992063522339, - "389": 1.3389133214950562, - "390": 1.4035688638687134, - "391": 1.41665518283844, - "392": 1.41960871219635, - "393": 1.592949628829956, - "394": 1.620247721672058, - "395": 1.6470298767089844, - "396": 1.545442819595337, - "397": 1.4301609992980957, - "398": 1.3720855712890625, - "399": 1.3858622312545776, - "400": 1.49479341506958, - "401": 1.562925100326538, - "402": 1.5144386291503906, - "403": 1.4692139625549316, - "404": 1.5100115537643433, - "405": 1.483263373374939, - "406": 1.3693817853927612, - "407": 1.4439547061920166, - "408": 1.4968265295028687, - "409": 1.340765357017517, - "410": 1.3486063480377197, - "411": 1.4067001342773438, - "412": 1.4785293340682983, - "413": 1.5283474922180176, - "414": 1.4990612268447876, - "415": 1.5192127227783203, - "416": 1.5972009897232056, - "417": 1.4781601428985596, - "418": 1.340295433998108, - "419": 1.2414122819900513, - "420": 1.1782641410827637, - "421": 1.3022778034210205, - "422": 1.4148863554000854, - "423": 1.7196805477142334, - "424": 1.660214900970459, - "425": 1.3662163019180298, - "426": 1.265149474143982, - "427": 1.2317677736282349, - "428": 1.3024029731750488, - "429": 1.4698814153671265, - "430": 1.4755594730377197, - "431": 1.4262630939483643 - }, - "loss": { - "378": 2.464127540588379, - "379": 2.4174060821533203, - "380": 2.411942481994629, - "381": 2.3847360610961914, - "382": 2.396725654602051, - "383": 2.4089713096618652, - "384": 2.408296585083008, - "385": 2.385756015777588, - "386": 2.380687713623047, - "387": 2.3640313148498535, - "388": 2.349022388458252, - "389": 2.386237621307373, - "390": 2.4364547729492188, - "391": 2.394516944885254, - "392": 2.3872575759887695, - "393": 2.3922128677368164, - "394": 2.432094097137451, - "395": 2.408723831176758, - "396": 2.4120514392852783, - "397": 2.4008212089538574, - "398": 2.387998104095459, - "399": 2.394864082336426, - "400": 2.39601731300354, - "401": 2.406705379486084, - "402": 2.424264907836914, - "403": 2.4021058082580566, - "404": 2.4218873977661133, - "405": 2.378183364868164, - "406": 2.3882908821105957, - "407": 2.386151075363159, - "408": 2.4186501502990723, - "409": 2.37593936920166, - "410": 2.3756043910980225, - "411": 2.3814640045166016, - "412": 2.3972043991088867, - "413": 2.3847219944000244, - "414": 2.3922367095947266, - "415": 2.3874189853668213, - "416": 2.4014251232147217, - "417": 2.4035773277282715, - "418": 2.3629751205444336, - "419": 2.3751280307769775, - "420": 2.365384101867676, - "421": 2.3484015464782715, - "422": 2.389936923980713, - "423": 2.3830745220184326, - "424": 2.41225266456604, - "425": 2.3652896881103516, - "426": 2.353670597076416, - "427": 2.3570289611816406, - "428": 2.361449718475342, - "429": 2.3687450885772705, - "430": 2.3891549110412598, - "431": 2.366312026977539 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 0.941431, - 1.21325, - 1.54776, - 1.71086, - 1.56144, - 1.22379, - 1.47868, - 1.94026, - 2.34088, - 2.41878, - 1.96917, - 1.33109, - 1.23678, - 1.19314, - 1.18457, - 0.942748, - 0.926441, - 0.887941, - 1.00991, - 1.17379, - 1.26844, - 1.24693, - 1.07233, - 0.98525, - 1.057, - 1.11281, - 1.06217, - 1.08096, - 1.27361, - 1.14443, - 1.07952, - 1.32169, - 1.30618, - 1.20349, - 1.09659, - 1.02092, - 1.06455, - 1.03441, - 0.941347, - 1.10006, - 1.3154, - 1.54119, - 1.70379, - 1.38473, - 1.19383, - 0.805827, - 0.875174, - 1.2672, - 1.47049, - 1.55349, - 1.39216, - 1.09636, - 1.09731, - 1.16325 - ], - "train_epoch_time": 5.0514843463897705, - "train_loss": 2.3739958438845927, - "train_score": 0.295296807815902, - "val_loss": 2.4253303528379493, - "val_score": 0.283129126415187 - }, - { - "epoch": 8, - "grad_norm": 1.444602131843567, - "learning_rate": 0.1, - "model_norm": 87.54541778564453, - "step_logs": { - "grad_norm": { - "432": 1.3425726890563965, - "433": 1.2621179819107056, - "434": 1.2779802083969116, - "435": 1.4188487529754639, - "436": 1.4042917490005493, - "437": 1.2528634071350098, - "438": 1.282992959022522, - "439": 1.380332112312317, - "440": 1.3788363933563232, - "441": 1.6068919897079468, - "442": 1.6531224250793457, - "443": 1.3426464796066284, - "444": 1.1646243333816528, - "445": 1.2227520942687988, - "446": 1.4188110828399658, - "447": 1.4509479999542236, - "448": 1.4236797094345093, - "449": 1.4222859144210815, - "450": 1.3198795318603516, - "451": 1.278914213180542, - "452": 1.3036612272262573, - "453": 1.3619812726974487, - "454": 1.386054515838623, - "455": 1.497849464416504, - "456": 1.5307101011276245, - "457": 1.4602429866790771, - "458": 1.4081178903579712, - "459": 1.3239916563034058, - "460": 1.3485093116760254, - "461": 1.3465546369552612, - "462": 1.2619314193725586, - "463": 1.2228964567184448, - "464": 1.3545159101486206, - "465": 1.57125985622406, - "466": 1.5788429975509644, - "467": 1.5804986953735352, - "468": 1.5144660472869873, - "469": 1.399890661239624, - "470": 1.3252006769180298, - "471": 1.2314919233322144, - "472": 1.1383312940597534, - "473": 1.0897200107574463, - "474": 1.1071062088012695, - "475": 1.2052793502807617, - "476": 1.3018338680267334, - "477": 1.5188758373260498, - "478": 1.4975824356079102, - "479": 1.4089040756225586, - "480": 1.4930580854415894, - "481": 1.7426979541778564, - "482": 1.6179229021072388, - "483": 1.3499640226364136, - "484": 1.4126147031784058, - "485": 1.444602131843567 - }, - "loss": { - "432": 2.396427631378174, - "433": 2.344536304473877, - "434": 2.370551109313965, - "435": 2.37554931640625, - "436": 2.382157325744629, - "437": 2.3646914958953857, - "438": 2.3248276710510254, - "439": 2.357404947280884, - "440": 2.358269691467285, - "441": 2.356196403503418, - "442": 2.4061853885650635, - "443": 2.3650639057159424, - "444": 2.370314598083496, - "445": 2.356516122817993, - "446": 2.3476247787475586, - "447": 2.379183769226074, - "448": 2.3693859577178955, - "449": 2.377377986907959, - "450": 2.3442137241363525, - "451": 2.3414998054504395, - "452": 2.34651517868042, - "453": 2.3363609313964844, - "454": 2.358280658721924, - "455": 2.3616933822631836, - "456": 2.363029956817627, - "457": 2.364698886871338, - "458": 2.3509631156921387, - "459": 2.349639415740967, - "460": 2.338320732116699, - "461": 2.3632969856262207, - "462": 2.3475143909454346, - "463": 2.334425926208496, - "464": 2.3368706703186035, - "465": 2.352728843688965, - "466": 2.3565022945404053, - "467": 2.3745017051696777, - "468": 2.3789525032043457, - "469": 2.3568365573883057, - "470": 2.3726563453674316, - "471": 2.3423638343811035, - "472": 2.332307815551758, - "473": 2.3284988403320312, - "474": 2.323655128479004, - "475": 2.3422038555145264, - "476": 2.3303956985473633, - "477": 2.3193583488464355, - "478": 2.379096269607544, - "479": 2.3467745780944824, - "480": 2.3588194847106934, - "481": 2.3436622619628906, - "482": 2.3912572860717773, - "483": 2.3228673934936523, - "484": 2.329345703125, - "485": 2.345519542694092 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 1.3295, - 1.47183, - 1.45145, - 1.18003, - 1.20797, - 1.50649, - 1.41235, - 1.23728, - 1.24042, - 0.912511, - 0.880479, - 1.31196, - 1.74757, - 1.57614, - 1.16622, - 1.13012, - 1.16899, - 1.17523, - 1.34564, - 1.43157, - 1.38068, - 1.2595, - 1.22754, - 1.05266, - 1.00852, - 1.10898, - 1.18568, - 1.34039, - 1.28587, - 1.30338, - 1.47413, - 1.56099, - 1.2737, - 0.952963, - 0.945344, - 0.95057, - 1.03721, - 1.20266, - 1.35105, - 1.54451, - 1.7999, - 1.96086, - 1.8958, - 1.61231, - 1.37505, - 1.00536, - 1.06079, - 1.18225, - 1.05814, - 0.771704, - 0.913504, - 1.27462, - 1.16731, - 1.12394 - ], - "train_epoch_time": 5.051590204238892, - "train_loss": 2.346574435582975, - "train_score": 0.302081465216586, - "val_loss": 2.3984540513407624, - "val_score": 0.2889907437405548 - }, - { - "epoch": 9, - "grad_norm": 1.709283709526062, - "learning_rate": 0.1, - "model_norm": 87.56116485595703, - "step_logs": { - "grad_norm": { - "486": 1.426444411277771, - "487": 1.2690880298614502, - "488": 1.1729875802993774, - "489": 1.2084312438964844, - "490": 1.231128215789795, - "491": 1.265526533126831, - "492": 1.3779784440994263, - "493": 1.3759522438049316, - "494": 1.260408878326416, - "495": 1.2346863746643066, - "496": 1.2666555643081665, - "497": 1.2679500579833984, - "498": 1.2630565166473389, - "499": 1.2052359580993652, - "500": 1.1893833875656128, - "501": 1.1256600618362427, - "502": 1.1359037160873413, - "503": 1.1839313507080078, - "504": 1.2609279155731201, - "505": 1.2821588516235352, - "506": 1.2694697380065918, - "507": 1.177640438079834, - "508": 1.1328892707824707, - "509": 1.3132929801940918, - "510": 1.3829059600830078, - "511": 1.4194427728652954, - "512": 1.440608263015747, - "513": 1.5464714765548706, - "514": 1.6855614185333252, - "515": 1.8420305252075195, - "516": 1.544111967086792, - "517": 1.2531142234802246, - "518": 1.4684197902679443, - "519": 1.57754647731781, - "520": 1.4499608278274536, - "521": 1.3234126567840576, - "522": 1.2215524911880493, - "523": 1.2147200107574463, - "524": 1.2052631378173828, - "525": 1.2438725233078003, - "526": 1.429188847541809, - "527": 1.4128731489181519, - "528": 1.2569087743759155, - "529": 1.1902834177017212, - "530": 1.2827911376953125, - "531": 1.4841653108596802, - "532": 1.6421515941619873, - "533": 1.5563585758209229, - "534": 1.5640913248062134, - "535": 1.899158239364624, - "536": 1.9190765619277954, - "537": 1.7355573177337646, - "538": 1.6624763011932373, - "539": 1.709283709526062 - }, - "loss": { - "486": 2.3604815006256104, - "487": 2.344442844390869, - "488": 2.3220529556274414, - "489": 2.3139679431915283, - "490": 2.3063836097717285, - "491": 2.3233680725097656, - "492": 2.3442485332489014, - "493": 2.352097272872925, - "494": 2.30673885345459, - "495": 2.3149304389953613, - "496": 2.334404230117798, - "497": 2.3126087188720703, - "498": 2.3284311294555664, - "499": 2.310431957244873, - "500": 2.315993309020996, - "501": 2.3191850185394287, - "502": 2.3066604137420654, - "503": 2.294793128967285, - "504": 2.2924513816833496, - "505": 2.3190152645111084, - "506": 2.326279878616333, - "507": 2.2896616458892822, - "508": 2.322035551071167, - "509": 2.326859951019287, - "510": 2.309098958969116, - "511": 2.3234498500823975, - "512": 2.341808319091797, - "513": 2.332016944885254, - "514": 2.341855525970459, - "515": 2.336174964904785, - "516": 2.363861560821533, - "517": 2.3061585426330566, - "518": 2.3379969596862793, - "519": 2.3325400352478027, - "520": 2.3183236122131348, - "521": 2.3377583026885986, - "522": 2.3155860900878906, - "523": 2.3229575157165527, - "524": 2.3258862495422363, - "525": 2.301849365234375, - "526": 2.313551425933838, - "527": 2.3046035766601562, - "528": 2.290283203125, - "529": 2.287106990814209, - "530": 2.295423984527588, - "531": 2.2950873374938965, - "532": 2.3275697231292725, - "533": 2.335520029067993, - "534": 2.331029176712036, - "535": 2.3500795364379883, - "536": 2.379878282546997, - "537": 2.3246142864227295, - "538": 2.3307132720947266, - "539": 2.3478667736053467 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 1.16009, - 1.45565, - 1.68766, - 1.58458, - 1.52169, - 1.45069, - 1.23458, - 1.24236, - 1.45203, - 1.51853, - 1.45499, - 1.43846, - 1.45955, - 1.59056, - 1.63717, - 1.83029, - 1.78772, - 1.63716, - 1.44185, - 1.41065, - 1.4435, - 1.651, - 1.80923, - 1.34911, - 1.20742, - 1.15318, - 1.12839, - 0.975097, - 0.824273, - 0.688512, - 0.991435, - 1.46861, - 1.08429, - 0.93727, - 1.10271, - 1.33478, - 1.5518, - 1.57431, - 1.60112, - 1.48773, - 1.13266, - 1.15449, - 1.44971, - 1.61431, - 1.39493, - 1.04192, - 0.86313, - 0.964194, - 0.952848, - 0.651569, - 0.646205, - 0.771744, - 0.843292, - 0.80361 - ], - "train_epoch_time": 5.052663326263428, - "train_loss": 2.3349281706461094, - "train_score": 0.3076578190969771, - "val_loss": 2.3837451266913954, - "val_score": 0.2937984352612742 - }, - { - "epoch": 10, - "grad_norm": 1.2902566194534302, - "learning_rate": 0.1, - "model_norm": 87.578369140625, - "step_logs": { - "grad_norm": { - "540": 1.5759263038635254, - "541": 1.3194650411605835, - "542": 1.3201873302459717, - "543": 1.3581231832504272, - "544": 1.4158029556274414, - "545": 1.5097893476486206, - "546": 1.4278348684310913, - "547": 1.3444151878356934, - "548": 1.2905628681182861, - "549": 1.1409507989883423, - "550": 0.976250171661377, - "551": 0.8401185274124146, - "552": 0.8717550039291382, - "553": 0.9516346454620361, - "554": 1.0399898290634155, - "555": 1.0360325574874878, - "556": 1.1502712965011597, - "557": 1.2456767559051514, - "558": 1.3800832033157349, - "559": 1.5062284469604492, - "560": 1.6548281908035278, - "561": 1.8384106159210205, - "562": 1.6748757362365723, - "563": 1.4818562269210815, - "564": 1.5310593843460083, - "565": 1.5373516082763672, - "566": 1.4384281635284424, - "567": 1.3966439962387085, - "568": 1.2711124420166016, - "569": 1.3061379194259644, - "570": 1.3145487308502197, - "571": 1.4513746500015259, - "572": 1.6005983352661133, - "573": 1.7970792055130005, - "574": 1.8261210918426514, - "575": 1.5491801500320435, - "576": 1.4420125484466553, - "577": 1.3286991119384766, - "578": 1.2842791080474854, - "579": 1.2154967784881592, - "580": 1.1259206533432007, - "581": 1.1142082214355469, - "582": 1.096204161643982, - "583": 1.1447943449020386, - "584": 1.3225001096725464, - "585": 1.3663084506988525, - "586": 1.3333994150161743, - "587": 1.2913836240768433, - "588": 1.3958765268325806, - "589": 1.5368345975875854, - "590": 1.6467427015304565, - "591": 1.6730985641479492, - "592": 1.4967221021652222, - "593": 1.2902566194534302 - }, - "loss": { - "540": 2.341536521911621, - "541": 2.2961456775665283, - "542": 2.2922070026397705, - "543": 2.2935428619384766, - "544": 2.302182197570801, - "545": 2.318861722946167, - "546": 2.330303907394409, - "547": 2.285210609436035, - "548": 2.2862589359283447, - "549": 2.274963855743408, - "550": 2.2744057178497314, - "551": 2.2525784969329834, - "552": 2.2742624282836914, - "553": 2.2582478523254395, - "554": 2.26719069480896, - "555": 2.2740767002105713, - "556": 2.2779016494750977, - "557": 2.2638401985168457, - "558": 2.2591631412506104, - "559": 2.285280704498291, - "560": 2.302722692489624, - "561": 2.30159330368042, - "562": 2.3355891704559326, - "563": 2.320845603942871, - "564": 2.2858409881591797, - "565": 2.2855262756347656, - "566": 2.3017635345458984, - "567": 2.2737414836883545, - "568": 2.267089366912842, - "569": 2.2763099670410156, - "570": 2.3022842407226562, - "571": 2.274648666381836, - "572": 2.296595811843872, - "573": 2.279085874557495, - "574": 2.305741310119629, - "575": 2.307661533355713, - "576": 2.26253342628479, - "577": 2.267441987991333, - "578": 2.2679781913757324, - "579": 2.26358699798584, - "580": 2.25803804397583, - "581": 2.251507520675659, - "582": 2.234433889389038, - "583": 2.239211320877075, - "584": 2.2679219245910645, - "585": 2.2594470977783203, - "586": 2.2744503021240234, - "587": 2.2897608280181885, - "588": 2.266309976577759, - "589": 2.2815120220184326, - "590": 2.282259941101074, - "591": 2.298517942428589, - "592": 2.275049924850464, - "593": 2.2507901191711426 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 0.942821, - 1.31888, - 1.31517, - 1.24345, - 1.14851, - 1.01728, - 1.14303, - 1.26433, - 1.37267, - 1.74759, - 2.38641, - 3.19153, - 2.99262, - 2.49362, - 2.09619, - 2.11865, - 1.72161, - 1.45893, - 1.18614, - 1.0073, - 0.840883, - 0.680994, - 0.83259, - 1.0569, - 0.975129, - 0.96703, - 1.11246, - 1.16565, - 1.40314, - 1.3343, - 1.33231, - 1.07983, - 0.896437, - 0.70571, - 0.691435, - 0.961542, - 1.08807, - 1.28435, - 1.37505, - 1.53211, - 1.78121, - 1.8136, - 1.85945, - 1.7086, - 1.29669, - 1.21033, - 1.27925, - 1.37303, - 1.16312, - 0.965981, - 0.841615, - 0.821117, - 1.01557, - 1.35202 - ], - "train_epoch_time": 5.051812648773193, - "train_loss": 2.255965086308921, - "train_score": 0.33647439928786826, - "val_loss": 2.3217272350626614, - "val_score": 0.3190343356365178 - }, - { - "epoch": 11, - "grad_norm": 1.4152730703353882, - "learning_rate": 0.1, - "model_norm": 87.59550476074219, - "step_logs": { - "grad_norm": { - "594": 1.258890151977539, - "595": 1.203203558921814, - "596": 1.2106739282608032, - "597": 1.246432900428772, - "598": 1.46476149559021, - "599": 1.6713200807571411, - "600": 1.7261934280395508, - "601": 1.5266212224960327, - "602": 1.257462501525879, - "603": 1.2569507360458374, - "604": 1.2369118928909302, - "605": 1.3587913513183594, - "606": 1.450439453125, - "607": 1.4521106481552124, - "608": 1.488783597946167, - "609": 1.5188524723052979, - "610": 1.4980356693267822, - "611": 1.445404291152954, - "612": 1.469069242477417, - "613": 1.4227392673492432, - "614": 1.3973402976989746, - "615": 1.3478527069091797, - "616": 1.3153905868530273, - "617": 1.339142084121704, - "618": 1.514383316040039, - "619": 1.6853901147842407, - "620": 1.7181789875030518, - "621": 1.6907334327697754, - "622": 1.5309399366378784, - "623": 1.5674694776535034, - "624": 1.5582903623580933, - "625": 1.5204417705535889, - "626": 1.369624137878418, - "627": 1.2383073568344116, - "628": 1.224563717842102, - "629": 1.2779327630996704, - "630": 1.2952293157577515, - "631": 1.2530248165130615, - "632": 1.203320026397705, - "633": 1.2130340337753296, - "634": 1.1765729188919067, - "635": 1.1974188089370728, - "636": 1.2775593996047974, - "637": 1.4810636043548584, - "638": 1.3493518829345703, - "639": 1.246085286140442, - "640": 1.19171941280365, - "641": 1.2672860622406006, - "642": 1.3846321105957031, - "643": 1.3609858751296997, - "644": 1.3878403902053833, - "645": 1.3903461694717407, - "646": 1.3772350549697876, - "647": 1.4152730703353882 - }, - "loss": { - "594": 2.2567625045776367, - "595": 2.241260290145874, - "596": 2.2644906044006348, - "597": 2.241572141647339, - "598": 2.259786605834961, - "599": 2.2448043823242188, - "600": 2.2831976413726807, - "601": 2.290914535522461, - "602": 2.240591049194336, - "603": 2.258387327194214, - "604": 2.2383036613464355, - "605": 2.2649953365325928, - "606": 2.269930362701416, - "607": 2.2779407501220703, - "608": 2.2678422927856445, - "609": 2.2533488273620605, - "610": 2.2468838691711426, - "611": 2.275527000427246, - "612": 2.243919849395752, - "613": 2.278785228729248, - "614": 2.2456555366516113, - "615": 2.26540470123291, - "616": 2.272749662399292, - "617": 2.2570319175720215, - "618": 2.2563540935516357, - "619": 2.2770133018493652, - "620": 2.2670493125915527, - "621": 2.273940086364746, - "622": 2.2614378929138184, - "623": 2.248924970626831, - "624": 2.2827892303466797, - "625": 2.2635655403137207, - "626": 2.250562906265259, - "627": 2.216752052307129, - "628": 2.231747627258301, - "629": 2.206434965133667, - "630": 2.2379536628723145, - "631": 2.2190659046173096, - "632": 2.2118284702301025, - "633": 2.243088722229004, - "634": 2.2341508865356445, - "635": 2.221170425415039, - "636": 2.2227730751037598, - "637": 2.257899284362793, - "638": 2.2596077919006348, - "639": 2.2156600952148438, - "640": 2.2326769828796387, - "641": 2.2194743156433105, - "642": 2.229914426803589, - "643": 2.258718252182007, - "644": 2.2309961318969727, - "645": 2.2576956748962402, - "646": 2.252530097961426, - "647": 2.257633686065674 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 1.424, - 1.54815, - 1.54496, - 1.44283, - 1.05326, - 0.803636, - 0.76624, - 0.982984, - 1.41701, - 1.42943, - 1.46299, - 1.22677, - 1.07898, - 1.0803, - 1.02317, - 0.976781, - 1.00124, - 1.08919, - 1.03974, - 1.12578, - 1.15011, - 1.24698, - 1.31354, - 1.25859, - 0.983865, - 0.801613, - 0.767934, - 0.795479, - 0.964869, - 0.915328, - 0.940089, - 0.97916, - 1.19974, - 1.44564, - 1.48827, - 1.35106, - 1.33401, - 1.41335, - 1.52753, - 1.52441, - 1.61389, - 1.54914, - 1.36186, - 1.02934, - 1.24103, - 1.42695, - 1.57209, - 1.38198, - 1.16311, - 1.21942, - 1.1583, - 1.16794, - 1.18756, - 1.12713 - ], - "train_epoch_time": 5.056878089904785, - "train_loss": 2.237460089277161, - "train_score": 0.346287661465887, - "val_loss": 2.306067126216078, - "val_score": 0.3297574627892158 - }, - { - "epoch": 12, - "grad_norm": 0.8387596011161804, - "learning_rate": 0.1, - "model_norm": 87.61042022705078, - "step_logs": { - "grad_norm": { - "648": 1.581982135772705, - "649": 1.5737553834915161, - "650": 1.4004969596862793, - "651": 1.3306586742401123, - "652": 1.2554517984390259, - "653": 1.1243354082107544, - "654": 1.0491749048233032, - "655": 1.0814530849456787, - "656": 1.0577411651611328, - "657": 1.0769085884094238, - "658": 1.0773382186889648, - "659": 1.1758944988250732, - "660": 1.4077743291854858, - "661": 1.3541282415390015, - "662": 1.2772701978683472, - "663": 1.184023380279541, - "664": 1.1352832317352295, - "665": 1.0736087560653687, - "666": 1.0102338790893555, - "667": 1.0370293855667114, - "668": 1.0756092071533203, - "669": 1.0453011989593506, - "670": 1.0390774011611938, - "671": 1.0482290983200073, - "672": 1.048433542251587, - "673": 1.1108558177947998, - "674": 1.127617359161377, - "675": 1.1093788146972656, - "676": 0.9312753081321716, - "677": 0.7296909689903259, - "678": 0.7033728957176208, - "679": 0.7739483118057251, - "680": 0.938654363155365, - "681": 1.0277092456817627, - "682": 1.0152990818023682, - "683": 0.9473673701286316, - "684": 0.9405576586723328, - "685": 0.9931362271308899, - "686": 0.9151434898376465, - "687": 0.7597059607505798, - "688": 0.7866283059120178, - "689": 0.7895268201828003, - "690": 0.7335109710693359, - "691": 0.7413969039916992, - "692": 0.8307865262031555, - "693": 0.7810856699943542, - "694": 0.7339589595794678, - "695": 0.7777649760246277, - "696": 0.7652184367179871, - "697": 0.7264647483825684, - "698": 0.6602302193641663, - "699": 0.6880760192871094, - "700": 0.7712909579277039, - "701": 0.8387596011161804 - }, - "loss": { - "648": 2.224674701690674, - "649": 2.2643589973449707, - "650": 2.22965669631958, - "651": 2.229290008544922, - "652": 2.2342844009399414, - "653": 2.2144880294799805, - "654": 2.189312696456909, - "655": 2.1986241340637207, - "656": 2.2125964164733887, - "657": 2.2047319412231445, - "658": 2.2047762870788574, - "659": 2.1904592514038086, - "660": 2.2051358222961426, - "661": 2.230994701385498, - "662": 2.231522798538208, - "663": 2.214048385620117, - "664": 2.206422805786133, - "665": 2.213144302368164, - "666": 2.2061305046081543, - "667": 2.1752383708953857, - "668": 2.1865456104278564, - "669": 2.209840774536133, - "670": 2.196215867996216, - "671": 2.179856300354004, - "672": 2.183173179626465, - "673": 2.215100049972534, - "674": 2.1977384090423584, - "675": 2.187796115875244, - "676": 2.2159652709960938, - "677": 2.1777749061584473, - "678": 2.1774747371673584, - "679": 2.195223331451416, - "680": 2.183835983276367, - "681": 2.1674954891204834, - "682": 2.1961796283721924, - "683": 2.195509433746338, - "684": 2.180155038833618, - "685": 2.1860227584838867, - "686": 2.175570249557495, - "687": 2.1664438247680664, - "688": 2.173919200897217, - "689": 2.1759843826293945, - "690": 2.1583423614501953, - "691": 2.178779125213623, - "692": 2.1496191024780273, - "693": 2.1481401920318604, - "694": 2.159451961517334, - "695": 2.157120704650879, - "696": 2.177654504776001, - "697": 2.1564247608184814, - "698": 2.150519847869873, - "699": 2.1538474559783936, - "700": 2.16737699508667, - "701": 2.1726338863372803 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 0.888921, - 0.914262, - 1.13677, - 1.25902, - 1.41755, - 1.75179, - 1.9889, - 1.8799, - 1.97762, - 1.90107, - 1.89959, - 1.58416, - 1.11268, - 1.21669, - 1.36784, - 1.57931, - 1.71191, - 1.92007, - 2.16166, - 2.02267, - 1.88995, - 2.02245, - 2.03413, - 1.98388, - 1.98612, - 1.79506, - 1.72843, - 1.77765, - 2.55509, - 4.09011, - 4.40131, - 3.66484, - 2.47861, - 2.05219, - 2.13049, - 2.44624, - 2.46443, - 2.21634, - 2.59773, - 3.75367, - 3.51321, - 3.49078, - 4.0115, - 3.9638, - 3.11446, - 3.52099, - 4.00867, - 3.56597, - 3.71893, - 4.08606, - 4.93347, - 4.54927, - 3.64333, - 3.08824 - ], - "train_epoch_time": 5.052704334259033, - "train_loss": 2.1614000637185797, - "train_score": 0.3634807657904741, - "val_loss": 2.233464566766189, - "val_score": 0.34346745810744134 - }, - { - "epoch": 13, - "grad_norm": 0.6592527627944946, - "learning_rate": 0.06666666666666668, - "model_norm": 87.6195297241211, - "step_logs": { - "grad_norm": { - "702": 0.8362524509429932, - "703": 0.8091814517974854, - "704": 0.845625638961792, - "705": 0.9983937740325928, - "706": 1.0044894218444824, - "707": 0.962888777256012, - "708": 0.8864151835441589, - "709": 0.8935837745666504, - "710": 0.8574743866920471, - "711": 0.7297285199165344, - "712": 0.7358474731445312, - "713": 0.7165845632553101, - "714": 0.6686339378356934, - "715": 0.7225404977798462, - "716": 0.7110039591789246, - "717": 0.662558913230896, - "718": 0.603072464466095, - "719": 0.6447945237159729, - "720": 0.6536538004875183, - "721": 0.6368194818496704, - "722": 0.673835039138794, - "723": 0.6417527794837952, - "724": 0.585327684879303, - "725": 0.5983076691627502, - "726": 0.6275795102119446, - "727": 0.5941815972328186, - "728": 0.6566662192344666, - "729": 0.6971524357795715, - "730": 0.6727516651153564, - "731": 0.6684824824333191, - "732": 0.6768526434898376, - "733": 0.6906571388244629, - "734": 0.6527054309844971, - "735": 0.5872622132301331, - "736": 0.5852527618408203, - "737": 0.6615782380104065, - "738": 0.6764004826545715, - "739": 0.576840877532959, - "740": 0.6007465720176697, - "741": 0.677241861820221, - "742": 0.6685288548469543, - "743": 0.693906307220459, - "744": 0.6582739353179932, - "745": 0.650596022605896, - "746": 0.6399224996566772, - "747": 0.6072272062301636, - "748": 0.6378557085990906, - "749": 0.632337749004364, - "750": 0.6046590209007263, - "751": 0.6512715220451355, - "752": 0.6133934855461121, - "753": 0.627324640750885, - "754": 0.5898172855377197, - "755": 0.6592527627944946 - }, - "loss": { - "702": 2.160598039627075, - "703": 2.153968095779419, - "704": 2.157777786254883, - "705": 2.164325475692749, - "706": 2.156890869140625, - "707": 2.1611809730529785, - "708": 2.157391309738159, - "709": 2.173656463623047, - "710": 2.168610095977783, - "711": 2.1615982055664062, - "712": 2.131659507751465, - "713": 2.159134864807129, - "714": 2.1437277793884277, - "715": 2.1801514625549316, - "716": 2.139573574066162, - "717": 2.1167964935302734, - "718": 2.137956380844116, - "719": 2.163022041320801, - "720": 2.131282091140747, - "721": 2.134119987487793, - "722": 2.155618667602539, - "723": 2.1259775161743164, - "724": 2.146799087524414, - "725": 2.1739699840545654, - "726": 2.1365103721618652, - "727": 2.1435837745666504, - "728": 2.1390767097473145, - "729": 2.1439733505249023, - "730": 2.1333580017089844, - "731": 2.1631131172180176, - "732": 2.1334164142608643, - "733": 2.1504671573638916, - "734": 2.1357102394104004, - "735": 2.1633048057556152, - "736": 2.124472141265869, - "737": 2.126359462738037, - "738": 2.1509666442871094, - "739": 2.1523540019989014, - "740": 2.1348721981048584, - "741": 2.1738030910491943, - "742": 2.12825345993042, - "743": 2.129455089569092, - "744": 2.1404051780700684, - "745": 2.123248338699341, - "746": 2.1565322875976562, - "747": 2.141040802001953, - "748": 2.133934497833252, - "749": 2.1187424659729004, - "750": 2.149569272994995, - "751": 2.1229147911071777, - "752": 2.15488338470459, - "753": 2.1460983753204346, - "754": 2.1396210193634033, - "755": 2.1444015502929688 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 3.08958, - 3.28963, - 3.01752, - 2.1713, - 2.13765, - 2.33098, - 2.74571, - 2.7222, - 2.94944, - 4.05931, - 3.93679, - 4.2048, - 4.79505, - 4.17602, - 4.23237, - 4.82203, - 5.87841, - 5.20257, - 4.98822, - 5.26243, - 4.7475, - 5.16206, - 6.26604, - 6.07302, - 5.4246, - 6.07158, - 4.96063, - 4.41127, - 4.71362, - 4.8406, - 4.6568, - 4.50825, - 5.01312, - 6.27269, - 6.20246, - 4.85819, - 4.70139, - 6.46847, - 5.91547, - 4.7395, - 4.76193, - 4.42249, - 4.9395, - 5.01624, - 5.26625, - 5.80661, - 5.24489, - 5.29883, - 5.87936, - 5.00505, - 5.72724, - 5.45337, - 6.15038, - 4.93403 - ], - "train_epoch_time": 5.0530359745025635, - "train_loss": 2.1319318702265386, - "train_score": 0.36925215209744755, - "val_loss": 2.207699832631581, - "val_score": 0.34922592566016075 - }, - { - "epoch": 14, - "grad_norm": 0.5784660577774048, - "learning_rate": 0.03333333333333334, - "model_norm": 87.62258911132812, - "step_logs": { - "grad_norm": { - "756": 0.612333357334137, - "757": 0.657143771648407, - "758": 0.6825922727584839, - "759": 0.6265087127685547, - "760": 0.6389917135238647, - "761": 0.6159690022468567, - "762": 0.6025940775871277, - "763": 0.590510904788971, - "764": 0.6645591259002686, - "765": 0.5695677399635315, - "766": 0.5927686095237732, - "767": 0.5843535661697388, - "768": 0.5996710658073425, - "769": 0.6344755291938782, - "770": 0.6641318202018738, - "771": 0.5539579391479492, - "772": 0.5904717445373535, - "773": 0.59966641664505, - "774": 0.5793558359146118, - "775": 0.6131924390792847, - "776": 0.5643621683120728, - "777": 0.5599797368049622, - "778": 0.5634403228759766, - "779": 0.6696975827217102, - "780": 0.5814907550811768, - "781": 0.6196321249008179, - "782": 0.5625707507133484, - "783": 0.5729119181632996, - "784": 0.5670679807662964, - "785": 0.6024376749992371, - "786": 0.6088864207267761, - "787": 0.5948326587677002, - "788": 0.5956305861473083, - "789": 0.5724408626556396, - "790": 0.6002993583679199, - "791": 0.5960835814476013, - "792": 0.5096903443336487, - "793": 0.5664844512939453, - "794": 0.546478271484375, - "795": 0.5342928767204285, - "796": 0.54475337266922, - "797": 0.5758211612701416, - "798": 0.6035395264625549, - "799": 0.5516563057899475, - "800": 0.6098818778991699, - "801": 0.6136682629585266, - "802": 0.5548878908157349, - "803": 0.5874367952346802, - "804": 0.5641118884086609, - "805": 0.5488502383232117, - "806": 0.5530485510826111, - "807": 0.6005003452301025, - "808": 0.5693800449371338, - "809": 0.5784660577774048 - }, - "loss": { - "756": 2.140475034713745, - "757": 2.1435508728027344, - "758": 2.1449623107910156, - "759": 2.144589424133301, - "760": 2.1265616416931152, - "761": 2.1372361183166504, - "762": 2.135319232940674, - "763": 2.1409971714019775, - "764": 2.0976028442382812, - "765": 2.1189403533935547, - "766": 2.1375911235809326, - "767": 2.132354259490967, - "768": 2.149562358856201, - "769": 2.1116445064544678, - "770": 2.162325859069824, - "771": 2.1087965965270996, - "772": 2.1294798851013184, - "773": 2.1239724159240723, - "774": 2.1256752014160156, - "775": 2.11171555519104, - "776": 2.1371350288391113, - "777": 2.126445770263672, - "778": 2.12386417388916, - "779": 2.1403026580810547, - "780": 2.1253581047058105, - "781": 2.0939249992370605, - "782": 2.143845558166504, - "783": 2.096829414367676, - "784": 2.138974189758301, - "785": 2.1372528076171875, - "786": 2.1119861602783203, - "787": 2.139653444290161, - "788": 2.1490094661712646, - "789": 2.0913941860198975, - "790": 2.1084046363830566, - "791": 2.141140937805176, - "792": 2.1391408443450928, - "793": 2.111428737640381, - "794": 2.128871440887451, - "795": 2.121690034866333, - "796": 2.128572463989258, - "797": 2.0958313941955566, - "798": 2.1182193756103516, - "799": 2.1381726264953613, - "800": 2.1112403869628906, - "801": 2.103242874145508, - "802": 2.134166717529297, - "803": 2.1283695697784424, - "804": 2.142695903778076, - "805": 2.1347200870513916, - "806": 2.1347808837890625, - "807": 2.128366231918335, - "808": 2.121126890182495, - "809": 2.113149881362915 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 5.70866, - 4.96379, - 4.60359, - 5.46374, - 5.2082, - 5.63294, - 5.88048, - 6.13989, - 4.74959, - 6.53173, - 6.08351, - 6.24465, - 5.97756, - 5.24555, - 4.90244, - 6.87197, - 6.10767, - 5.90649, - 6.33295, - 5.61619, - 6.7099, - 6.78125, - 6.69007, - 4.77219, - 6.2856, - 5.45373, - 6.77391, - 6.38833, - 6.65174, - 5.88887, - 5.69664, - 6.04719, - 6.05737, - 6.38226, - 5.85084, - 6.02603, - 8.2343, - 6.57961, - 7.12859, - 7.4323, - 7.1728, - 6.32093, - 5.81513, - 7.02596, - 5.67605, - 5.58499, - 6.93135, - 6.16772, - 6.73333, - 7.08652, - 6.97954, - 5.90228, - 6.54278, - 6.31502 - ], - "train_epoch_time": 5.052657604217529, - "train_loss": 2.122466077284628, - "train_score": 0.3722011747541523, - "val_loss": 2.2005189133018908, - "val_score": 0.35092117616010726 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:21:56.476870", - "final_model_norm": 87.62258911132812, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:20:11.667143", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 3.2457480430603027, - "learning_rate": 2.15e-11, - "model_norm": 87.43665313720703, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.512977123260498, - "3": 7.69253396987915, - "4": 21.81309700012207, - "5": 6.899621963500977, - "6": 5.383406639099121, - "7": 4.05136775970459, - "8": 3.837218999862671, - "9": 7.806838512420654, - "10": 6.003067970275879, - "11": 63.25626754760742, - "12": 5.074599742889404, - "13": 46.29304504394531, - "14": 5.295746326446533, - "15": 18.24626922607422, - "16": 6.997523307800293, - "17": 29.57970428466797, - "18": 7.914642333984375, - "19": 12.852014541625977, - "20": 9.042000770568848, - "21": 8.264215469360352, - "22": 34.86244201660156, - "23": 6.628152847290039, - "24": 18.288515090942383, - "25": 7.109195709228516, - "26": 5.042996406555176, - "27": 12.910846710205078, - "28": 3.199608325958252, - "29": 4.209935188293457, - "30": 8.058609008789062, - "31": 3.721379280090332, - "32": 52.77738571166992, - "33": 14.339056968688965, - "34": 21.678544998168945, - "35": 5.035406589508057, - "36": 20.3298282623291, - "37": 7.667484283447266, - "38": 3.5563859939575195, - "39": 4.208524703979492, - "40": 3.5099568367004395, - "41": 15.399702072143555, - "42": 11.075334548950195, - "43": 6.654026508331299, - "44": 8.310967445373535, - "45": 2.8949220180511475, - "46": 4.301228046417236, - "47": 3.8653478622436523, - "48": 3.875664472579956, - "49": 3.565122604370117, - "50": 6.90683650970459, - "51": 7.323732852935791, - "52": 7.216414928436279, - "53": 3.2457480430603027 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.8393378257751465, - "3": 3.6987524032592773, - "4": 4.2365193367004395, - "5": 4.024836540222168, - "6": 3.5493674278259277, - "7": 3.623638391494751, - "8": 3.4474129676818848, - "9": 3.550882339477539, - "10": 3.9463655948638916, - "11": 3.8574094772338867, - "12": 3.5057315826416016, - "13": 3.7508187294006348, - "14": 3.35552978515625, - "15": 3.5992021560668945, - "16": 3.3698222637176514, - "17": 4.331902027130127, - "18": 3.7917962074279785, - "19": 3.724433422088623, - "20": 3.7172322273254395, - "21": 3.4425954818725586, - "22": 3.969120979309082, - "23": 3.6345341205596924, - "24": 3.5270655155181885, - "25": 3.467682361602783, - "26": 3.8323540687561035, - "27": 3.489938735961914, - "28": 3.0839614868164062, - "29": 3.272839069366455, - "30": 3.579218626022339, - "31": 3.0954370498657227, - "32": 4.643240451812744, - "33": 3.8729076385498047, - "34": 5.958535671234131, - "35": 3.183952808380127, - "36": 3.470449209213257, - "37": 3.502824068069458, - "38": 3.414090156555176, - "39": 3.2204222679138184, - "40": 3.0980968475341797, - "41": 4.255000114440918, - "42": 3.625798463821411, - "43": 3.717097759246826, - "44": 3.2471938133239746, - "45": 2.9165167808532715, - "46": 3.624490261077881, - "47": 3.419923782348633, - "48": 3.5652120113372803, - "49": 3.2851014137268066, - "50": 3.773500919342041, - "51": 3.671895980834961, - "52": 3.500760555267334, - "53": 3.1673593521118164 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 0.00839976, - 0.00857642, - 0.0905101, - 0.0625052, - 0.00890378, - 0.0845469, - 0.122472, - 0.220771, - 0.234132, - 0.0582621, - 0.109509, - 0.000964026, - 0.136137, - 0.00175023, - 0.119648, - 0.0108108, - 0.0688206, - 0.00495098, - 0.0605316, - 0.0225485, - 0.0454664, - 0.0504061, - 0.00326572, - 0.0827301, - 0.0105452, - 0.0686117, - 0.150691, - 0.0209367, - 0.301242, - 0.18466, - 0.0551148, - 0.223519, - 0.00166696, - 0.0188363, - 0.0126788, - 0.125573, - 0.00839689, - 0.0595817, - 0.269934, - 0.181825, - 0.251473, - 0.0179422, - 0.029559, - 0.0839527, - 0.0470116, - 0.348009, - 0.195912, - 0.228897, - 0.237352, - 0.258464, - 0.0791017, - 0.0684582, - 0.0672232, - 0.300655 - ], - "train_epoch_time": 5.0551862716674805, - "train_loss": 3.249413633004494, - "train_score": 0.21943821738149377, - "val_loss": 3.2759033399115474, - "val_score": 0.21235020802571222 - }, - { - "epoch": 1, - "grad_norm": 1.557374358177185, - "learning_rate": 0.215, - "model_norm": 87.44507598876953, - "step_logs": { - "grad_norm": { - "54": 4.774287700653076, - "55": 20.154672622680664, - "56": 4.404318332672119, - "57": 4.288097381591797, - "58": 8.36601734161377, - "59": 8.29132080078125, - "60": 3.161160945892334, - "61": 2.579345703125, - "62": 5.378228187561035, - "63": 3.1769394874572754, - "64": 6.62891149520874, - "65": 2.2578682899475098, - "66": 2.9576480388641357, - "67": 3.805307149887085, - "68": 2.2968363761901855, - "69": 3.2989535331726074, - "70": 3.6977450847625732, - "71": 3.1557819843292236, - "72": 2.3498356342315674, - "73": 8.389657974243164, - "74": 3.4586596488952637, - "75": 4.117329120635986, - "76": 3.046645164489746, - "77": 2.420107364654541, - "78": 2.6633551120758057, - "79": 3.2983312606811523, - "80": 2.5718305110931396, - "81": 2.168947458267212, - "82": 3.0560479164123535, - "83": 2.806969165802002, - "84": 2.8554811477661133, - "85": 2.9730701446533203, - "86": 2.311844825744629, - "87": 2.8837015628814697, - "88": 1.9802645444869995, - "89": 1.5054583549499512, - "90": 1.8459925651550293, - "91": 2.2450551986694336, - "92": 1.8281776905059814, - "93": 1.7590476274490356, - "94": 2.589425563812256, - "95": 1.7091599702835083, - "96": 1.3318382501602173, - "97": 1.1193625926971436, - "98": 1.177375078201294, - "99": 1.3803173303604126, - "100": 1.3559755086898804, - "101": 1.373702049255371, - "102": 1.2458759546279907, - "103": 1.2519652843475342, - "104": 1.7411977052688599, - "105": 1.732823371887207, - "106": 1.6153154373168945, - "107": 1.557374358177185 - }, - "loss": { - "54": 3.2355923652648926, - "55": 5.297096252441406, - "56": 3.3338818550109863, - "57": 3.4436700344085693, - "58": 3.8406684398651123, - "59": 3.746638298034668, - "60": 3.1932473182678223, - "61": 3.080146074295044, - "62": 3.348526954650879, - "63": 3.0301764011383057, - "64": 3.911581039428711, - "65": 3.2012410163879395, - "66": 2.9062182903289795, - "67": 3.354140281677246, - "68": 2.872352123260498, - "69": 3.0757651329040527, - "70": 3.455598831176758, - "71": 3.1716394424438477, - "72": 3.1585805416107178, - "73": 3.5887584686279297, - "74": 3.1803665161132812, - "75": 3.290294647216797, - "76": 3.4018030166625977, - "77": 3.062267303466797, - "78": 2.9873971939086914, - "79": 3.2536568641662598, - "80": 3.013258695602417, - "81": 3.065859794616699, - "82": 2.8529186248779297, - "83": 3.2957162857055664, - "84": 3.068861484527588, - "85": 3.077514171600342, - "86": 3.0496582984924316, - "87": 2.9805450439453125, - "88": 3.0729541778564453, - "89": 2.779258966445923, - "90": 2.7471539974212646, - "91": 2.861774444580078, - "92": 2.887636661529541, - "93": 2.722166061401367, - "94": 2.826554298400879, - "95": 3.0241246223449707, - "96": 2.711867332458496, - "97": 2.606844902038574, - "98": 2.619281530380249, - "99": 2.630624771118164, - "100": 2.679307699203491, - "101": 2.624044895172119, - "102": 2.66232967376709, - "103": 2.615865468978882, - "104": 2.6691906452178955, - "105": 2.7768869400024414, - "106": 2.653564453125, - "107": 2.751941680908203 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.14195, - 0.0130403, - 0.171867, - 0.18728, - 0.0548743, - 0.0544997, - 0.31955, - 0.462969, - 0.115764, - 0.300227, - 0.089016, - 0.627944, - 0.332227, - 0.231634, - 0.544475, - 0.282619, - 0.252726, - 0.318471, - 0.572028, - 0.0509866, - 0.265865, - 0.19409, - 0.366493, - 0.522846, - 0.421148, - 0.299077, - 0.455566, - 0.65171, - 0.30547, - 0.418287, - 0.376373, - 0.348169, - 0.570603, - 0.358422, - 0.783627, - 1.22629, - 0.806163, - 0.567782, - 0.863984, - 0.87975, - 0.421551, - 1.03522, - 1.52885, - 2.08053, - 1.88952, - 1.38071, - 1.4572, - 1.39055, - 1.71519, - 1.6689, - 0.880407, - 0.924804, - 1.01699, - 1.13463 - ], - "train_epoch_time": 5.058413505554199, - "train_loss": 2.6439669382623485, - "train_score": 0.24040194584358712, - "val_loss": 2.6650987561069317, - "val_score": 0.23652321324351186 - }, - { - "epoch": 2, - "grad_norm": 1.361799955368042, - "learning_rate": 0.215, - "model_norm": 87.4599609375, - "step_logs": { - "grad_norm": { - "108": 1.4028363227844238, - "109": 1.4082996845245361, - "110": 1.49027681350708, - "111": 1.4508966207504272, - "112": 1.2799209356307983, - "113": 1.2841819524765015, - "114": 1.5262126922607422, - "115": 1.4296855926513672, - "116": 1.2931698560714722, - "117": 1.2932544946670532, - "118": 1.374250888824463, - "119": 1.2298253774642944, - "120": 0.9848251342773438, - "121": 1.1569411754608154, - "122": 1.4206010103225708, - "123": 1.1878057718276978, - "124": 1.0276018381118774, - "125": 1.283652663230896, - "126": 1.8712903261184692, - "127": 1.6439473628997803, - "128": 1.2326799631118774, - "129": 1.4321972131729126, - "130": 1.450684666633606, - "131": 1.2546913623809814, - "132": 1.3548060655593872, - "133": 1.4761539697647095, - "134": 1.6513665914535522, - "135": 1.5099892616271973, - "136": 1.3036129474639893, - "137": 1.2944056987762451, - "138": 1.615399718284607, - "139": 1.40032160282135, - "140": 1.0077502727508545, - "141": 1.0127054452896118, - "142": 1.1501387357711792, - "143": 1.2182928323745728, - "144": 1.3234952688217163, - "145": 1.3381359577178955, - "146": 1.3107799291610718, - "147": 1.3745554685592651, - "148": 1.2872231006622314, - "149": 1.3383318185806274, - "150": 1.4619154930114746, - "151": 1.2690492868423462, - "152": 0.9869495034217834, - "153": 0.9818185567855835, - "154": 1.2708150148391724, - "155": 1.3768160343170166, - "156": 1.358048677444458, - "157": 1.2519779205322266, - "158": 1.058868646621704, - "159": 1.1156221628189087, - "160": 1.2423121929168701, - "161": 1.361799955368042 - }, - "loss": { - "108": 2.631319761276245, - "109": 2.6504969596862793, - "110": 2.640850067138672, - "111": 2.696254253387451, - "112": 2.6220450401306152, - "113": 2.638132095336914, - "114": 2.621736526489258, - "115": 2.6706607341766357, - "116": 2.60536527633667, - "117": 2.6401000022888184, - "118": 2.6053626537323, - "119": 2.6542954444885254, - "120": 2.5550832748413086, - "121": 2.586343765258789, - "122": 2.6107537746429443, - "123": 2.6542739868164062, - "124": 2.560403347015381, - "125": 2.5869429111480713, - "126": 2.6324198246002197, - "127": 2.7599895000457764, - "128": 2.6008591651916504, - "129": 2.5921430587768555, - "130": 2.6323347091674805, - "131": 2.60345196723938, - "132": 2.617171049118042, - "133": 2.6182680130004883, - "134": 2.653480052947998, - "135": 2.6878232955932617, - "136": 2.606743812561035, - "137": 2.6013011932373047, - "138": 2.5990309715270996, - "139": 2.6867997646331787, - "140": 2.553896903991699, - "141": 2.5521392822265625, - "142": 2.542072296142578, - "143": 2.5950050354003906, - "144": 2.5921449661254883, - "145": 2.622699737548828, - "146": 2.5679845809936523, - "147": 2.619826555252075, - "148": 2.598999261856079, - "149": 2.586848735809326, - "150": 2.611640214920044, - "151": 2.6322391033172607, - "152": 2.550750255584717, - "153": 2.548252582550049, - "154": 2.563251495361328, - "155": 2.6212615966796875, - "156": 2.5728607177734375, - "157": 2.613525867462158, - "158": 2.5438265800476074, - "159": 2.5615530014038086, - "160": 2.552793502807617, - "161": 2.572293519973755 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 1.33709, - 1.3364, - 1.18908, - 1.28082, - 1.60057, - 1.59972, - 1.12553, - 1.30658, - 1.55796, - 1.57853, - 1.37955, - 1.75494, - 2.63443, - 1.93225, - 1.29366, - 1.88129, - 2.4247, - 1.56997, - 0.751749, - 1.02125, - 1.71165, - 1.26373, - 1.25082, - 1.65377, - 1.42586, - 1.20157, - 0.973036, - 1.17883, - 1.53391, - 1.55257, - 0.995982, - 1.37019, - 2.51477, - 2.4885, - 1.92171, - 1.74838, - 1.47984, - 1.4647, - 1.49463, - 1.38659, - 1.56855, - 1.44425, - 1.22199, - 1.63444, - 2.61865, - 2.6435, - 1.58718, - 1.3828, - 1.39504, - 1.66738, - 2.26884, - 2.05811, - 1.65407, - 1.38706 - ], - "train_epoch_time": 5.05213475227356, - "train_loss": 2.589251991118728, - "train_score": 0.23802456956682794, - "val_loss": 2.6198141522850973, - "val_score": 0.2345768152880477 - }, - { - "epoch": 3, - "grad_norm": 1.0490036010742188, - "learning_rate": 0.215, - "model_norm": 87.47447967529297, - "step_logs": { - "grad_norm": { - "162": 1.2861301898956299, - "163": 1.1588457822799683, - "164": 1.1387637853622437, - "165": 1.2015197277069092, - "166": 1.1771337985992432, - "167": 1.2637208700180054, - "168": 1.3964570760726929, - "169": 1.3009259700775146, - "170": 1.4832748174667358, - "171": 1.3391103744506836, - "172": 1.001443862915039, - "173": 1.0641924142837524, - "174": 1.1780095100402832, - "175": 1.2123091220855713, - "176": 1.2728246450424194, - "177": 1.2441760301589966, - "178": 1.1102662086486816, - "179": 1.0653257369995117, - "180": 1.213426947593689, - "181": 1.2454233169555664, - "182": 1.1902133226394653, - "183": 1.2155243158340454, - "184": 1.310206413269043, - "185": 1.2149938344955444, - "186": 1.1201399564743042, - "187": 1.1830081939697266, - "188": 1.166102647781372, - "189": 1.0935567617416382, - "190": 1.0420727729797363, - "191": 1.0049830675125122, - "192": 1.1084294319152832, - "193": 1.3101873397827148, - "194": 1.3110649585723877, - "195": 1.2940549850463867, - "196": 1.141141414642334, - "197": 1.0688573122024536, - "198": 1.2241190671920776, - "199": 1.7137930393218994, - "200": 1.4287818670272827, - "201": 0.9249826669692993, - "202": 0.9427591562271118, - "203": 0.9777793884277344, - "204": 0.9809828400611877, - "205": 1.0389865636825562, - "206": 1.056341528892517, - "207": 1.0056283473968506, - "208": 1.1745526790618896, - "209": 1.5735182762145996, - "210": 1.4854751825332642, - "211": 1.260992407798767, - "212": 1.1626397371292114, - "213": 1.4322115182876587, - "214": 1.373690128326416, - "215": 1.0490036010742188 - }, - "loss": { - "162": 2.5961270332336426, - "163": 2.56341814994812, - "164": 2.5469627380371094, - "165": 2.552170753479004, - "166": 2.5810842514038086, - "167": 2.5551209449768066, - "168": 2.5750069618225098, - "169": 2.5918307304382324, - "170": 2.56986927986145, - "171": 2.6384692192077637, - "172": 2.5417380332946777, - "173": 2.5497114658355713, - "174": 2.5267906188964844, - "175": 2.552063465118408, - "176": 2.5574936866760254, - "177": 2.5750246047973633, - "178": 2.520806312561035, - "179": 2.559762716293335, - "180": 2.5456981658935547, - "181": 2.5894641876220703, - "182": 2.5442113876342773, - "183": 2.567554473876953, - "184": 2.5674996376037598, - "185": 2.588540554046631, - "186": 2.5444016456604004, - "187": 2.5603115558624268, - "188": 2.5432286262512207, - "189": 2.568527936935425, - "190": 2.5365238189697266, - "191": 2.5240440368652344, - "192": 2.5167489051818848, - "193": 2.5622100830078125, - "194": 2.5690016746520996, - "195": 2.552128553390503, - "196": 2.561570167541504, - "197": 2.5302505493164062, - "198": 2.534597396850586, - "199": 2.5753073692321777, - "200": 2.669279098510742, - "201": 2.5109434127807617, - "202": 2.5188565254211426, - "203": 2.513155937194824, - "204": 2.5260703563690186, - "205": 2.5210156440734863, - "206": 2.541933536529541, - "207": 2.4990439414978027, - "208": 2.523283004760742, - "209": 2.563011646270752, - "210": 2.61623477935791, - "211": 2.5540199279785156, - "212": 2.53739333152771, - "213": 2.5462939739227295, - "214": 2.6274044513702393, - "215": 2.526057720184326 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 1.56948, - 1.90883, - 1.96406, - 1.76786, - 1.86273, - 1.59996, - 1.32045, - 1.53145, - 1.16807, - 1.47136, - 2.53441, - 2.25139, - 1.82084, - 1.73646, - 1.57862, - 1.66348, - 2.04496, - 2.25546, - 1.72894, - 1.66946, - 1.79599, - 1.73777, - 1.49565, - 1.7535, - 2.02787, - 1.82944, - 1.8703, - 2.14784, - 2.33584, - 2.49908, - 2.04844, - 1.49262, - 1.49457, - 1.52404, - 1.9671, - 2.21475, - 1.69146, - 0.876824, - 1.30756, - 2.93474, - 2.83401, - 2.62868, - 2.62496, - 2.33537, - 2.27801, - 2.47115, - 1.82903, - 1.03516, - 1.18562, - 1.6062, - 1.87714, - 1.24135, - 1.39235, - 2.29556 - ], - "train_epoch_time": 5.0522496700286865, - "train_loss": 2.523870642455442, - "train_score": 0.2451835993458243, - "val_loss": 2.5673934510052545, - "val_score": 0.2400034083188196 - }, - { - "epoch": 4, - "grad_norm": 1.0854158401489258, - "learning_rate": 0.215, - "model_norm": 87.49336242675781, - "step_logs": { - "grad_norm": { - "216": 1.0241482257843018, - "217": 1.1809654235839844, - "218": 1.2362719774246216, - "219": 1.1973521709442139, - "220": 1.2557106018066406, - "221": 1.207959532737732, - "222": 1.2223831415176392, - "223": 1.1609492301940918, - "224": 1.0549728870391846, - "225": 1.0758628845214844, - "226": 1.0378057956695557, - "227": 1.0285675525665283, - "228": 1.0697710514068604, - "229": 1.1172983646392822, - "230": 1.1721439361572266, - "231": 1.2012693881988525, - "232": 1.0827316045761108, - "233": 0.9628183245658875, - "234": 1.0146420001983643, - "235": 1.0098919868469238, - "236": 1.1025352478027344, - "237": 1.236091136932373, - "238": 1.222111701965332, - "239": 1.1935385465621948, - "240": 1.2283692359924316, - "241": 1.181260347366333, - "242": 1.0883049964904785, - "243": 0.9462724924087524, - "244": 0.841920793056488, - "245": 0.904147744178772, - "246": 1.028922200202942, - "247": 1.0499484539031982, - "248": 1.019058346748352, - "249": 1.0206499099731445, - "250": 1.0636106729507446, - "251": 1.0745441913604736, - "252": 1.3279489278793335, - "253": 1.3285070657730103, - "254": 1.3298733234405518, - "255": 1.305577039718628, - "256": 1.0363807678222656, - "257": 0.9024780988693237, - "258": 0.8831479549407959, - "259": 0.944024920463562, - "260": 1.327848196029663, - "261": 1.2221624851226807, - "262": 0.867016077041626, - "263": 0.9218744039535522, - "264": 1.0184452533721924, - "265": 1.108479380607605, - "266": 1.3029100894927979, - "267": 1.315808653831482, - "268": 1.173572301864624, - "269": 1.0854158401489258 - }, - "loss": { - "216": 2.5376625061035156, - "217": 2.528329372406006, - "218": 2.559483051300049, - "219": 2.5134034156799316, - "220": 2.5575060844421387, - "221": 2.5357069969177246, - "222": 2.5385208129882812, - "223": 2.5506324768066406, - "224": 2.52734375, - "225": 2.5073888301849365, - "226": 2.5314648151397705, - "227": 2.5137887001037598, - "228": 2.533418893814087, - "229": 2.5058951377868652, - "230": 2.5301146507263184, - "231": 2.533447504043579, - "232": 2.5572028160095215, - "233": 2.4895966053009033, - "234": 2.5353453159332275, - "235": 2.5028529167175293, - "236": 2.497239112854004, - "237": 2.5329959392547607, - "238": 2.548107624053955, - "239": 2.512467384338379, - "240": 2.532466411590576, - "241": 2.5352303981781006, - "242": 2.5191922187805176, - "243": 2.500959873199463, - "244": 2.4881105422973633, - "245": 2.4924659729003906, - "246": 2.4836041927337646, - "247": 2.508016586303711, - "248": 2.5005147457122803, - "249": 2.4982798099517822, - "250": 2.5091617107391357, - "251": 2.5055460929870605, - "252": 2.5079002380371094, - "253": 2.5764472484588623, - "254": 2.517523765563965, - "255": 2.566769599914551, - "256": 2.513474702835083, - "257": 2.5024774074554443, - "258": 2.462148427963257, - "259": 2.482999086380005, - "260": 2.5005099773406982, - "261": 2.5734333992004395, - "262": 2.5019009113311768, - "263": 2.47052264213562, - "264": 2.4790942668914795, - "265": 2.500074625015259, - "266": 2.5009069442749023, - "267": 2.520803928375244, - "268": 2.5006628036499023, - "269": 2.5019078254699707 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 2.4194, - 1.81284, - 1.67465, - 1.75315, - 1.62195, - 1.73778, - 1.69889, - 1.89244, - 2.27081, - 2.16625, - 2.35039, - 2.37609, - 2.21373, - 2.00736, - 1.84153, - 1.75562, - 2.18134, - 2.68559, - 2.4627, - 2.45406, - 2.05435, - 1.65781, - 1.70607, - 1.76371, - 1.67836, - 1.81688, - 2.12696, - 2.79302, - 3.51016, - 3.04895, - 2.34594, - 2.27507, - 2.40786, - 2.39821, - 2.21801, - 2.16997, - 1.42216, - 1.4598, - 1.42349, - 1.50585, - 2.34011, - 3.07253, - 3.1568, - 2.78618, - 1.41818, - 1.72288, - 3.32825, - 2.907, - 2.39011, - 2.03469, - 1.47322, - 1.45597, - 1.81566, - 2.12363 - ], - "train_epoch_time": 5.051345109939575, - "train_loss": 2.4881013529543554, - "train_score": 0.25934585722823394, - "val_loss": 2.529016125489595, - "val_score": 0.2525518438709316 - }, - { - "epoch": 5, - "grad_norm": 1.00590980052948, - "learning_rate": 0.215, - "model_norm": 87.51631164550781, - "step_logs": { - "grad_norm": { - "270": 1.13013756275177, - "271": 1.2563263177871704, - "272": 1.244115948677063, - "273": 1.1930584907531738, - "274": 1.1821926832199097, - "275": 1.1659003496170044, - "276": 1.1019318103790283, - "277": 1.0566940307617188, - "278": 0.9704861044883728, - "279": 1.014012098312378, - "280": 1.0404714345932007, - "281": 0.9590476751327515, - "282": 0.8862022757530212, - "283": 0.9800989627838135, - "284": 1.1799497604370117, - "285": 1.3598498106002808, - "286": 1.5365018844604492, - "287": 1.4131407737731934, - "288": 1.1602160930633545, - "289": 1.1545852422714233, - "290": 1.2624964714050293, - "291": 1.133628487586975, - "292": 0.9308536052703857, - "293": 0.8660498857498169, - "294": 0.9804956912994385, - "295": 1.0610535144805908, - "296": 1.3071521520614624, - "297": 1.44125497341156, - "298": 1.4970768690109253, - "299": 1.519706130027771, - "300": 1.5792064666748047, - "301": 1.2901794910430908, - "302": 1.0978492498397827, - "303": 0.98650723695755, - "304": 0.9940528869628906, - "305": 1.1404259204864502, - "306": 1.3931688070297241, - "307": 1.7322280406951904, - "308": 1.398901104927063, - "309": 1.0341744422912598, - "310": 1.0753517150878906, - "311": 1.1951384544372559, - "312": 1.3540966510772705, - "313": 1.1803466081619263, - "314": 1.1124998331069946, - "315": 1.1645481586456299, - "316": 1.2335587739944458, - "317": 1.1928004026412964, - "318": 1.1615335941314697, - "319": 1.267061471939087, - "320": 1.4724963903427124, - "321": 1.3068675994873047, - "322": 1.0462076663970947, - "323": 1.00590980052948 - }, - "loss": { - "270": 2.47450852394104, - "271": 2.505202531814575, - "272": 2.5308775901794434, - "273": 2.5177090167999268, - "274": 2.5039329528808594, - "275": 2.5061721801757812, - "276": 2.50260853767395, - "277": 2.501378059387207, - "278": 2.4689221382141113, - "279": 2.486182689666748, - "280": 2.483917713165283, - "281": 2.4691946506500244, - "282": 2.45881986618042, - "283": 2.4496452808380127, - "284": 2.4789481163024902, - "285": 2.5068202018737793, - "286": 2.5256478786468506, - "287": 2.5311498641967773, - "288": 2.484822988510132, - "289": 2.46293044090271, - "290": 2.475250482559204, - "291": 2.4920730590820312, - "292": 2.433931350708008, - "293": 2.450202465057373, - "294": 2.4047436714172363, - "295": 2.4686248302459717, - "296": 2.4366941452026367, - "297": 2.517975330352783, - "298": 2.492640972137451, - "299": 2.5122952461242676, - "300": 2.523442506790161, - "301": 2.5308361053466797, - "302": 2.457784414291382, - "303": 2.4264230728149414, - "304": 2.4332144260406494, - "305": 2.4344351291656494, - "306": 2.460434913635254, - "307": 2.5175065994262695, - "308": 2.560126543045044, - "309": 2.42850399017334, - "310": 2.4256277084350586, - "311": 2.461831569671631, - "312": 2.4378867149353027, - "313": 2.476686716079712, - "314": 2.4298629760742188, - "315": 2.433743476867676, - "316": 2.451724052429199, - "317": 2.4429852962493896, - "318": 2.4287307262420654, - "319": 2.4491324424743652, - "320": 2.4364917278289795, - "321": 2.4973602294921875, - "322": 2.4239325523376465, - "323": 2.405879020690918 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 1.93743, - 1.58722, - 1.63512, - 1.76881, - 1.79162, - 1.84369, - 2.06103, - 2.24017, - 2.62137, - 2.41795, - 2.29444, - 2.68457, - 3.13084, - 2.55014, - 1.78049, - 1.35563, - 1.06981, - 1.2675, - 1.84594, - 1.84757, - 1.55295, - 1.93919, - 2.80896, - 3.26675, - 2.50137, - 2.19271, - 1.4261, - 1.21219, - 1.11217, - 1.08781, - 1.01185, - 1.52042, - 2.03919, - 2.49325, - 2.46242, - 1.87182, - 1.26766, - 0.838997, - 1.30824, - 2.27066, - 2.0976, - 1.72354, - 1.32958, - 1.77767, - 1.96328, - 1.79457, - 1.61121, - 1.71706, - 1.80018, - 1.52552, - 1.12372, - 1.46224, - 2.21455, - 2.37769 - ], - "train_epoch_time": 5.054481506347656, - "train_loss": 2.410666897170342, - "train_score": 0.28630850969291316, - "val_loss": 2.458363356738085, - "val_score": 0.2764198838911702 - }, - { - "epoch": 6, - "grad_norm": 1.3920936584472656, - "learning_rate": 0.215, - "model_norm": 87.53954315185547, - "step_logs": { - "grad_norm": { - "324": 1.2398954629898071, - "325": 1.2559360265731812, - "326": 1.1005347967147827, - "327": 1.0315883159637451, - "328": 1.0457839965820312, - "329": 1.1575443744659424, - "330": 1.3922940492630005, - "331": 1.421283483505249, - "332": 1.2612704038619995, - "333": 1.0700067281723022, - "334": 0.9830129742622375, - "335": 1.0199302434921265, - "336": 1.105617880821228, - "337": 1.1271659135818481, - "338": 1.1486330032348633, - "339": 1.158549427986145, - "340": 1.0926823616027832, - "341": 1.081673502922058, - "342": 1.0944740772247314, - "343": 1.0738455057144165, - "344": 1.1464033126831055, - "345": 1.2084888219833374, - "346": 1.1741127967834473, - "347": 1.0600250959396362, - "348": 0.9916175007820129, - "349": 1.1096489429473877, - "350": 1.3233437538146973, - "351": 1.6562645435333252, - "352": 1.6098417043685913, - "353": 1.4056298732757568, - "354": 1.2175393104553223, - "355": 1.1655157804489136, - "356": 1.0950380563735962, - "357": 1.1121106147766113, - "358": 0.9945564270019531, - "359": 0.8062542676925659, - "360": 0.8896239995956421, - "361": 1.1539795398712158, - "362": 1.1678218841552734, - "363": 0.99281245470047, - "364": 0.9655885696411133, - "365": 1.053301453590393, - "366": 1.167069435119629, - "367": 1.2617477178573608, - "368": 1.1912317276000977, - "369": 1.0710958242416382, - "370": 1.0876836776733398, - "371": 1.2153470516204834, - "372": 1.4546935558319092, - "373": 1.1815884113311768, - "374": 1.0157623291015625, - "375": 1.1425656080245972, - "376": 1.2669028043746948, - "377": 1.3920936584472656 - }, - "loss": { - "324": 2.394548177719116, - "325": 2.4889769554138184, - "326": 2.4241433143615723, - "327": 2.405470371246338, - "328": 2.4188551902770996, - "329": 2.391901969909668, - "330": 2.427004337310791, - "331": 2.4808945655822754, - "332": 2.434130907058716, - "333": 2.42537260055542, - "334": 2.3684873580932617, - "335": 2.387106418609619, - "336": 2.416452407836914, - "337": 2.4211487770080566, - "338": 2.4016990661621094, - "339": 2.4311635494232178, - "340": 2.3922266960144043, - "341": 2.4100000858306885, - "342": 2.389313220977783, - "343": 2.383903980255127, - "344": 2.370375871658325, - "345": 2.4168379306793213, - "346": 2.3979382514953613, - "347": 2.4214749336242676, - "348": 2.3990001678466797, - "349": 2.402042865753174, - "350": 2.4201416969299316, - "351": 2.473811626434326, - "352": 2.5203981399536133, - "353": 2.4384689331054688, - "354": 2.440727710723877, - "355": 2.3802945613861084, - "356": 2.4108998775482178, - "357": 2.3781023025512695, - "358": 2.4347705841064453, - "359": 2.354454278945923, - "360": 2.3518245220184326, - "361": 2.3720993995666504, - "362": 2.4115350246429443, - "363": 2.3968918323516846, - "364": 2.3538870811462402, - "365": 2.389626979827881, - "366": 2.4068222045898438, - "367": 2.4024605751037598, - "368": 2.4263787269592285, - "369": 2.370750904083252, - "370": 2.3828773498535156, - "371": 2.4061851501464844, - "372": 2.4232969284057617, - "373": 2.425050735473633, - "374": 2.394852638244629, - "375": 2.3632655143737793, - "376": 2.4071457386016846, - "377": 2.393009662628174 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 1.55759, - 1.57792, - 2.00148, - 2.26041, - 2.2117, - 1.78512, - 1.25201, - 1.22814, - 1.53013, - 2.11839, - 2.45105, - 2.29473, - 1.97682, - 1.90566, - 1.82035, - 1.81128, - 2.00362, - 2.0598, - 1.99463, - 2.06731, - 1.80361, - 1.65486, - 1.73948, - 2.155, - 2.43973, - 1.95079, - 1.38196, - 0.901794, - 0.97253, - 1.23417, - 1.64647, - 1.75224, - 2.01058, - 1.9228, - 2.4615, - 3.62198, - 2.97161, - 1.7813, - 1.76824, - 2.43172, - 2.52465, - 2.1539, - 1.76706, - 1.50908, - 1.70988, - 2.06647, - 2.01417, - 1.62903, - 1.14515, - 1.73695, - 2.3211, - 1.8103, - 1.49974, - 1.23483 - ], - "train_epoch_time": 5.051822185516357, - "train_loss": 2.4235573906125434, - "train_score": 0.2781384503909813, - "val_loss": 2.4600568704572257, - "val_score": 0.26916798193063307 - }, - { - "epoch": 7, - "grad_norm": 1.2349190711975098, - "learning_rate": 0.215, - "model_norm": 87.56900787353516, - "step_logs": { - "grad_norm": { - "378": 1.250223159790039, - "379": 1.0774519443511963, - "380": 1.0756529569625854, - "381": 1.0746705532073975, - "382": 1.1278197765350342, - "383": 1.2192370891571045, - "384": 1.288550615310669, - "385": 1.2426871061325073, - "386": 1.175428032875061, - "387": 1.085137128829956, - "388": 1.0985182523727417, - "389": 1.1738382577896118, - "390": 1.182012677192688, - "391": 1.2103580236434937, - "392": 1.3914525508880615, - "393": 1.3073880672454834, - "394": 1.245774507522583, - "395": 1.229509949684143, - "396": 1.2988204956054688, - "397": 1.1870174407958984, - "398": 0.9405555129051208, - "399": 0.9783185720443726, - "400": 1.0132989883422852, - "401": 1.0678632259368896, - "402": 1.0937392711639404, - "403": 1.0450265407562256, - "404": 1.0873031616210938, - "405": 1.1243746280670166, - "406": 1.1756823062896729, - "407": 1.173513650894165, - "408": 1.160428524017334, - "409": 1.2300890684127808, - "410": 1.353251576423645, - "411": 1.381003975868225, - "412": 1.3498440980911255, - "413": 1.1837456226348877, - "414": 1.0914051532745361, - "415": 1.0649654865264893, - "416": 1.1831587553024292, - "417": 1.16208815574646, - "418": 0.9885045289993286, - "419": 0.8873646855354309, - "420": 0.8332418203353882, - "421": 0.9156327247619629, - "422": 0.9984257221221924, - "423": 0.9638368487358093, - "424": 0.9576247334480286, - "425": 1.05313241481781, - "426": 1.1539151668548584, - "427": 1.1707099676132202, - "428": 1.1698909997940063, - "429": 1.1404439210891724, - "430": 1.1631096601486206, - "431": 1.2349190711975098 - }, - "loss": { - "378": 2.425527572631836, - "379": 2.354468822479248, - "380": 2.384701728820801, - "381": 2.352278470993042, - "382": 2.3599302768707275, - "383": 2.375415325164795, - "384": 2.3820619583129883, - "385": 2.385037899017334, - "386": 2.3775856494903564, - "387": 2.3743581771850586, - "388": 2.3540422916412354, - "389": 2.3854315280914307, - "390": 2.37440824508667, - "391": 2.3822226524353027, - "392": 2.365360736846924, - "393": 2.4160287380218506, - "394": 2.3536853790283203, - "395": 2.376986026763916, - "396": 2.380284547805786, - "397": 2.3999688625335693, - "398": 2.3580517768859863, - "399": 2.3259963989257812, - "400": 2.3365097045898438, - "401": 2.3584940433502197, - "402": 2.3531861305236816, - "403": 2.361647844314575, - "404": 2.3399956226348877, - "405": 2.3749985694885254, - "406": 2.3618900775909424, - "407": 2.368696689605713, - "408": 2.3374476432800293, - "409": 2.3836843967437744, - "410": 2.367276191711426, - "411": 2.411839485168457, - "412": 2.3922245502471924, - "413": 2.3754727840423584, - "414": 2.3681883811950684, - "415": 2.350381374359131, - "416": 2.345921754837036, - "417": 2.3610222339630127, - "418": 2.3186309337615967, - "419": 2.3404953479766846, - "420": 2.3003005981445312, - "421": 2.3098666667938232, - "422": 2.321126937866211, - "423": 2.3332765102386475, - "424": 2.3189990520477295, - "425": 2.3287124633789062, - "426": 2.3312830924987793, - "427": 2.339560031890869, - "428": 2.320887565612793, - "429": 2.3463175296783447, - "430": 2.3140077590942383, - "431": 2.344412326812744 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 1.55178, - 2.02814, - 2.06106, - 2.03675, - 1.85532, - 1.59795, - 1.43466, - 1.54444, - 1.72085, - 2.0164, - 1.95074, - 1.73121, - 1.69946, - 1.62613, - 1.22169, - 1.41349, - 1.51659, - 1.5724, - 1.41101, - 1.7033, - 2.66554, - 2.43024, - 2.27558, - 2.06825, - 1.96711, - 2.16252, - 1.97931, - 1.87863, - 1.70875, - 1.72002, - 1.73582, - 1.57534, - 1.29268, - 1.26462, - 1.31291, - 1.69525, - 1.98813, - 2.07237, - 1.67582, - 1.74832, - 2.37287, - 2.97237, - 3.31316, - 2.75514, - 2.32845, - 2.51165, - 2.52877, - 2.09966, - 1.75084, - 1.70701, - 1.69576, - 1.80401, - 1.7105, - 1.53729 - ], - "train_epoch_time": 5.052142858505249, - "train_loss": 2.3236548091280924, - "train_score": 0.3173702026029228, - "val_loss": 2.37227043711359, - "val_score": 0.3091319245120551 - }, - { - "epoch": 8, - "grad_norm": 1.1614073514938354, - "learning_rate": 0.215, - "model_norm": 87.59609985351562, - "step_logs": { - "grad_norm": { - "432": 1.1335837841033936, - "433": 1.133182406425476, - "434": 1.3028398752212524, - "435": 1.3575388193130493, - "436": 1.183471918106079, - "437": 1.0903741121292114, - "438": 0.9942664504051208, - "439": 0.8852788805961609, - "440": 0.9137327075004578, - "441": 1.0063812732696533, - "442": 1.0718480348587036, - "443": 1.1188194751739502, - "444": 1.1481742858886719, - "445": 1.2257109880447388, - "446": 1.2147953510284424, - "447": 1.1156500577926636, - "448": 1.0477854013442993, - "449": 1.157600998878479, - "450": 1.1659703254699707, - "451": 1.209721326828003, - "452": 1.1933655738830566, - "453": 1.2303316593170166, - "454": 1.2471320629119873, - "455": 1.3114380836486816, - "456": 1.3324886560440063, - "457": 1.244615077972412, - "458": 1.325263500213623, - "459": 1.3488163948059082, - "460": 1.1333106756210327, - "461": 0.9534809589385986, - "462": 0.9193475246429443, - "463": 1.0124518871307373, - "464": 1.064584493637085, - "465": 1.0993348360061646, - "466": 1.108075737953186, - "467": 1.3078234195709229, - "468": 1.2591376304626465, - "469": 1.2338371276855469, - "470": 1.3050905466079712, - "471": 1.3439602851867676, - "472": 1.1983816623687744, - "473": 1.0942996740341187, - "474": 0.9860391020774841, - "475": 0.9721660614013672, - "476": 1.183914065361023, - "477": 1.3471086025238037, - "478": 1.2305814027786255, - "479": 1.2195003032684326, - "480": 1.250832438468933, - "481": 1.188461184501648, - "482": 1.2503068447113037, - "483": 1.2693363428115845, - "484": 1.1738686561584473, - "485": 1.1614073514938354 - }, - "loss": { - "432": 2.3429641723632812, - "433": 2.3273744583129883, - "434": 2.326981544494629, - "435": 2.3749589920043945, - "436": 2.337435245513916, - "437": 2.3211703300476074, - "438": 2.3299434185028076, - "439": 2.2885684967041016, - "440": 2.307781934738159, - "441": 2.311345100402832, - "442": 2.2958879470825195, - "443": 2.3113040924072266, - "444": 2.3194353580474854, - "445": 2.3361878395080566, - "446": 2.3112845420837402, - "447": 2.294637680053711, - "448": 2.2851107120513916, - "449": 2.2943053245544434, - "450": 2.3291707038879395, - "451": 2.3023414611816406, - "452": 2.302241325378418, - "453": 2.302578926086426, - "454": 2.345062732696533, - "455": 2.313366651535034, - "456": 2.3372836112976074, - "457": 2.330024003982544, - "458": 2.32963490486145, - "459": 2.3328371047973633, - "460": 2.3079590797424316, - "461": 2.282858371734619, - "462": 2.2656188011169434, - "463": 2.2717702388763428, - "464": 2.2871859073638916, - "465": 2.3089089393615723, - "466": 2.308109760284424, - "467": 2.29720401763916, - "468": 2.328185558319092, - "469": 2.2941536903381348, - "470": 2.31735897064209, - "471": 2.303525924682617, - "472": 2.3207876682281494, - "473": 2.2822265625, - "474": 2.260265827178955, - "475": 2.2539334297180176, - "476": 2.2617974281311035, - "477": 2.2976627349853516, - "478": 2.3209643363952637, - "479": 2.291518449783325, - "480": 2.323875904083252, - "481": 2.3008086681365967, - "482": 2.3029935359954834, - "483": 2.2736434936523438, - "484": 2.3120718002319336, - "485": 2.306636333465576 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 1.8233, - 1.81245, - 1.37092, - 1.2887, - 1.66887, - 1.95234, - 2.35689, - 2.92014, - 2.76412, - 2.28213, - 1.99841, - 1.84645, - 1.75941, - 1.555, - 1.5662, - 1.84356, - 2.08143, - 1.71212, - 1.71327, - 1.57325, - 1.6166, - 1.52114, - 1.50775, - 1.34508, - 1.31639, - 1.50415, - 1.32643, - 1.28227, - 1.79693, - 2.51105, - 2.68057, - 2.21623, - 2.01809, - 1.9105, - 1.87983, - 1.34308, - 1.46849, - 1.50698, - 1.36054, - 1.27532, - 1.61601, - 1.90584, - 2.32472, - 2.38485, - 1.61366, - 1.26614, - 1.53267, - 1.54085, - 1.4853, - 1.62896, - 1.47319, - 1.41114, - 1.67789, - 1.71005 - ], - "train_epoch_time": 5.051881790161133, - "train_loss": 2.3074515483642752, - "train_score": 0.31586374634478664, - "val_loss": 2.3635185950111715, - "val_score": 0.2983325556279871 - }, - { - "epoch": 9, - "grad_norm": 1.245952844619751, - "learning_rate": 0.215, - "model_norm": 87.62519073486328, - "step_logs": { - "grad_norm": { - "486": 1.4350836277008057, - "487": 1.3153775930404663, - "488": 1.1950019598007202, - "489": 1.1991479396820068, - "490": 1.1610701084136963, - "491": 1.0555616617202759, - "492": 0.8973591923713684, - "493": 0.8280812501907349, - "494": 0.8429233431816101, - "495": 1.0318495035171509, - "496": 1.2025450468063354, - "497": 1.4055798053741455, - "498": 1.4624541997909546, - "499": 1.7936570644378662, - "500": 1.3847507238388062, - "501": 1.2007139921188354, - "502": 0.9072302579879761, - "503": 0.688532292842865, - "504": 0.6601887345314026, - "505": 0.6672235131263733, - "506": 0.7431198358535767, - "507": 0.8004299402236938, - "508": 0.9479244947433472, - "509": 1.129452109336853, - "510": 1.4622944593429565, - "511": 1.4010891914367676, - "512": 1.1569700241088867, - "513": 1.1538740396499634, - "514": 1.229243516921997, - "515": 1.2271878719329834, - "516": 1.1357204914093018, - "517": 1.1053290367126465, - "518": 1.1159783601760864, - "519": 1.2433758974075317, - "520": 1.3332566022872925, - "521": 1.3406134843826294, - "522": 1.1998921632766724, - "523": 1.052549123764038, - "524": 0.9985735416412354, - "525": 0.9584429264068604, - "526": 0.9948418736457825, - "527": 1.0198593139648438, - "528": 1.073205590248108, - "529": 1.0430034399032593, - "530": 1.029845118522644, - "531": 1.1068893671035767, - "532": 1.2657700777053833, - "533": 1.1777558326721191, - "534": 0.9900950789451599, - "535": 0.9171857833862305, - "536": 0.9722216129302979, - "537": 0.9982343912124634, - "538": 1.0485411882400513, - "539": 1.245952844619751 - }, - "loss": { - "486": 2.3105132579803467, - "487": 2.3353779315948486, - "488": 2.2745251655578613, - "489": 2.3003311157226562, - "490": 2.259620189666748, - "491": 2.299048900604248, - "492": 2.2536635398864746, - "493": 2.2375235557556152, - "494": 2.2467522621154785, - "495": 2.2667975425720215, - "496": 2.274629831314087, - "497": 2.2951698303222656, - "498": 2.3376176357269287, - "499": 2.2875685691833496, - "500": 2.367274761199951, - "501": 2.294498920440674, - "502": 2.279972791671753, - "503": 2.2419590950012207, - "504": 2.2450153827667236, - "505": 2.208838701248169, - "506": 2.212721347808838, - "507": 2.233635902404785, - "508": 2.2223172187805176, - "509": 2.272603988647461, - "510": 2.2838568687438965, - "511": 2.3249762058258057, - "512": 2.2769415378570557, - "513": 2.293736696243286, - "514": 2.2783069610595703, - "515": 2.2808032035827637, - "516": 2.257035255432129, - "517": 2.2605504989624023, - "518": 2.271979331970215, - "519": 2.26370906829834, - "520": 2.299496650695801, - "521": 2.2778971195220947, - "522": 2.2844040393829346, - "523": 2.2698402404785156, - "524": 2.250211000442505, - "525": 2.2461366653442383, - "526": 2.24664044380188, - "527": 2.210922956466675, - "528": 2.2328033447265625, - "529": 2.2204999923706055, - "530": 2.267880439758301, - "531": 2.235379695892334, - "532": 2.2744719982147217, - "533": 2.27650785446167, - "534": 2.2195796966552734, - "535": 2.196235179901123, - "536": 2.222111940383911, - "537": 2.2188525199890137, - "538": 2.219332456588745, - "539": 2.228638172149658 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 1.1219, - 1.34976, - 1.59277, - 1.59972, - 1.67617, - 2.06339, - 2.7987, - 3.26304, - 3.16212, - 2.12902, - 1.57293, - 1.16173, - 1.09297, - 0.711042, - 1.23454, - 1.59151, - 2.77009, - 4.72911, - 5.1509, - 4.9616, - 4.0069, - 3.48631, - 2.4732, - 1.78151, - 1.06807, - 1.18437, - 1.70101, - 1.72277, - 1.50777, - 1.51449, - 1.74983, - 1.85025, - 1.82429, - 1.46425, - 1.29362, - 1.26744, - 1.58668, - 2.04885, - 2.25664, - 2.44514, - 2.27, - 2.12566, - 1.93858, - 2.04117, - 2.13834, - 1.8245, - 1.41962, - 1.64119, - 2.26421, - 2.61074, - 2.35091, - 2.22671, - 2.0186, - 1.43561 - ], - "train_epoch_time": 5.052018880844116, - "train_loss": 2.2684142873482176, - "train_score": 0.33120068155197024, - "val_loss": 2.337002307484536, - "val_score": 0.3179624710236296 - }, - { - "epoch": 10, - "grad_norm": 1.2299432754516602, - "learning_rate": 0.215, - "model_norm": 87.65270233154297, - "step_logs": { - "grad_norm": { - "540": 1.3511351346969604, - "541": 1.7157261371612549, - "542": 1.513992190361023, - "543": 1.422940731048584, - "544": 1.1099467277526855, - "545": 1.1671432256698608, - "546": 1.2268176078796387, - "547": 1.0674853324890137, - "548": 0.9695413112640381, - "549": 0.9712854623794556, - "550": 1.0565835237503052, - "551": 1.135793924331665, - "552": 1.126135230064392, - "553": 1.104817509651184, - "554": 1.1046085357666016, - "555": 1.2899866104125977, - "556": 1.3167997598648071, - "557": 1.109792709350586, - "558": 1.0866280794143677, - "559": 1.0575735569000244, - "560": 1.0726302862167358, - "561": 1.1983563899993896, - "562": 1.1723695993423462, - "563": 1.1241942644119263, - "564": 1.144669532775879, - "565": 1.082777738571167, - "566": 1.0937782526016235, - "567": 1.0855193138122559, - "568": 1.0865298509597778, - "569": 1.096518874168396, - "570": 1.1220263242721558, - "571": 1.074330449104309, - "572": 0.9960720539093018, - "573": 1.0139071941375732, - "574": 0.9430017471313477, - "575": 0.8367109298706055, - "576": 0.9074212312698364, - "577": 1.0143239498138428, - "578": 1.1238517761230469, - "579": 1.134877324104309, - "580": 1.0755152702331543, - "581": 1.0887564420700073, - "582": 1.1856040954589844, - "583": 1.1979775428771973, - "584": 1.0448088645935059, - "585": 1.0379643440246582, - "586": 1.1413239240646362, - "587": 1.2523587942123413, - "588": 1.2925063371658325, - "589": 1.2393759489059448, - "590": 1.1159319877624512, - "591": 1.0827888250350952, - "592": 1.1108096837997437, - "593": 1.2299432754516602 - }, - "loss": { - "540": 2.269345283508301, - "541": 2.298983573913574, - "542": 2.338210105895996, - "543": 2.31870174407959, - "544": 2.2218332290649414, - "545": 2.2401480674743652, - "546": 2.2606444358825684, - "547": 2.255159854888916, - "548": 2.2324373722076416, - "549": 2.2224931716918945, - "550": 2.238863945007324, - "551": 2.2543349266052246, - "552": 2.2394089698791504, - "553": 2.228637456893921, - "554": 2.237247943878174, - "555": 2.2630422115325928, - "556": 2.283402442932129, - "557": 2.23069167137146, - "558": 2.2173311710357666, - "559": 2.2453360557556152, - "560": 2.1964633464813232, - "561": 2.2507455348968506, - "562": 2.2712860107421875, - "563": 2.225886583328247, - "564": 2.2693567276000977, - "565": 2.2007830142974854, - "566": 2.223585605621338, - "567": 2.2126495838165283, - "568": 2.2432312965393066, - "569": 2.244802474975586, - "570": 2.2392830848693848, - "571": 2.2328972816467285, - "572": 2.235520124435425, - "573": 2.223702907562256, - "574": 2.201941967010498, - "575": 2.1812734603881836, - "576": 2.191406726837158, - "577": 2.2073256969451904, - "578": 2.229458808898926, - "579": 2.2427992820739746, - "580": 2.2275028228759766, - "581": 2.2165684700012207, - "582": 2.2341699600219727, - "583": 2.215458869934082, - "584": 2.2348432540893555, - "585": 2.2053182125091553, - "586": 2.2032017707824707, - "587": 2.2160887718200684, - "588": 2.2444844245910645, - "589": 2.2254462242126465, - "590": 2.2361598014831543, - "591": 2.222188711166382, - "592": 2.216907262802124, - "593": 2.2255680561065674 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 1.24309, - 0.78098, - 1.02008, - 1.14517, - 1.80346, - 1.64448, - 1.50201, - 1.97904, - 2.37491, - 2.35584, - 2.00549, - 1.74751, - 1.76584, - 1.82582, - 1.83357, - 1.35995, - 1.31687, - 1.81116, - 1.87788, - 2.00752, - 1.90908, - 1.56731, - 1.6525, - 1.76125, - 1.73198, - 1.87715, - 1.85864, - 1.87775, - 1.90016, - 1.86701, - 1.7787, - 1.93461, - 2.25319, - 2.16312, - 2.47617, - 3.11573, - 2.66137, - 2.14542, - 1.76515, - 1.74138, - 1.92568, - 1.86991, - 1.58941, - 1.54371, - 2.04726, - 2.04695, - 1.69136, - 1.41296, - 1.34354, - 1.44881, - 1.79567, - 1.89537, - 1.79667, - 1.4712 - ], - "train_epoch_time": 5.058496952056885, - "train_loss": 2.2306774993195937, - "train_score": 0.3424453013511094, - "val_loss": 2.296033101569087, - "val_score": 0.3286721436766615 - }, - { - "epoch": 11, - "grad_norm": 1.0349370241165161, - "learning_rate": 0.215, - "model_norm": 87.68195343017578, - "step_logs": { - "grad_norm": { - "594": 1.0943937301635742, - "595": 0.9796769022941589, - "596": 1.0310578346252441, - "597": 1.0670500993728638, - "598": 1.1307390928268433, - "599": 1.1710940599441528, - "600": 1.156296968460083, - "601": 1.2133711576461792, - "602": 1.1611528396606445, - "603": 1.1222338676452637, - "604": 1.2052091360092163, - "605": 1.2590749263763428, - "606": 1.176969051361084, - "607": 1.137434482574463, - "608": 1.080508828163147, - "609": 1.029705286026001, - "610": 1.0361080169677734, - "611": 1.053898572921753, - "612": 1.0411697626113892, - "613": 1.0509928464889526, - "614": 1.1699482202529907, - "615": 1.5786114931106567, - "616": 2.08183217048645, - "617": 1.4628498554229736, - "618": 2.068373918533325, - "619": 1.6212427616119385, - "620": 1.5444722175598145, - "621": 1.4550143480300903, - "622": 1.1803194284439087, - "623": 1.21443772315979, - "624": 1.2281582355499268, - "625": 1.2683097124099731, - "626": 1.3153064250946045, - "627": 1.1806786060333252, - "628": 0.9262410402297974, - "629": 0.944209635257721, - "630": 1.0660189390182495, - "631": 1.0335074663162231, - "632": 0.9160115718841553, - "633": 0.847176194190979, - "634": 0.7867324352264404, - "635": 0.8313835263252258, - "636": 0.8352426290512085, - "637": 0.8256402611732483, - "638": 0.839388370513916, - "639": 0.8972194194793701, - "640": 0.9569922089576721, - "641": 1.0144327878952026, - "642": 1.0443109273910522, - "643": 1.041453242301941, - "644": 1.0219300985336304, - "645": 0.9971755743026733, - "646": 0.9836614727973938, - "647": 1.0349370241165161 - }, - "loss": { - "594": 2.2265052795410156, - "595": 2.223076820373535, - "596": 2.2065114974975586, - "597": 2.1909635066986084, - "598": 2.200305461883545, - "599": 2.1981287002563477, - "600": 2.197580337524414, - "601": 2.230177164077759, - "602": 2.225196361541748, - "603": 2.223018169403076, - "604": 2.2023518085479736, - "605": 2.24660325050354, - "606": 2.214543104171753, - "607": 2.215930938720703, - "608": 2.190735340118408, - "609": 2.2196168899536133, - "610": 2.2095272541046143, - "611": 2.177351951599121, - "612": 2.2138514518737793, - "613": 2.1806013584136963, - "614": 2.2112765312194824, - "615": 2.2364535331726074, - "616": 2.3116908073425293, - "617": 2.3622446060180664, - "618": 2.392953872680664, - "619": 2.336181163787842, - "620": 2.2802348136901855, - "621": 2.2987122535705566, - "622": 2.2206177711486816, - "623": 2.2300620079040527, - "624": 2.2312326431274414, - "625": 2.216897964477539, - "626": 2.2371466159820557, - "627": 2.2626171112060547, - "628": 2.1957650184631348, - "629": 2.1836671829223633, - "630": 2.173825263977051, - "631": 2.192389965057373, - "632": 2.189667224884033, - "633": 2.1856131553649902, - "634": 2.1460840702056885, - "635": 2.162750720977783, - "636": 2.178058624267578, - "637": 2.1860759258270264, - "638": 2.1804001331329346, - "639": 2.1534974575042725, - "640": 2.1620194911956787, - "641": 2.195159912109375, - "642": 2.184896469116211, - "643": 2.2083370685577393, - "644": 2.1703310012817383, - "645": 2.191070556640625, - "646": 2.1832213401794434, - "647": 2.1793670654296875 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 1.85899, - 2.31627, - 2.07558, - 1.92427, - 1.72091, - 1.60276, - 1.64364, - 1.51479, - 1.6504, - 1.76513, - 1.51622, - 1.41717, - 1.59865, - 1.71279, - 1.87643, - 2.0934, - 2.05821, - 1.96034, - 2.04223, - 1.97413, - 1.61551, - 0.897448, - 0.533382, - 1.10389, - 0.55934, - 0.888813, - 0.955915, - 1.0858, - 1.59395, - 1.51205, - 1.47923, - 1.37815, - 1.29313, - 1.62311, - 2.5594, - 2.44934, - 1.91291, - 2.05254, - 2.60961, - 3.04527, - 3.46731, - 3.12899, - 3.12208, - 3.20689, - 3.09464, - 2.67514, - 2.36071, - 2.13314, - 2.00342, - 2.03604, - 2.07818, - 2.2035, - 2.25635, - 2.03471 - ], - "train_epoch_time": 5.0525007247924805, - "train_loss": 2.1892604366094517, - "train_score": 0.3530880111447366, - "val_loss": 2.271391089009089, - "val_score": 0.33596440841765685 - }, - { - "epoch": 12, - "grad_norm": 0.6247879266738892, - "learning_rate": 0.215, - "model_norm": 87.70884704589844, - "step_logs": { - "grad_norm": { - "648": 1.27436363697052, - "649": 1.3458645343780518, - "650": 1.3336223363876343, - "651": 1.283778190612793, - "652": 1.1900434494018555, - "653": 1.0207973718643188, - "654": 0.9495716094970703, - "655": 0.9589857459068298, - "656": 0.9915689826011658, - "657": 0.9953168630599976, - "658": 0.9650532603263855, - "659": 0.8956722021102905, - "660": 0.8467937111854553, - "661": 0.8934627771377563, - "662": 0.9381335377693176, - "663": 0.9531769752502441, - "664": 0.8920285105705261, - "665": 0.7697070837020874, - "666": 0.8196448087692261, - "667": 0.7981131672859192, - "668": 0.8364932537078857, - "669": 0.9031451344490051, - "670": 0.884121835231781, - "671": 0.9704537987709045, - "672": 1.037497639656067, - "673": 0.9718296527862549, - "674": 0.9248392581939697, - "675": 0.8426758050918579, - "676": 0.8113840222358704, - "677": 0.7985914349555969, - "678": 0.7930048108100891, - "679": 0.8237600922584534, - "680": 0.8027164340019226, - "681": 0.8228554129600525, - "682": 0.8873187899589539, - "683": 0.9340026378631592, - "684": 0.988332211971283, - "685": 1.0457377433776855, - "686": 0.9370012283325195, - "687": 0.8158568143844604, - "688": 0.7143938541412354, - "689": 0.6188098788261414, - "690": 0.6002197861671448, - "691": 0.5776981711387634, - "692": 0.5639108419418335, - "693": 0.5528355836868286, - "694": 0.6216949224472046, - "695": 0.7398737072944641, - "696": 0.7709805369377136, - "697": 0.7204237580299377, - "698": 0.7669119238853455, - "699": 0.8541778326034546, - "700": 0.7806927561759949, - "701": 0.6247879266738892 - }, - "loss": { - "648": 2.187340497970581, - "649": 2.236661672592163, - "650": 2.181426525115967, - "651": 2.2223172187805176, - "652": 2.206108570098877, - "653": 2.180610179901123, - "654": 2.176434278488159, - "655": 2.179388999938965, - "656": 2.1816482543945312, - "657": 2.1746327877044678, - "658": 2.169236183166504, - "659": 2.1697540283203125, - "660": 2.1555299758911133, - "661": 2.1336028575897217, - "662": 2.1822409629821777, - "663": 2.1862268447875977, - "664": 2.1699609756469727, - "665": 2.158590078353882, - "666": 2.123161792755127, - "667": 2.118091583251953, - "668": 2.1530086994171143, - "669": 2.1503145694732666, - "670": 2.151559591293335, - "671": 2.1622719764709473, - "672": 2.174621820449829, - "673": 2.1316094398498535, - "674": 2.1538071632385254, - "675": 2.1402854919433594, - "676": 2.10453200340271, - "677": 2.126291036605835, - "678": 2.116116523742676, - "679": 2.146627426147461, - "680": 2.1377975940704346, - "681": 2.140343189239502, - "682": 2.1301398277282715, - "683": 2.143266201019287, - "684": 2.1528308391571045, - "685": 2.147887945175171, - "686": 2.164555549621582, - "687": 2.168947696685791, - "688": 2.127497673034668, - "689": 2.109205961227417, - "690": 2.1138579845428467, - "691": 2.112443447113037, - "692": 2.08705997467041, - "693": 2.0793814659118652, - "694": 2.109280586242676, - "695": 2.128284215927124, - "696": 2.1036956310272217, - "697": 2.141223430633545, - "698": 2.1119680404663086, - "699": 2.1471896171569824, - "700": 2.1311025619506836, - "701": 2.100067615509033 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 1.34688, - 1.2348, - 1.22652, - 1.34842, - 1.55776, - 2.09266, - 2.41374, - 2.36979, - 2.21891, - 2.19514, - 2.32919, - 2.70466, - 3.00607, - 2.67276, - 2.47955, - 2.40629, - 2.72706, - 3.6435, - 3.16032, - 3.32518, - 3.07695, - 2.63625, - 2.75251, - 2.29594, - 2.02027, - 2.25698, - 2.51811, - 3.01405, - 3.19671, - 3.33406, - 3.36502, - 3.16341, - 3.31774, - 3.16109, - 2.70551, - 2.45686, - 2.20396, - 1.96411, - 2.46541, - 3.25853, - 4.16863, - 5.50813, - 5.86753, - 6.3297, - 6.56318, - 6.80365, - 5.45732, - 3.88789, - 3.53913, - 4.12559, - 3.59084, - 2.94289, - 3.49658, - 5.37982 - ], - "train_epoch_time": 5.051907539367676, - "train_loss": 2.1044700938625693, - "train_score": 0.3756164813742918, - "val_loss": 2.1938876284523756, - "val_score": 0.35201546313705184 - }, - { - "epoch": 13, - "grad_norm": 0.502764880657196, - "learning_rate": 0.14333333333333334, - "model_norm": 87.7265853881836, - "step_logs": { - "grad_norm": { - "702": 0.6563643217086792, - "703": 0.6571993827819824, - "704": 0.6090049147605896, - "705": 0.5090834498405457, - "706": 0.5032826066017151, - "707": 0.5454604625701904, - "708": 0.5655675530433655, - "709": 0.5795179009437561, - "710": 0.5758205652236938, - "711": 0.5833280086517334, - "712": 0.5649846196174622, - "713": 0.6254504919052124, - "714": 0.658409595489502, - "715": 0.6992095708847046, - "716": 0.648937463760376, - "717": 0.5740047693252563, - "718": 0.5344383716583252, - "719": 0.5104644894599915, - "720": 0.521329402923584, - "721": 0.5099399089813232, - "722": 0.5251731276512146, - "723": 0.5438639521598816, - "724": 0.5583299994468689, - "725": 0.5773388147354126, - "726": 0.5739305019378662, - "727": 0.5646443367004395, - "728": 0.584309995174408, - "729": 0.5981665253639221, - "730": 0.5569187998771667, - "731": 0.5464934706687927, - "732": 0.5473659634590149, - "733": 0.5321969985961914, - "734": 0.49224385619163513, - "735": 0.5335285663604736, - "736": 0.6055594086647034, - "737": 0.6290929317474365, - "738": 0.6229736804962158, - "739": 0.5933385491371155, - "740": 0.514173686504364, - "741": 0.504342794418335, - "742": 0.44745633006095886, - "743": 0.49347758293151855, - "744": 0.4832867383956909, - "745": 0.4926929175853729, - "746": 0.48200470209121704, - "747": 0.4366026818752289, - "748": 0.48769909143447876, - "749": 0.5025731921195984, - "750": 0.5172049403190613, - "751": 0.46526020765304565, - "752": 0.5184414982795715, - "753": 0.43663185834884644, - "754": 0.4778238534927368, - "755": 0.502764880657196 - }, - "loss": { - "702": 2.0921363830566406, - "703": 2.1032629013061523, - "704": 2.0804810523986816, - "705": 2.0745372772216797, - "706": 2.099489688873291, - "707": 2.1054189205169678, - "708": 2.1142611503601074, - "709": 2.095829486846924, - "710": 2.1037867069244385, - "711": 2.104379653930664, - "712": 2.1010234355926514, - "713": 2.107572555541992, - "714": 2.091850757598877, - "715": 2.1197190284729004, - "716": 2.0960986614227295, - "717": 2.1105685234069824, - "718": 2.103867530822754, - "719": 2.089076042175293, - "720": 2.0880379676818848, - "721": 2.0939292907714844, - "722": 2.087529420852661, - "723": 2.075826406478882, - "724": 2.0881247520446777, - "725": 2.078279972076416, - "726": 2.0891664028167725, - "727": 2.065402030944824, - "728": 2.083779811859131, - "729": 2.094331741333008, - "730": 2.1098685264587402, - "731": 2.0717971324920654, - "732": 2.0999763011932373, - "733": 2.0813305377960205, - "734": 2.0774643421173096, - "735": 2.0797691345214844, - "736": 2.092348098754883, - "737": 2.068068027496338, - "738": 2.0782954692840576, - "739": 2.098259925842285, - "740": 2.100529909133911, - "741": 2.070366621017456, - "742": 2.1026504039764404, - "743": 2.1024317741394043, - "744": 2.084796905517578, - "745": 2.0935921669006348, - "746": 2.07623553276062, - "747": 2.0815622806549072, - "748": 2.073194742202759, - "749": 2.0360491275787354, - "750": 2.069887638092041, - "751": 2.0653672218322754, - "752": 2.0628838539123535, - "753": 2.066896677017212, - "754": 2.0598807334899902, - "755": 2.0590217113494873 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 4.85624, - 4.86967, - 5.60947, - 8.00467, - 8.28877, - 7.07639, - 6.60982, - 6.24054, - 6.34494, - 6.18441, - 6.582, - 5.38762, - 4.82546, - 4.33574, - 4.97744, - 6.40573, - 7.36585, - 8.01721, - 7.6827, - 8.05238, - 7.56881, - 7.01795, - 6.69845, - 6.23509, - 6.34241, - 6.4782, - 6.10331, - 5.85331, - 6.80255, - 6.93709, - 7.00904, - 7.34846, - 8.57379, - 7.30634, - 5.70585, - 5.22559, - 5.3551, - 5.96011, - 7.94528, - 8.13946, - 10.5018, - 8.6335, - 8.92594, - 8.62461, - 8.93664, - 10.9198, - 8.71638, - 8.06101, - 7.73787, - 9.54125, - 7.67495, - 10.8415, - 9.02207, - 8.14575 - ], - "train_epoch_time": 5.052689075469971, - "train_loss": 2.069272039610481, - "train_score": 0.3860081152474692, - "val_loss": 2.1647764798831175, - "val_score": 0.3620434486907057 - }, - { - "epoch": 14, - "grad_norm": 0.4239129424095154, - "learning_rate": 0.07166666666666667, - "model_norm": 87.73258209228516, - "step_logs": { - "grad_norm": { - "756": 0.487050861120224, - "757": 0.4544031322002411, - "758": 0.42830315232276917, - "759": 0.4815734326839447, - "760": 0.4934178292751312, - "761": 0.4224790334701538, - "762": 0.43204110860824585, - "763": 0.45534443855285645, - "764": 0.4864194095134735, - "765": 0.47531047463417053, - "766": 0.4640043377876282, - "767": 0.4463971257209778, - "768": 0.49678027629852295, - "769": 0.4804724454879761, - "770": 0.4824571907520294, - "771": 0.45870834589004517, - "772": 0.4417542517185211, - "773": 0.48939937353134155, - "774": 0.4479179084300995, - "775": 0.44021567702293396, - "776": 0.44497251510620117, - "777": 0.4297047555446625, - "778": 0.4160434603691101, - "779": 0.45847994089126587, - "780": 0.4314284920692444, - "781": 0.44189924001693726, - "782": 0.47223207354545593, - "783": 0.5139621496200562, - "784": 0.4329736828804016, - "785": 0.4623982012271881, - "786": 0.43171170353889465, - "787": 0.48285242915153503, - "788": 0.46608272194862366, - "789": 0.4571160674095154, - "790": 0.4436997175216675, - "791": 0.4596877694129944, - "792": 0.41174277663230896, - "793": 0.415783166885376, - "794": 0.4652421772480011, - "795": 0.4222385883331299, - "796": 0.4403056502342224, - "797": 0.42745882272720337, - "798": 0.41921210289001465, - "799": 0.4269837737083435, - "800": 0.41298189759254456, - "801": 0.4164678454399109, - "802": 0.43775880336761475, - "803": 0.4333553910255432, - "804": 0.42356058955192566, - "805": 0.47225651144981384, - "806": 0.4127597212791443, - "807": 0.42735517024993896, - "808": 0.4763546586036682, - "809": 0.4239129424095154 - }, - "loss": { - "756": 2.076101303100586, - "757": 2.068575620651245, - "758": 2.067920207977295, - "759": 2.0638036727905273, - "760": 2.044358968734741, - "761": 2.072505474090576, - "762": 2.0743305683135986, - "763": 2.0558788776397705, - "764": 2.060443878173828, - "765": 2.0599725246429443, - "766": 2.0845131874084473, - "767": 2.0658109188079834, - "768": 2.0891785621643066, - "769": 2.0539777278900146, - "770": 2.064149856567383, - "771": 2.043766975402832, - "772": 2.073176383972168, - "773": 2.0630340576171875, - "774": 2.079786539077759, - "775": 2.0677788257598877, - "776": 2.051941394805908, - "777": 2.0560293197631836, - "778": 2.0794830322265625, - "779": 2.068795680999756, - "780": 2.0758466720581055, - "781": 2.0535378456115723, - "782": 2.033247470855713, - "783": 2.07309627532959, - "784": 2.060553550720215, - "785": 2.0748229026794434, - "786": 2.0392563343048096, - "787": 2.074115514755249, - "788": 2.0639984607696533, - "789": 2.0741524696350098, - "790": 2.050894260406494, - "791": 2.074471950531006, - "792": 2.062798023223877, - "793": 2.0665621757507324, - "794": 2.078562021255493, - "795": 2.057570457458496, - "796": 2.0507564544677734, - "797": 2.069823741912842, - "798": 2.0648856163024902, - "799": 2.051821231842041, - "800": 2.048342227935791, - "801": 2.055738925933838, - "802": 2.051240921020508, - "803": 2.0423755645751953, - "804": 2.0334811210632324, - "805": 2.0591909885406494, - "806": 2.059598207473755, - "807": 2.0358939170837402, - "808": 2.0849485397338867, - "809": 2.072460174560547 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 8.75185, - 10.0182, - 11.2728, - 8.89904, - 8.39706, - 11.6114, - 11.1129, - 9.91556, - 8.70841, - 9.11815, - 9.6819, - 10.3669, - 8.46539, - 8.89731, - 8.86796, - 9.71311, - 10.6237, - 8.6135, - 10.3663, - 10.6702, - 10.3633, - 11.135, - 12.0137, - 9.84185, - 11.1526, - 10.5161, - 9.11757, - 7.84797, - 10.9916, - 9.70395, - 10.9417, - 8.89619, - 9.50131, - 9.92631, - 10.4175, - 9.81707, - 12.1676, - 11.954, - 9.60295, - 11.5409, - 10.578, - 11.3278, - 11.7497, - 11.2543, - 12.0099, - 11.8524, - 10.704, - 10.8754, - 11.3347, - 9.23295, - 12.0889, - 11.1475, - 9.18829, - 11.5327 - ], - "train_epoch_time": 5.05340576171875, - "train_loss": 2.0568622892179995, - "train_score": 0.3888607425532348, - "val_loss": 2.154515545896493, - "val_score": 0.36438450822873997 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:23:41.394636", - "final_model_norm": 87.73258209228516, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:21:56.604458", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 4.7496337890625, - "learning_rate": 2.15e-11, - "model_norm": 87.43766021728516, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.733994483947754, - "3": 8.656057357788086, - "4": 20.53462028503418, - "5": 7.524908065795898, - "6": 5.193840503692627, - "7": 4.4241251945495605, - "8": 3.958240032196045, - "9": 6.524819850921631, - "10": 4.840961456298828, - "11": 53.18486404418945, - "12": 5.4986419677734375, - "13": 12.442137718200684, - "14": 6.462600231170654, - "15": 11.177252769470215, - "16": 10.306532859802246, - "17": 8.236608505249023, - "18": 6.942298412322998, - "19": 3.816582202911377, - "20": 5.433033466339111, - "21": 11.737058639526367, - "22": 15.485664367675781, - "23": 4.85066032409668, - "24": 13.537721633911133, - "25": 6.891615867614746, - "26": 8.508872032165527, - "27": 12.443584442138672, - "28": 3.6626434326171875, - "29": 6.212114334106445, - "30": 3.7766146659851074, - "31": 3.7971911430358887, - "32": 6.624378204345703, - "33": 4.688662528991699, - "34": 7.838125228881836, - "35": 5.432688236236572, - "36": 8.92573070526123, - "37": 6.702051639556885, - "38": 3.7504196166992188, - "39": 9.982173919677734, - "40": 4.434970855712891, - "41": 8.199506759643555, - "42": 3.068848133087158, - "43": 5.904267311096191, - "44": 3.240138530731201, - "45": 5.36593770980835, - "46": 2.441917657852173, - "47": 5.976339340209961, - "48": 11.575305938720703, - "49": 1.9051425457000732, - "50": 6.530988693237305, - "51": 2.7845442295074463, - "52": 3.0467329025268555, - "53": 4.7496337890625 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.8340747356414795, - "3": 3.709585666656494, - "4": 4.2086381912231445, - "5": 4.107909202575684, - "6": 3.57332706451416, - "7": 3.5852417945861816, - "8": 3.5281410217285156, - "9": 3.475802421569824, - "10": 3.82902193069458, - "11": 3.70784854888916, - "12": 3.4101204872131348, - "13": 3.5326273441314697, - "14": 3.489149570465088, - "15": 5.853799819946289, - "16": 3.937934398651123, - "17": 3.9702212810516357, - "18": 3.6945838928222656, - "19": 3.658841609954834, - "20": 3.4033401012420654, - "21": 3.869394063949585, - "22": 5.435483932495117, - "23": 3.5983214378356934, - "24": 3.690361261367798, - "25": 3.3407294750213623, - "26": 3.743614912033081, - "27": 3.74813175201416, - "28": 3.2972519397735596, - "29": 3.2863545417785645, - "30": 3.3834760189056396, - "31": 3.21584415435791, - "32": 3.399313449859619, - "33": 3.2554330825805664, - "34": 3.6103506088256836, - "35": 3.293485641479492, - "36": 3.268389940261841, - "37": 3.3465490341186523, - "38": 3.397594928741455, - "39": 3.386854648590088, - "40": 3.106630802154541, - "41": 3.451869249343872, - "42": 3.1569747924804688, - "43": 3.1236138343811035, - "44": 3.286839723587036, - "45": 3.2410733699798584, - "46": 2.995837926864624, - "47": 3.1895546913146973, - "48": 3.5327181816101074, - "49": 2.920980930328369, - "50": 3.262308120727539, - "51": 3.108814239501953, - "52": 3.072892189025879, - "53": 3.399852752685547 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 0.00869338, - 0.00874607, - 0.0845503, - 0.0495091, - 0.00998087, - 0.0725468, - 0.132463, - 0.183174, - 0.225186, - 0.0816428, - 0.16339, - 0.00131083, - 0.112787, - 0.0228196, - 0.083542, - 0.0468563, - 0.0370718, - 0.0585218, - 0.0766583, - 0.251185, - 0.115298, - 0.0280882, - 0.0226662, - 0.152932, - 0.0201362, - 0.0703395, - 0.0517067, - 0.024206, - 0.245789, - 0.08516, - 0.237224, - 0.223033, - 0.0774642, - 0.148085, - 0.0587658, - 0.11159, - 0.0410248, - 0.0745044, - 0.241553, - 0.0339896, - 0.157946, - 0.0513427, - 0.335213, - 0.0896036, - 0.313077, - 0.112564, - 0.502408, - 0.0893017, - 0.026366, - 0.804774, - 0.0764834, - 0.400947, - 0.331039, - 0.150709 - ], - "train_epoch_time": 5.061100482940674, - "train_loss": 3.242945489513994, - "train_score": 0.19822565453979515, - "val_loss": 3.2406251821944965, - "val_score": 0.19662654280115077 - }, - { - "epoch": 1, - "grad_norm": 0.9895183444023132, - "learning_rate": 0.215, - "model_norm": 87.45398712158203, - "step_logs": { - "grad_norm": { - "54": 3.5259673595428467, - "55": 2.9568469524383545, - "56": 3.482332229614258, - "57": 4.4543046951293945, - "58": 4.397068500518799, - "59": 3.0835766792297363, - "60": 3.778782367706299, - "61": 6.872467517852783, - "62": 2.8213114738464355, - "63": 2.861396074295044, - "64": 3.0130221843719482, - "65": 2.8980464935302734, - "66": 2.922877073287964, - "67": 2.683718204498291, - "68": 5.099470138549805, - "69": 2.7585859298706055, - "70": 2.321791172027588, - "71": 2.312845230102539, - "72": 2.170801877975464, - "73": 2.049748182296753, - "74": 3.4217114448547363, - "75": 1.847901463508606, - "76": 1.9583847522735596, - "77": 1.7942246198654175, - "78": 1.5130752325057983, - "79": 1.0910139083862305, - "80": 1.5957891941070557, - "81": 1.668270230293274, - "82": 1.5282151699066162, - "83": 1.5714319944381714, - "84": 1.681351661682129, - "85": 1.540235161781311, - "86": 1.353806495666504, - "87": 1.3609486818313599, - "88": 1.676891803741455, - "89": 1.5412225723266602, - "90": 1.1339129209518433, - "91": 1.1716455221176147, - "92": 1.718323826789856, - "93": 1.705134630203247, - "94": 1.257216215133667, - "95": 1.2187247276306152, - "96": 1.6744968891143799, - "97": 1.5709903240203857, - "98": 1.5189322233200073, - "99": 1.488869547843933, - "100": 1.346064805984497, - "101": 1.2822684049606323, - "102": 1.3047657012939453, - "103": 1.5115406513214111, - "104": 1.8590677976608276, - "105": 1.5442497730255127, - "106": 0.9806906580924988, - "107": 0.9895183444023132 - }, - "loss": { - "54": 3.2356085777282715, - "55": 3.3969006538391113, - "56": 3.1670751571655273, - "57": 3.4155526161193848, - "58": 3.246426582336426, - "59": 3.0758066177368164, - "60": 3.3291893005371094, - "61": 3.236724853515625, - "62": 2.9802818298339844, - "63": 3.085689067840576, - "64": 3.1118011474609375, - "65": 3.2212648391723633, - "66": 3.060415267944336, - "67": 3.0899100303649902, - "68": 3.204525947570801, - "69": 3.2052576541900635, - "70": 2.875473976135254, - "71": 3.0130062103271484, - "72": 2.8758559226989746, - "73": 2.9374003410339355, - "74": 2.918041706085205, - "75": 3.2483158111572266, - "76": 3.008821487426758, - "77": 2.7938528060913086, - "78": 2.7898197174072266, - "79": 2.651493787765503, - "80": 2.651546001434326, - "81": 2.798872470855713, - "82": 2.687563180923462, - "83": 2.7098217010498047, - "84": 2.698456287384033, - "85": 2.762613534927368, - "86": 2.639796018600464, - "87": 2.656764268875122, - "88": 2.6556339263916016, - "89": 2.75466251373291, - "90": 2.668826103210449, - "91": 2.5953469276428223, - "92": 2.680950164794922, - "93": 2.748776435852051, - "94": 2.6541833877563477, - "95": 2.6150248050689697, - "96": 2.6706058979034424, - "97": 2.7313241958618164, - "98": 2.647160053253174, - "99": 2.708970546722412, - "100": 2.629701852798462, - "101": 2.623826503753662, - "102": 2.6218488216400146, - "103": 2.624159097671509, - "104": 2.6723456382751465, - "105": 2.7639923095703125, - "106": 2.566903591156006, - "107": 2.5794601440429688 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.260255, - 0.388531, - 0.261167, - 0.172148, - 0.167911, - 0.323482, - 0.23315, - 0.0685299, - 0.374417, - 0.376874, - 0.342773, - 0.383545, - 0.358228, - 0.429015, - 0.123229, - 0.421201, - 0.533412, - 0.563257, - 0.610277, - 0.699137, - 0.249233, - 0.951263, - 0.784513, - 0.86786, - 1.21858, - 2.22756, - 1.04123, - 1.00566, - 1.15077, - 1.09736, - 0.954549, - 1.16452, - 1.44031, - 1.4344, - 0.944405, - 1.15968, - 2.07568, - 1.89061, - 0.907985, - 0.945414, - 1.67923, - 1.76062, - 0.952448, - 1.10669, - 1.14737, - 1.22206, - 1.45136, - 1.5958, - 1.54008, - 1.14855, - 0.773218, - 1.15905, - 2.66898, - 2.6344 - ], - "train_epoch_time": 5.051178932189941, - "train_loss": 2.5824617500797746, - "train_score": 0.253816580002756, - "val_loss": 2.6019529436814386, - "val_score": 0.24687858772743174 - }, - { - "epoch": 2, - "grad_norm": 1.438843846321106, - "learning_rate": 0.215, - "model_norm": 87.4708480834961, - "step_logs": { - "grad_norm": { - "108": 1.2233319282531738, - "109": 1.2875303030014038, - "110": 1.4738836288452148, - "111": 1.4504550695419312, - "112": 1.3059566020965576, - "113": 1.3362373113632202, - "114": 1.232397437095642, - "115": 1.2788878679275513, - "116": 1.3838263750076294, - "117": 1.3362531661987305, - "118": 1.189020037651062, - "119": 1.1656744480133057, - "120": 1.409247875213623, - "121": 1.4537246227264404, - "122": 1.6610594987869263, - "123": 1.5994174480438232, - "124": 1.2185155153274536, - "125": 1.1961498260498047, - "126": 1.2616292238235474, - "127": 1.2709039449691772, - "128": 1.336777925491333, - "129": 1.2213470935821533, - "130": 1.0613573789596558, - "131": 1.0799349546432495, - "132": 1.2445788383483887, - "133": 1.2968882322311401, - "134": 1.3268803358078003, - "135": 1.312757968902588, - "136": 1.2625645399093628, - "137": 1.2799403667449951, - "138": 1.3694570064544678, - "139": 1.4357945919036865, - "140": 1.4158834218978882, - "141": 1.4277855157852173, - "142": 1.3735074996948242, - "143": 1.360901951789856, - "144": 1.235777735710144, - "145": 1.2103146314620972, - "146": 1.0776960849761963, - "147": 0.950208842754364, - "148": 0.973766028881073, - "149": 1.1087901592254639, - "150": 1.2300769090652466, - "151": 1.2014013528823853, - "152": 1.0850319862365723, - "153": 1.2147715091705322, - "154": 1.4230504035949707, - "155": 1.9762364625930786, - "156": 1.5402697324752808, - "157": 1.1045266389846802, - "158": 1.1548722982406616, - "159": 1.2398775815963745, - "160": 1.659959077835083, - "161": 1.438843846321106 - }, - "loss": { - "108": 2.5846681594848633, - "109": 2.604456901550293, - "110": 2.6168222427368164, - "111": 2.683922529220581, - "112": 2.5865767002105713, - "113": 2.633469581604004, - "114": 2.5922837257385254, - "115": 2.6114470958709717, - "116": 2.6279919147491455, - "117": 2.6242949962615967, - "118": 2.5978493690490723, - "119": 2.578488349914551, - "120": 2.5887646675109863, - "121": 2.6442418098449707, - "122": 2.602750778198242, - "123": 2.738786458969116, - "124": 2.5735490322113037, - "125": 2.599130630493164, - "126": 2.5824694633483887, - "127": 2.6221070289611816, - "128": 2.578014373779297, - "129": 2.6319899559020996, - "130": 2.5426599979400635, - "131": 2.566744565963745, - "132": 2.5594983100891113, - "133": 2.602893352508545, - "134": 2.5885303020477295, - "135": 2.59574031829834, - "136": 2.559732437133789, - "137": 2.5966176986694336, - "138": 2.569796085357666, - "139": 2.6154351234436035, - "140": 2.5899815559387207, - "141": 2.620474100112915, - "142": 2.5939788818359375, - "143": 2.6111207008361816, - "144": 2.5724313259124756, - "145": 2.593980312347412, - "146": 2.5657131671905518, - "147": 2.5307910442352295, - "148": 2.522359848022461, - "149": 2.5407354831695557, - "150": 2.547524929046631, - "151": 2.5607824325561523, - "152": 2.546299457550049, - "153": 2.539485454559326, - "154": 2.5851902961730957, - "155": 2.632418155670166, - "156": 2.7197184562683105, - "157": 2.5422887802124023, - "158": 2.547421932220459, - "159": 2.5587520599365234, - "160": 2.5815212726593018, - "161": 2.6597466468811035 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 1.72709, - 1.57109, - 1.20461, - 1.27574, - 1.51659, - 1.4749, - 1.70679, - 1.59667, - 1.37234, - 1.46972, - 1.83754, - 1.89763, - 1.30352, - 1.25123, - 0.943327, - 1.07062, - 1.73329, - 1.81659, - 1.62245, - 1.6234, - 1.44267, - 1.76444, - 2.25717, - 2.20083, - 1.65238, - 1.54757, - 1.47025, - 1.50623, - 1.60579, - 1.585, - 1.37026, - 1.2687, - 1.29194, - 1.28545, - 1.37501, - 1.40985, - 1.68447, - 1.7708, - 2.2091, - 2.80297, - 2.6601, - 2.06662, - 1.68366, - 1.77417, - 2.16284, - 1.7209, - 1.27659, - 0.674027, - 1.14638, - 2.08388, - 1.91, - 1.66445, - 0.936873, - 1.28473 - ], - "train_epoch_time": 5.051164865493774, - "train_loss": 2.5496641852763324, - "train_score": 0.2510412930715375, - "val_loss": 2.58637697606355, - "val_score": 0.24500394671573705 - }, - { - "epoch": 3, - "grad_norm": 1.1497336626052856, - "learning_rate": 0.215, - "model_norm": 87.48700714111328, - "step_logs": { - "grad_norm": { - "162": 1.050085425376892, - "163": 1.0326648950576782, - "164": 1.4168496131896973, - "165": 1.5214149951934814, - "166": 1.3445160388946533, - "167": 1.2294894456863403, - "168": 1.1759703159332275, - "169": 1.2555344104766846, - "170": 1.795857310295105, - "171": 1.568656325340271, - "172": 1.1205564737319946, - "173": 1.0407679080963135, - "174": 0.9934329986572266, - "175": 1.0689603090286255, - "176": 1.1351724863052368, - "177": 1.1269174814224243, - "178": 1.094292163848877, - "179": 1.1241751909255981, - "180": 1.207863688468933, - "181": 1.260880708694458, - "182": 1.2338262796401978, - "183": 1.3229544162750244, - "184": 1.7132389545440674, - "185": 1.4439713954925537, - "186": 1.0494155883789062, - "187": 1.08171546459198, - "188": 1.243632435798645, - "189": 1.132729411125183, - "190": 1.1325690746307373, - "191": 1.2239532470703125, - "192": 1.2383285760879517, - "193": 1.1728230714797974, - "194": 1.0139098167419434, - "195": 1.0753616094589233, - "196": 1.2727397680282593, - "197": 1.2477887868881226, - "198": 1.203045129776001, - "199": 1.1582870483398438, - "200": 1.0660293102264404, - "201": 1.074910283088684, - "202": 1.1148866415023804, - "203": 1.275556206703186, - "204": 1.4545255899429321, - "205": 1.4447495937347412, - "206": 1.1876591444015503, - "207": 1.1161009073257446, - "208": 1.0649995803833008, - "209": 1.0433517694473267, - "210": 1.164297342300415, - "211": 1.179956316947937, - "212": 1.1639496088027954, - "213": 1.2394453287124634, - "214": 1.1911393404006958, - "215": 1.1497336626052856 - }, - "loss": { - "162": 2.553956985473633, - "163": 2.5160884857177734, - "164": 2.5572195053100586, - "165": 2.6393539905548096, - "166": 2.573037624359131, - "167": 2.5481529235839844, - "168": 2.55696177482605, - "169": 2.575908660888672, - "170": 2.593052864074707, - "171": 2.713900089263916, - "172": 2.5387673377990723, - "173": 2.551333427429199, - "174": 2.5312585830688477, - "175": 2.529991626739502, - "176": 2.536864757537842, - "177": 2.5450539588928223, - "178": 2.5372281074523926, - "179": 2.5184242725372314, - "180": 2.5571441650390625, - "181": 2.5595500469207764, - "182": 2.5660524368286133, - "183": 2.554115056991577, - "184": 2.6031370162963867, - "185": 2.661167621612549, - "186": 2.5279197692871094, - "187": 2.5267162322998047, - "188": 2.5198121070861816, - "189": 2.565575361251831, - "190": 2.5168821811676025, - "191": 2.5461485385894775, - "192": 2.535763740539551, - "193": 2.549503803253174, - "194": 2.506636381149292, - "195": 2.5353403091430664, - "196": 2.5276269912719727, - "197": 2.556729793548584, - "198": 2.5351266860961914, - "199": 2.5549492835998535, - "200": 2.518739700317383, - "201": 2.5266060829162598, - "202": 2.5180256366729736, - "203": 2.532019853591919, - "204": 2.548419713973999, - "205": 2.5820398330688477, - "206": 2.5259439945220947, - "207": 2.5355849266052246, - "208": 2.516207695007324, - "209": 2.4979496002197266, - "210": 2.5298194885253906, - "211": 2.5399580001831055, - "212": 2.519296169281006, - "213": 2.5140347480773926, - "214": 2.535700798034668, - "215": 2.520113468170166 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 2.31614, - 2.35943, - 1.27386, - 1.14026, - 1.42336, - 1.68568, - 1.84898, - 1.63408, - 0.804022, - 1.10291, - 2.02188, - 2.35537, - 2.56483, - 2.21409, - 1.96867, - 2.00407, - 2.11881, - 1.99279, - 1.75275, - 1.60996, - 1.68561, - 1.45932, - 0.886872, - 1.27631, - 2.29545, - 2.15939, - 1.62924, - 1.99955, - 1.96216, - 1.69963, - 1.65362, - 1.85349, - 2.43833, - 2.19244, - 1.56039, - 1.64211, - 1.7516, - 1.90436, - 2.21638, - 2.18672, - 2.02581, - 1.55621, - 1.20456, - 1.23702, - 1.79077, - 2.0355, - 2.21844, - 2.29468, - 1.86622, - 1.82429, - 1.85956, - 1.6365, - 1.7872, - 1.90645 - ], - "train_epoch_time": 5.052144289016724, - "train_loss": 2.5310348666039224, - "train_score": 0.23321265247323078, - "val_loss": 2.5781733811791265, - "val_score": 0.23483693355054996 - }, - { - "epoch": 4, - "grad_norm": 1.4136536121368408, - "learning_rate": 0.215, - "model_norm": 87.5055160522461, - "step_logs": { - "grad_norm": { - "216": 1.1256388425827026, - "217": 1.0439687967300415, - "218": 1.0271297693252563, - "219": 1.0465854406356812, - "220": 1.1141142845153809, - "221": 1.3420478105545044, - "222": 1.388972282409668, - "223": 1.1843324899673462, - "224": 1.0988532304763794, - "225": 0.9907215237617493, - "226": 1.0126491785049438, - "227": 1.113301396369934, - "228": 1.1795713901519775, - "229": 1.2027106285095215, - "230": 1.2602288722991943, - "231": 1.2933470010757446, - "232": 1.1825640201568604, - "233": 0.970707356929779, - "234": 0.9286707043647766, - "235": 1.0501418113708496, - "236": 1.1006262302398682, - "237": 1.1589261293411255, - "238": 1.1930242776870728, - "239": 1.3182697296142578, - "240": 1.2061333656311035, - "241": 1.0257818698883057, - "242": 1.0941945314407349, - "243": 1.1340782642364502, - "244": 1.051352620124817, - "245": 1.0220674276351929, - "246": 1.0296192169189453, - "247": 1.1951388120651245, - "248": 1.1458808183670044, - "249": 0.9668431282043457, - "250": 0.9969336986541748, - "251": 1.2322670221328735, - "252": 1.260117530822754, - "253": 1.3114705085754395, - "254": 1.3221094608306885, - "255": 1.1988939046859741, - "256": 1.1063432693481445, - "257": 1.0850253105163574, - "258": 1.1996195316314697, - "259": 1.3038287162780762, - "260": 1.3045973777770996, - "261": 1.0922925472259521, - "262": 0.9888884425163269, - "263": 1.0132226943969727, - "264": 1.182775616645813, - "265": 1.348366141319275, - "266": 1.3008694648742676, - "267": 1.2511227130889893, - "268": 1.27561354637146, - "269": 1.4136536121368408 - }, - "loss": { - "216": 2.5313754081726074, - "217": 2.4926369190216064, - "218": 2.5185351371765137, - "219": 2.5040764808654785, - "220": 2.5085480213165283, - "221": 2.5251452922821045, - "222": 2.579390048980713, - "223": 2.5221755504608154, - "224": 2.550185203552246, - "225": 2.4988090991973877, - "226": 2.517817497253418, - "227": 2.4847073554992676, - "228": 2.5304605960845947, - "229": 2.510681629180908, - "230": 2.54360032081604, - "231": 2.5163745880126953, - "232": 2.5476391315460205, - "233": 2.5137102603912354, - "234": 2.4717116355895996, - "235": 2.4896535873413086, - "236": 2.528299331665039, - "237": 2.501495599746704, - "238": 2.530106782913208, - "239": 2.5251595973968506, - "240": 2.5540010929107666, - "241": 2.488203525543213, - "242": 2.52704119682312, - "243": 2.50433611869812, - "244": 2.5183701515197754, - "245": 2.492636203765869, - "246": 2.4824111461639404, - "247": 2.4939348697662354, - "248": 2.5156242847442627, - "249": 2.472944736480713, - "250": 2.480274200439453, - "251": 2.477083206176758, - "252": 2.521735429763794, - "253": 2.506953716278076, - "254": 2.5420007705688477, - "255": 2.5196309089660645, - "256": 2.5012266635894775, - "257": 2.4918296337127686, - "258": 2.506340265274048, - "259": 2.5137476921081543, - "260": 2.5228233337402344, - "261": 2.512665271759033, - "262": 2.4894466400146484, - "263": 2.4663496017456055, - "264": 2.4855427742004395, - "265": 2.5255002975463867, - "266": 2.5225110054016113, - "267": 2.4934170246124268, - "268": 2.487058401107788, - "269": 2.5473792552948 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 1.99783, - 2.28709, - 2.38725, - 2.28612, - 2.02098, - 1.40201, - 1.337, - 1.79816, - 2.11199, - 2.54583, - 2.45531, - 2.0047, - 1.81866, - 1.73568, - 1.60159, - 1.50434, - 1.82175, - 2.66771, - 2.86599, - 2.25758, - 2.08713, - 1.86247, - 1.77763, - 1.45305, - 1.75562, - 2.3647, - 2.11068, - 1.94718, - 2.27836, - 2.38616, - 2.34164, - 1.74602, - 1.91587, - 2.64547, - 2.49555, - 1.63129, - 1.5881, - 1.45757, - 1.45426, - 1.75297, - 2.04349, - 2.1166, - 1.74162, - 1.4787, - 1.48229, - 2.10599, - 2.54571, - 2.4024, - 1.77671, - 1.38909, - 1.49062, - 1.59292, - 1.52844, - 1.2747 - ], - "train_epoch_time": 5.051509618759155, - "train_loss": 2.5299568458130235, - "train_score": 0.2571747220828167, - "val_loss": 2.565680218892585, - "val_score": 0.2521930607944624 - }, - { - "epoch": 5, - "grad_norm": 1.0233439207077026, - "learning_rate": 0.215, - "model_norm": 87.53192901611328, - "step_logs": { - "grad_norm": { - "270": 1.4621710777282715, - "271": 1.2549599409103394, - "272": 0.9583402872085571, - "273": 0.9238143563270569, - "274": 0.9930229783058167, - "275": 1.0756828784942627, - "276": 1.206313967704773, - "277": 1.2826403379440308, - "278": 1.1965733766555786, - "279": 1.1301473379135132, - "280": 1.1039425134658813, - "281": 1.1750026941299438, - "282": 1.3242261409759521, - "283": 1.2207683324813843, - "284": 1.1407650709152222, - "285": 1.0587184429168701, - "286": 0.9370437264442444, - "287": 1.0092836618423462, - "288": 1.2136086225509644, - "289": 1.2424288988113403, - "290": 1.314866542816162, - "291": 1.2721630334854126, - "292": 1.1575560569763184, - "293": 1.1037946939468384, - "294": 1.1253407001495361, - "295": 1.1398760080337524, - "296": 1.1920768022537231, - "297": 1.2422540187835693, - "298": 1.3192061185836792, - "299": 1.3274704217910767, - "300": 1.1823089122772217, - "301": 1.0793026685714722, - "302": 1.1569048166275024, - "303": 1.4129902124404907, - "304": 1.3153616189956665, - "305": 1.176673173904419, - "306": 1.2608675956726074, - "307": 1.2719707489013672, - "308": 1.168274998664856, - "309": 1.134477138519287, - "310": 1.115610122680664, - "311": 1.264222264289856, - "312": 1.3564943075180054, - "313": 1.4025533199310303, - "314": 1.2757071256637573, - "315": 1.0213652849197388, - "316": 0.9081512689590454, - "317": 0.9712821841239929, - "318": 1.050710916519165, - "319": 1.0866272449493408, - "320": 1.114082932472229, - "321": 1.2214831113815308, - "322": 1.2293280363082886, - "323": 1.0233439207077026 - }, - "loss": { - "270": 2.514986515045166, - "271": 2.5509705543518066, - "272": 2.4738762378692627, - "273": 2.482973575592041, - "274": 2.4760775566101074, - "275": 2.474302053451538, - "276": 2.4774842262268066, - "277": 2.5124454498291016, - "278": 2.488851308822632, - "279": 2.4821102619171143, - "280": 2.470085620880127, - "281": 2.487722635269165, - "282": 2.5030651092529297, - "283": 2.529686212539673, - "284": 2.445143699645996, - "285": 2.480433464050293, - "286": 2.430105686187744, - "287": 2.4367318153381348, - "288": 2.440614700317383, - "289": 2.4875807762145996, - "290": 2.47745418548584, - "291": 2.4872961044311523, - "292": 2.4253389835357666, - "293": 2.44598126411438, - "294": 2.434525489807129, - "295": 2.465590000152588, - "296": 2.451469898223877, - "297": 2.4583263397216797, - "298": 2.489090919494629, - "299": 2.473580837249756, - "300": 2.4507694244384766, - "301": 2.4316000938415527, - "302": 2.422515630722046, - "303": 2.4787473678588867, - "304": 2.466991901397705, - "305": 2.4323315620422363, - "306": 2.416975975036621, - "307": 2.4670331478118896, - "308": 2.4527292251586914, - "309": 2.403153896331787, - "310": 2.4297921657562256, - "311": 2.433335542678833, - "312": 2.466033458709717, - "313": 2.4460132122039795, - "314": 2.460947275161743, - "315": 2.4223339557647705, - "316": 2.402672529220581, - "317": 2.4009952545166016, - "318": 2.4176480770111084, - "319": 2.405160665512085, - "320": 2.4187331199645996, - "321": 2.4114279747009277, - "322": 2.4444549083709717, - "323": 2.39345645904541 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 1.17636, - 1.61974, - 2.69363, - 2.90939, - 2.51099, - 2.13838, - 1.70251, - 1.52717, - 1.73828, - 1.94335, - 2.02684, - 1.80187, - 1.42741, - 1.69746, - 1.87894, - 2.21292, - 2.76761, - 2.39211, - 1.65707, - 1.61151, - 1.43299, - 1.53689, - 1.81004, - 2.0076, - 1.92241, - 1.8976, - 1.72512, - 1.59301, - 1.43026, - 1.40371, - 1.75324, - 2.0874, - 1.80997, - 1.24152, - 1.42586, - 1.75675, - 1.52031, - 1.52483, - 1.79705, - 1.8672, - 1.95229, - 1.52249, - 1.34018, - 1.24343, - 1.51217, - 2.32205, - 2.91325, - 2.54507, - 2.18991, - 2.03696, - 1.94874, - 1.61622, - 1.61751, - 2.28551 - ], - "train_epoch_time": 5.052793741226196, - "train_loss": 2.3776131450702334, - "train_score": 0.3044767754320777, - "val_loss": 2.438130426899574, - "val_score": 0.2920448832752784 - }, - { - "epoch": 6, - "grad_norm": 1.1622827053070068, - "learning_rate": 0.215, - "model_norm": 87.55876922607422, - "step_logs": { - "grad_norm": { - "324": 0.8716362714767456, - "325": 0.89914870262146, - "326": 1.0766135454177856, - "327": 1.2029300928115845, - "328": 1.468245506286621, - "329": 1.4433614015579224, - "330": 1.2462615966796875, - "331": 1.1867982149124146, - "332": 1.133683681488037, - "333": 1.1377207040786743, - "334": 1.117013931274414, - "335": 1.1750456094741821, - "336": 1.2225422859191895, - "337": 1.1788886785507202, - "338": 1.1226738691329956, - "339": 1.145333170890808, - "340": 1.1781138181686401, - "341": 1.1886587142944336, - "342": 1.2688055038452148, - "343": 1.4411208629608154, - "344": 1.1073471307754517, - "345": 0.8979843258857727, - "346": 0.8883922696113586, - "347": 0.9640265107154846, - "348": 1.3548240661621094, - "349": 1.4462450742721558, - "350": 1.356637716293335, - "351": 1.3427813053131104, - "352": 1.3249545097351074, - "353": 1.2640893459320068, - "354": 1.122170329093933, - "355": 1.0957119464874268, - "356": 1.174031376838684, - "357": 1.2044655084609985, - "358": 1.0939375162124634, - "359": 1.1627094745635986, - "360": 1.2441326379776, - "361": 1.1325262784957886, - "362": 0.9843354821205139, - "363": 1.0464366674423218, - "364": 1.0454834699630737, - "365": 1.036085605621338, - "366": 1.1615955829620361, - "367": 1.4211864471435547, - "368": 1.2872300148010254, - "369": 1.1462335586547852, - "370": 1.2676925659179688, - "371": 1.2726163864135742, - "372": 1.0992878675460815, - "373": 1.110335350036621, - "374": 1.3180903196334839, - "375": 1.778267741203308, - "376": 1.4958765506744385, - "377": 1.1622827053070068 - }, - "loss": { - "324": 2.378471612930298, - "325": 2.3629865646362305, - "326": 2.385097026824951, - "327": 2.4174094200134277, - "328": 2.410780191421509, - "329": 2.4900832176208496, - "330": 2.4151382446289062, - "331": 2.442502498626709, - "332": 2.381744384765625, - "333": 2.4000790119171143, - "334": 2.393716812133789, - "335": 2.385098457336426, - "336": 2.3959171772003174, - "337": 2.403446912765503, - "338": 2.384711265563965, - "339": 2.4059650897979736, - "340": 2.3941526412963867, - "341": 2.411202907562256, - "342": 2.394829273223877, - "343": 2.4191174507141113, - "344": 2.4327807426452637, - "345": 2.3698110580444336, - "346": 2.372457265853882, - "347": 2.358297348022461, - "348": 2.3747143745422363, - "349": 2.462536334991455, - "350": 2.3946521282196045, - "351": 2.444960117340088, - "352": 2.374955892562866, - "353": 2.397289752960205, - "354": 2.393702507019043, - "355": 2.3784091472625732, - "356": 2.3567159175872803, - "357": 2.3950634002685547, - "358": 2.3703205585479736, - "359": 2.3878798484802246, - "360": 2.385012626647949, - "361": 2.3864059448242188, - "362": 2.349491834640503, - "363": 2.358891487121582, - "364": 2.3372199535369873, - "365": 2.3621435165405273, - "366": 2.3578944206237793, - "367": 2.3868937492370605, - "368": 2.4096920490264893, - "369": 2.384051561355591, - "370": 2.369637966156006, - "371": 2.3805456161499023, - "372": 2.361449718475342, - "373": 2.335862398147583, - "374": 2.347748279571533, - "375": 2.44773006439209, - "376": 2.4842140674591064, - "377": 2.3896615505218506 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 3.1306, - 2.92279, - 2.05772, - 1.67059, - 1.1183, - 1.19526, - 1.55498, - 1.73413, - 1.85315, - 1.85419, - 1.91847, - 1.72742, - 1.60304, - 1.72937, - 1.89203, - 1.83411, - 1.72495, - 1.70655, - 1.48759, - 1.16481, - 1.98397, - 2.93884, - 3.006, - 2.53759, - 1.29374, - 1.17733, - 1.30111, - 1.356, - 1.35286, - 1.50025, - 1.90087, - 1.98104, - 1.70981, - 1.65093, - 1.98072, - 1.76632, - 1.54084, - 1.86058, - 2.42487, - 2.15418, - 2.13828, - 2.20047, - 1.74749, - 1.18176, - 1.45428, - 1.81455, - 1.47453, - 1.46988, - 1.95414, - 1.89469, - 1.35133, - 0.77405, - 1.11019, - 1.76894 - ], - "train_epoch_time": 5.051965713500977, - "train_loss": 2.3595358218489966, - "train_score": 0.30445659962753996, - "val_loss": 2.4094951702998233, - "val_score": 0.2916233135487812 - }, - { - "epoch": 7, - "grad_norm": 1.122349500656128, - "learning_rate": 0.215, - "model_norm": 87.58779907226562, - "step_logs": { - "grad_norm": { - "378": 1.2289165258407593, - "379": 1.2876077890396118, - "380": 1.253775954246521, - "381": 1.1601649522781372, - "382": 1.0149775743484497, - "383": 0.8874648213386536, - "384": 0.8765559196472168, - "385": 0.9939548373222351, - "386": 1.1471822261810303, - "387": 1.2171940803527832, - "388": 1.4047293663024902, - "389": 1.3704050779342651, - "390": 1.2447500228881836, - "391": 1.1996443271636963, - "392": 1.0778791904449463, - "393": 1.0508983135223389, - "394": 1.0584830045700073, - "395": 1.075810432434082, - "396": 1.121604323387146, - "397": 1.2505501508712769, - "398": 1.2425906658172607, - "399": 1.1320399045944214, - "400": 0.9426090717315674, - "401": 0.9163249731063843, - "402": 1.163918375968933, - "403": 1.693349838256836, - "404": 1.4655699729919434, - "405": 1.0728397369384766, - "406": 0.9973278045654297, - "407": 1.0631458759307861, - "408": 1.1138743162155151, - "409": 1.3411601781845093, - "410": 1.3309574127197266, - "411": 1.190207600593567, - "412": 1.1875566244125366, - "413": 1.0829166173934937, - "414": 0.9782698750495911, - "415": 1.0266886949539185, - "416": 1.0581934452056885, - "417": 1.1176337003707886, - "418": 1.1155422925949097, - "419": 1.1001964807510376, - "420": 1.062107801437378, - "421": 1.0173048973083496, - "422": 1.0776845216751099, - "423": 1.1612517833709717, - "424": 1.3631857633590698, - "425": 1.4028018712997437, - "426": 1.3414517641067505, - "427": 1.2491090297698975, - "428": 1.1552237272262573, - "429": 1.0854108333587646, - "430": 1.150957703590393, - "431": 1.122349500656128 - }, - "loss": { - "378": 2.3674890995025635, - "379": 2.3736331462860107, - "380": 2.374690055847168, - "381": 2.3619821071624756, - "382": 2.3689804077148438, - "383": 2.3117270469665527, - "384": 2.319326400756836, - "385": 2.311298370361328, - "386": 2.3461179733276367, - "387": 2.3617935180664062, - "388": 2.3598344326019287, - "389": 2.419098377227783, - "390": 2.331808567047119, - "391": 2.3663904666900635, - "392": 2.3436355590820312, - "393": 2.3307714462280273, - "394": 2.337489604949951, - "395": 2.317723512649536, - "396": 2.3408355712890625, - "397": 2.3330633640289307, - "398": 2.375703811645508, - "399": 2.3548202514648438, - "400": 2.3112916946411133, - "401": 2.3086235523223877, - "402": 2.3135900497436523, - "403": 2.3431601524353027, - "404": 2.4124484062194824, - "405": 2.333739995956421, - "406": 2.2959189414978027, - "407": 2.297210693359375, - "408": 2.3128461837768555, - "409": 2.3254969120025635, - "410": 2.3781135082244873, - "411": 2.3242831230163574, - "412": 2.3365964889526367, - "413": 2.3092870712280273, - "414": 2.314084053039551, - "415": 2.2937607765197754, - "416": 2.2956981658935547, - "417": 2.3010544776916504, - "418": 2.3063392639160156, - "419": 2.2883734703063965, - "420": 2.307068347930908, - "421": 2.2768523693084717, - "422": 2.311478853225708, - "423": 2.3039536476135254, - "424": 2.3166608810424805, - "425": 2.346284866333008, - "426": 2.3382768630981445, - "427": 2.3504528999328613, - "428": 2.298037052154541, - "429": 2.320526599884033, - "430": 2.2736430168151855, - "431": 2.3213510513305664 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 1.56763, - 1.43168, - 1.51066, - 1.75484, - 2.29958, - 2.93518, - 3.01858, - 2.3395, - 1.78273, - 1.59412, - 1.1959, - 1.28812, - 1.50497, - 1.6443, - 2.0172, - 2.11047, - 2.08632, - 2.00258, - 1.86077, - 1.49185, - 1.53864, - 1.83753, - 2.60131, - 2.7495, - 1.70782, - 0.817163, - 1.12317, - 2.0276, - 2.30824, - 2.03243, - 1.86412, - 1.29287, - 1.34247, - 1.64075, - 1.65682, - 1.96919, - 2.41803, - 2.17606, - 2.05015, - 1.84216, - 1.85332, - 1.89054, - 2.04514, - 2.20005, - 1.99025, - 1.70852, - 1.24667, - 1.19231, - 1.29941, - 1.50644, - 1.72197, - 1.96969, - 1.71634, - 1.84283 - ], - "train_epoch_time": 5.052664041519165, - "train_loss": 2.283510016847717, - "train_score": 0.3188654502204364, - "val_loss": 2.3316025684676407, - "val_score": 0.30808248376873687 - }, - { - "epoch": 8, - "grad_norm": 0.9300982356071472, - "learning_rate": 0.215, - "model_norm": 87.61836242675781, - "step_logs": { - "grad_norm": { - "432": 1.173887014389038, - "433": 1.1409796476364136, - "434": 1.1342936754226685, - "435": 1.2576640844345093, - "436": 1.2820959091186523, - "437": 1.1802228689193726, - "438": 1.0372898578643799, - "439": 1.0499985218048096, - "440": 1.108478307723999, - "441": 1.207029104232788, - "442": 1.290515661239624, - "443": 1.2015646696090698, - "444": 1.0155858993530273, - "445": 0.9483391046524048, - "446": 0.9841550588607788, - "447": 1.0490765571594238, - "448": 1.2082215547561646, - "449": 1.3061292171478271, - "450": 1.3867985010147095, - "451": 1.718166708946228, - "452": 1.3583420515060425, - "453": 1.1765997409820557, - "454": 1.21886146068573, - "455": 1.1780078411102295, - "456": 1.1769036054611206, - "457": 1.0307259559631348, - "458": 0.9529721140861511, - "459": 1.0447609424591064, - "460": 1.4128073453903198, - "461": 1.3071491718292236, - "462": 1.0343782901763916, - "463": 0.9364259243011475, - "464": 0.8971028923988342, - "465": 0.9150164127349854, - "466": 1.0134968757629395, - "467": 1.0172163248062134, - "468": 1.0775176286697388, - "469": 1.1535265445709229, - "470": 1.191583514213562, - "471": 1.1831544637680054, - "472": 1.0748728513717651, - "473": 0.8862811326980591, - "474": 0.866497814655304, - "475": 0.957634687423706, - "476": 1.0472304821014404, - "477": 1.1254465579986572, - "478": 1.1687746047973633, - "479": 1.096500039100647, - "480": 1.038978099822998, - "481": 1.050185203552246, - "482": 1.06312894821167, - "483": 1.004075288772583, - "484": 0.9146953821182251, - "485": 0.9300982356071472 - }, - "loss": { - "432": 2.2915685176849365, - "433": 2.325540542602539, - "434": 2.2867980003356934, - "435": 2.3208999633789062, - "436": 2.3207876682281494, - "437": 2.3049392700195312, - "438": 2.2741847038269043, - "439": 2.2700278759002686, - "440": 2.29074764251709, - "441": 2.2956035137176514, - "442": 2.301769256591797, - "443": 2.3018126487731934, - "444": 2.3019819259643555, - "445": 2.244292736053467, - "446": 2.2597298622131348, - "447": 2.2670047283172607, - "448": 2.2799508571624756, - "449": 2.3224425315856934, - "450": 2.3306963443756104, - "451": 2.3314719200134277, - "452": 2.3882226943969727, - "453": 2.2948594093322754, - "454": 2.3057758808135986, - "455": 2.291252613067627, - "456": 2.2728543281555176, - "457": 2.26200008392334, - "458": 2.2534117698669434, - "459": 2.268404483795166, - "460": 2.2851171493530273, - "461": 2.3509154319763184, - "462": 2.2637104988098145, - "463": 2.261314868927002, - "464": 2.235646963119507, - "465": 2.2582640647888184, - "466": 2.242279529571533, - "467": 2.2655491828918457, - "468": 2.2640581130981445, - "469": 2.2845754623413086, - "470": 2.2636184692382812, - "471": 2.3068342208862305, - "472": 2.2654080390930176, - "473": 2.266956329345703, - "474": 2.239006519317627, - "475": 2.24525785446167, - "476": 2.244333267211914, - "477": 2.2878918647766113, - "478": 2.2697043418884277, - "479": 2.243941307067871, - "480": 2.2570700645446777, - "481": 2.2509193420410156, - "482": 2.249399423599243, - "483": 2.2804508209228516, - "484": 2.2411937713623047, - "485": 2.2560853958129883 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 1.66295, - 1.78636, - 1.77737, - 1.46733, - 1.41187, - 1.65475, - 2.11361, - 2.05899, - 1.86433, - 1.57566, - 1.38209, - 1.59432, - 2.23187, - 2.49547, - 2.33308, - 2.05986, - 1.56182, - 1.36136, - 1.21188, - 0.789768, - 1.29436, - 1.65767, - 1.55206, - 1.65111, - 1.64093, - 2.12915, - 2.48131, - 2.0782, - 1.14483, - 1.3759, - 2.11574, - 2.57878, - 2.77791, - 2.69722, - 2.18296, - 2.18951, - 1.95002, - 1.71692, - 1.59424, - 1.64791, - 1.9608, - 2.88603, - 2.98209, - 2.44831, - 2.04646, - 1.80628, - 1.66153, - 1.86635, - 2.0909, - 2.04093, - 1.99019, - 2.26198, - 2.67871, - 2.60794 - ], - "train_epoch_time": 5.051317930221558, - "train_loss": 2.230953530977925, - "train_score": 0.34803846847036135, - "val_loss": 2.280944639724924, - "val_score": 0.3334215349088991 - }, - { - "epoch": 9, - "grad_norm": 1.1118519306182861, - "learning_rate": 0.215, - "model_norm": 87.64906311035156, - "step_logs": { - "grad_norm": { - "486": 0.9912803769111633, - "487": 1.1801499128341675, - "488": 1.3727082014083862, - "489": 1.4574068784713745, - "490": 1.1562776565551758, - "491": 1.0924196243286133, - "492": 1.1069799661636353, - "493": 1.1005313396453857, - "494": 1.1039934158325195, - "495": 1.0187159776687622, - "496": 1.1182432174682617, - "497": 1.3187373876571655, - "498": 1.4477620124816895, - "499": 1.3412322998046875, - "500": 1.153167486190796, - "501": 1.0750313997268677, - "502": 1.025566577911377, - "503": 1.0409637689590454, - "504": 1.011820673942566, - "505": 1.1079769134521484, - "506": 1.1562845706939697, - "507": 1.0853391885757446, - "508": 1.0119813680648804, - "509": 0.9975836873054504, - "510": 1.0724613666534424, - "511": 1.1301614046096802, - "512": 1.1464024782180786, - "513": 1.1840053796768188, - "514": 1.2161908149719238, - "515": 1.1920208930969238, - "516": 1.0625874996185303, - "517": 0.9278948903083801, - "518": 0.9675673842430115, - "519": 1.0902743339538574, - "520": 1.1268482208251953, - "521": 1.178515076637268, - "522": 1.1936978101730347, - "523": 1.27751624584198, - "524": 1.2797014713287354, - "525": 1.255751371383667, - "526": 1.2691787481307983, - "527": 1.34010648727417, - "528": 1.4972039461135864, - "529": 1.4528220891952515, - "530": 1.1309788227081299, - "531": 0.9770186543464661, - "532": 1.0028085708618164, - "533": 1.033262848854065, - "534": 1.0508148670196533, - "535": 1.0891156196594238, - "536": 1.190722942352295, - "537": 1.1033893823623657, - "538": 1.0907820463180542, - "539": 1.1118519306182861 - }, - "loss": { - "486": 2.244365692138672, - "487": 2.258953094482422, - "488": 2.291658401489258, - "489": 2.301762342453003, - "490": 2.26674222946167, - "491": 2.240677833557129, - "492": 2.2656078338623047, - "493": 2.254861831665039, - "494": 2.2496354579925537, - "495": 2.222031354904175, - "496": 2.2465157508850098, - "497": 2.263211727142334, - "498": 2.27559494972229, - "499": 2.298959255218506, - "500": 2.253042221069336, - "501": 2.2481675148010254, - "502": 2.2057642936706543, - "503": 2.243898868560791, - "504": 2.2219278812408447, - "505": 2.225553274154663, - "506": 2.251554012298584, - "507": 2.2208924293518066, - "508": 2.223480701446533, - "509": 2.221001148223877, - "510": 2.2008442878723145, - "511": 2.246204376220703, - "512": 2.223757266998291, - "513": 2.253476619720459, - "514": 2.2615246772766113, - "515": 2.238771438598633, - "516": 2.219839572906494, - "517": 2.1763017177581787, - "518": 2.2072110176086426, - "519": 2.2068400382995605, - "520": 2.225220203399658, - "521": 2.210195779800415, - "522": 2.2160658836364746, - "523": 2.219590663909912, - "524": 2.2667903900146484, - "525": 2.2522406578063965, - "526": 2.2427616119384766, - "527": 2.233675479888916, - "528": 2.2969250679016113, - "529": 2.272357702255249, - "530": 2.264460563659668, - "531": 2.191066265106201, - "532": 2.1999876499176025, - "533": 2.183898448944092, - "534": 2.2113471031188965, - "535": 2.2229721546173096, - "536": 2.223538875579834, - "537": 2.246978521347046, - "538": 2.211336612701416, - "539": 2.2184529304504395 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 2.28402, - 1.62193, - 1.21617, - 1.08367, - 1.69542, - 1.87759, - 1.84887, - 1.86172, - 1.84578, - 2.14113, - 1.79654, - 1.30139, - 1.08568, - 1.27798, - 1.69428, - 1.9453, - 2.09716, - 2.07077, - 2.17032, - 1.81291, - 1.68404, - 1.88537, - 2.17114, - 2.23177, - 1.91349, - 1.75861, - 1.69205, - 1.60748, - 1.52897, - 1.57559, - 1.96604, - 2.52768, - 2.35766, - 1.85652, - 1.75244, - 1.59133, - 1.55523, - 1.36, - 1.38418, - 1.42826, - 1.39232, - 1.24377, - 1.02467, - 1.07659, - 1.77034, - 2.29535, - 2.18768, - 2.04555, - 2.00265, - 1.87407, - 1.56828, - 1.84562, - 1.85857, - 1.79455 - ], - "train_epoch_time": 5.059020757675171, - "train_loss": 2.200929508250276, - "train_score": 0.35788199437228985, - "val_loss": 2.2678967945611297, - "val_score": 0.34036847093622663 - }, - { - "epoch": 10, - "grad_norm": 1.0116633176803589, - "learning_rate": 0.215, - "model_norm": 87.67935180664062, - "step_logs": { - "grad_norm": { - "540": 1.169072151184082, - "541": 1.1783757209777832, - "542": 1.2139266729354858, - "543": 1.2846906185150146, - "544": 1.3508814573287964, - "545": 1.332606315612793, - "546": 1.19757080078125, - "547": 1.1094799041748047, - "548": 1.0778957605361938, - "549": 1.1209033727645874, - "550": 1.1984648704528809, - "551": 1.252472162246704, - "552": 1.183653473854065, - "553": 1.145930528640747, - "554": 1.0975148677825928, - "555": 1.0804927349090576, - "556": 1.1179344654083252, - "557": 1.11802339553833, - "558": 1.0764483213424683, - "559": 1.1522531509399414, - "560": 1.1919751167297363, - "561": 1.1006834506988525, - "562": 1.0570098161697388, - "563": 1.0119645595550537, - "564": 1.032116413116455, - "565": 1.0845112800598145, - "566": 1.1901994943618774, - "567": 1.1658177375793457, - "568": 1.056470274925232, - "569": 1.1020967960357666, - "570": 1.228576898574829, - "571": 1.2427558898925781, - "572": 1.2071932554244995, - "573": 1.275977373123169, - "574": 1.3003004789352417, - "575": 1.2350523471832275, - "576": 1.5249589681625366, - "577": 1.4124336242675781, - "578": 1.4135326147079468, - "579": 1.5690242052078247, - "580": 1.573901891708374, - "581": 1.436079740524292, - "582": 1.3040106296539307, - "583": 1.1048146486282349, - "584": 1.0893659591674805, - "585": 1.1002446413040161, - "586": 1.0650063753128052, - "587": 1.0706894397735596, - "588": 1.0903651714324951, - "589": 1.219233512878418, - "590": 1.2892698049545288, - "591": 1.1891388893127441, - "592": 1.0812523365020752, - "593": 1.0116633176803589 - }, - "loss": { - "540": 2.204136848449707, - "541": 2.2254788875579834, - "542": 2.20072603225708, - "543": 2.2437100410461426, - "544": 2.197460651397705, - "545": 2.2728939056396484, - "546": 2.2356958389282227, - "547": 2.214299201965332, - "548": 2.188335418701172, - "549": 2.2122912406921387, - "550": 2.20137882232666, - "551": 2.2458462715148926, - "552": 2.18747615814209, - "553": 2.2249817848205566, - "554": 2.2014222145080566, - "555": 2.186837673187256, - "556": 2.215104103088379, - "557": 2.208815336227417, - "558": 2.1690101623535156, - "559": 2.1853578090667725, - "560": 2.2086341381073, - "561": 2.19753360748291, - "562": 2.196995735168457, - "563": 2.1907670497894287, - "564": 2.1760456562042236, - "565": 2.182063341140747, - "566": 2.197408676147461, - "567": 2.2246310710906982, - "568": 2.1983628273010254, - "569": 2.189992904663086, - "570": 2.1991801261901855, - "571": 2.2129592895507812, - "572": 2.210176467895508, - "573": 2.194798469543457, - "574": 2.20719838142395, - "575": 2.203917980194092, - "576": 2.2294514179229736, - "577": 2.272395610809326, - "578": 2.2360830307006836, - "579": 2.2558693885803223, - "580": 2.267986297607422, - "581": 2.2737629413604736, - "582": 2.235292673110962, - "583": 2.1753664016723633, - "584": 2.200342893600464, - "585": 2.192239999771118, - "586": 2.2137129306793213, - "587": 2.153522253036499, - "588": 2.160778522491455, - "589": 2.210700511932373, - "590": 2.217236042022705, - "591": 2.195439100265503, - "592": 2.1788392066955566, - "593": 2.1664459705352783 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 1.61271, - 1.60271, - 1.49342, - 1.35947, - 1.20417, - 1.2799, - 1.55887, - 1.79886, - 1.88348, - 1.76078, - 1.53265, - 1.43167, - 1.56133, - 1.69438, - 1.82761, - 1.87315, - 1.7724, - 1.76709, - 1.87187, - 1.64599, - 1.5545, - 1.81389, - 1.9664, - 2.13927, - 2.04273, - 1.85524, - 1.55121, - 1.6368, - 1.96963, - 1.80303, - 1.45699, - 1.43285, - 1.51661, - 1.34806, - 1.30543, - 1.44486, - 0.958698, - 1.13906, - 1.11912, - 0.916336, - 0.915556, - 1.10252, - 1.31453, - 1.78219, - 1.85414, - 1.81096, - 1.95172, - 1.87855, - 1.81747, - 1.48715, - 1.3339, - 1.55259, - 1.86368, - 2.11678 - ], - "train_epoch_time": 5.05180287361145, - "train_loss": 2.1669004165288195, - "train_score": 0.36084334651059025, - "val_loss": 2.2336182996681995, - "val_score": 0.3415524546574506 - }, - { - "epoch": 11, - "grad_norm": 1.0813859701156616, - "learning_rate": 0.215, - "model_norm": 87.71249389648438, - "step_logs": { - "grad_norm": { - "594": 1.0095385313034058, - "595": 1.118397831916809, - "596": 1.113401174545288, - "597": 0.9805076122283936, - "598": 0.9841046929359436, - "599": 1.229051113128662, - "600": 1.213972568511963, - "601": 1.0447558164596558, - "602": 0.9220883846282959, - "603": 0.936968982219696, - "604": 0.9985181093215942, - "605": 1.0668905973434448, - "606": 1.2745782136917114, - "607": 1.29975426197052, - "608": 1.2980278730392456, - "609": 1.3180011510849, - "610": 1.2992914915084839, - "611": 1.2111988067626953, - "612": 1.1126219034194946, - "613": 1.0514963865280151, - "614": 0.9431167244911194, - "615": 0.9624524116516113, - "616": 1.0076013803482056, - "617": 1.0628156661987305, - "618": 1.1847156286239624, - "619": 1.2134593725204468, - "620": 1.1799983978271484, - "621": 1.1404943466186523, - "622": 1.0369693040847778, - "623": 1.033238172531128, - "624": 1.1578330993652344, - "625": 1.2227585315704346, - "626": 1.3081293106079102, - "627": 1.367281198501587, - "628": 1.358440637588501, - "629": 1.246859073638916, - "630": 1.2392390966415405, - "631": 1.260047435760498, - "632": 1.3502540588378906, - "633": 1.3460414409637451, - "634": 1.2329233884811401, - "635": 1.124908208847046, - "636": 1.0964032411575317, - "637": 1.1042762994766235, - "638": 1.0895113945007324, - "639": 1.1002918481826782, - "640": 1.1614863872528076, - "641": 1.1491286754608154, - "642": 1.0553640127182007, - "643": 1.0744247436523438, - "644": 1.1322510242462158, - "645": 1.1758286952972412, - "646": 1.1297539472579956, - "647": 1.0813859701156616 - }, - "loss": { - "594": 2.1929025650024414, - "595": 2.164384365081787, - "596": 2.170667886734009, - "597": 2.1898555755615234, - "598": 2.1332521438598633, - "599": 2.165065288543701, - "600": 2.214233636856079, - "601": 2.166761875152588, - "602": 2.16964054107666, - "603": 2.150141954421997, - "604": 2.1509041786193848, - "605": 2.169931411743164, - "606": 2.1968047618865967, - "607": 2.2210962772369385, - "608": 2.189572811126709, - "609": 2.2153797149658203, - "610": 2.1691596508026123, - "611": 2.2113218307495117, - "612": 2.160707712173462, - "613": 2.179276466369629, - "614": 2.14737868309021, - "615": 2.1177570819854736, - "616": 2.1631083488464355, - "617": 2.1379146575927734, - "618": 2.152888059616089, - "619": 2.205331325531006, - "620": 2.167412281036377, - "621": 2.18001389503479, - "622": 2.1571598052978516, - "623": 2.165440082550049, - "624": 2.165348768234253, - "625": 2.1720526218414307, - "626": 2.1637320518493652, - "627": 2.215428113937378, - "628": 2.184414863586426, - "629": 2.1814124584198, - "630": 2.1491243839263916, - "631": 2.178330898284912, - "632": 2.1728720664978027, - "633": 2.2016239166259766, - "634": 2.169736862182617, - "635": 2.1725568771362305, - "636": 2.137773036956787, - "637": 2.1530861854553223, - "638": 2.151876211166382, - "639": 2.164989471435547, - "640": 2.168134927749634, - "641": 2.168419361114502, - "642": 2.1598010063171387, - "643": 2.121164560317993, - "644": 2.1406288146972656, - "645": 2.1656789779663086, - "646": 2.148796558380127, - "647": 2.133854627609253 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 2.15166, - 1.73038, - 1.75102, - 2.27779, - 2.20272, - 1.43328, - 1.50247, - 1.9851, - 2.55178, - 2.44916, - 2.15729, - 1.90637, - 1.35225, - 1.31476, - 1.29955, - 1.27531, - 1.28493, - 1.50737, - 1.74542, - 1.97105, - 2.41423, - 2.28622, - 2.13059, - 1.89267, - 1.53389, - 1.49769, - 1.55661, - 1.676, - 2.00609, - 2.02836, - 1.61524, - 1.45274, - 1.26445, - 1.18506, - 1.18373, - 1.40315, - 1.39943, - 1.37199, - 1.1918, - 1.21514, - 1.42736, - 1.71687, - 1.77837, - 1.76566, - 1.81282, - 1.7883, - 1.60716, - 1.64212, - 1.93914, - 1.83748, - 1.66977, - 1.56641, - 1.68356, - 1.82475 - ], - "train_epoch_time": 5.0524678230285645, - "train_loss": 2.1371430958702713, - "train_score": 0.37163849522598846, - "val_loss": 2.2138783608731116, - "val_score": 0.3534909584971164 - }, - { - "epoch": 12, - "grad_norm": 0.8866685628890991, - "learning_rate": 0.215, - "model_norm": 87.7420654296875, - "step_logs": { - "grad_norm": { - "648": 1.0472753047943115, - "649": 0.9115982055664062, - "650": 0.8244289755821228, - "651": 0.8689126968383789, - "652": 0.9928649067878723, - "653": 1.167019009590149, - "654": 1.26961350440979, - "655": 1.2213505506515503, - "656": 1.2602888345718384, - "657": 1.3108564615249634, - "658": 1.180912971496582, - "659": 1.1064778566360474, - "660": 1.0781797170639038, - "661": 1.006677269935608, - "662": 0.9838483929634094, - "663": 0.9756195545196533, - "664": 0.9793493747711182, - "665": 1.070969581604004, - "666": 1.1430097818374634, - "667": 1.1448856592178345, - "668": 1.0267887115478516, - "669": 0.9792900681495667, - "670": 1.010474443435669, - "671": 1.0112379789352417, - "672": 0.9828790426254272, - "673": 0.8997846245765686, - "674": 0.8759722709655762, - "675": 0.9008798003196716, - "676": 0.988506555557251, - "677": 1.0519704818725586, - "678": 0.974343478679657, - "679": 0.91118985414505, - "680": 0.8859447240829468, - "681": 0.8313981890678406, - "682": 0.797153651714325, - "683": 0.7582221031188965, - "684": 0.7774538397789001, - "685": 0.7920456528663635, - "686": 0.7822142243385315, - "687": 0.784407913684845, - "688": 0.8251732587814331, - "689": 0.8213247656822205, - "690": 0.7949087619781494, - "691": 0.9233590364456177, - "692": 0.9984911680221558, - "693": 1.0227104425430298, - "694": 0.9449827671051025, - "695": 0.8695642352104187, - "696": 0.8431944847106934, - "697": 0.8557807207107544, - "698": 0.9188382625579834, - "699": 0.9073380827903748, - "700": 0.9085015654563904, - "701": 0.8866685628890991 - }, - "loss": { - "648": 2.1539249420166016, - "649": 2.153470516204834, - "650": 2.1303768157958984, - "651": 2.124969005584717, - "652": 2.112607479095459, - "653": 2.12748384475708, - "654": 2.1599693298339844, - "655": 2.1595160961151123, - "656": 2.147554636001587, - "657": 2.1652820110321045, - "658": 2.164153575897217, - "659": 2.1289477348327637, - "660": 2.122256278991699, - "661": 2.1265668869018555, - "662": 2.1151175498962402, - "663": 2.1025984287261963, - "664": 2.1128110885620117, - "665": 2.1415257453918457, - "666": 2.1279125213623047, - "667": 2.1497600078582764, - "668": 2.1128122806549072, - "669": 2.109170913696289, - "670": 2.1266679763793945, - "671": 2.108522415161133, - "672": 2.082127571105957, - "673": 2.080562114715576, - "674": 2.1004080772399902, - "675": 2.0928163528442383, - "676": 2.0909857749938965, - "677": 2.1165192127227783, - "678": 2.12237548828125, - "679": 2.060544490814209, - "680": 2.0678043365478516, - "681": 2.0857534408569336, - "682": 2.1190152168273926, - "683": 2.0696496963500977, - "684": 2.0691144466400146, - "685": 2.0708937644958496, - "686": 2.0833353996276855, - "687": 2.079961061477661, - "688": 2.0788733959198, - "689": 2.0817677974700928, - "690": 2.0411934852600098, - "691": 2.060296058654785, - "692": 2.0876822471618652, - "693": 2.089556932449341, - "694": 2.0931663513183594, - "695": 2.100879192352295, - "696": 2.0798120498657227, - "697": 2.0584583282470703, - "698": 2.087106227874756, - "699": 2.059535026550293, - "700": 2.0786895751953125, - "701": 2.078433036804199 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 1.96385, - 2.59139, - 3.13437, - 2.81449, - 2.14308, - 1.56211, - 1.34, - 1.44769, - 1.35209, - 1.2601, - 1.55186, - 1.73892, - 1.82564, - 2.09845, - 2.18513, - 2.209, - 2.20285, - 1.86711, - 1.62875, - 1.64008, - 2.004, - 2.19932, - 2.08281, - 2.06192, - 2.1553, - 2.56982, - 2.7373, - 2.57868, - 2.13989, - 1.91256, - 2.23562, - 2.48179, - 2.63449, - 3.01748, - 3.33465, - 3.60001, - 3.42322, - 3.30109, - 3.40493, - 3.38042, - 3.05308, - 3.08605, - 3.23035, - 2.41651, - 2.094, - 1.99779, - 2.34399, - 2.77842, - 2.92529, - 2.81072, - 2.4721, - 2.50168, - 2.51848, - 2.64371 - ], - "train_epoch_time": 5.0517659187316895, - "train_loss": 2.064169383288456, - "train_score": 0.38890894004056925, - "val_loss": 2.1586678411328286, - "val_score": 0.3648285020985368 - }, - { - "epoch": 13, - "grad_norm": 0.5387442708015442, - "learning_rate": 0.14333333333333334, - "model_norm": 87.76129913330078, - "step_logs": { - "grad_norm": { - "702": 0.8555957078933716, - "703": 0.8389430046081543, - "704": 0.7974465489387512, - "705": 0.7111917734146118, - "706": 0.6865280866622925, - "707": 0.6655653119087219, - "708": 0.7120420932769775, - "709": 0.6198110580444336, - "710": 0.6193294525146484, - "711": 0.6613816618919373, - "712": 0.6125500202178955, - "713": 0.6190069913864136, - "714": 0.6449284553527832, - "715": 0.623160719871521, - "716": 0.635799765586853, - "717": 0.6766581535339355, - "718": 0.6606045365333557, - "719": 0.6689791679382324, - "720": 0.7258394956588745, - "721": 0.6947575807571411, - "722": 0.6685524582862854, - "723": 0.6224409341812134, - "724": 0.6535094380378723, - "725": 0.6764883995056152, - "726": 0.6475353837013245, - "727": 0.6286060214042664, - "728": 0.6079074740409851, - "729": 0.5699890851974487, - "730": 0.6327533721923828, - "731": 0.6207576394081116, - "732": 0.6795573234558105, - "733": 0.7368409633636475, - "734": 0.6754153966903687, - "735": 0.5986524820327759, - "736": 0.5603148341178894, - "737": 0.5499348044395447, - "738": 0.553995668888092, - "739": 0.552712082862854, - "740": 0.5789799094200134, - "741": 0.6051276922225952, - "742": 0.5702527165412903, - "743": 0.5347087979316711, - "744": 0.5771920680999756, - "745": 0.5683488249778748, - "746": 0.5687381625175476, - "747": 0.5655471682548523, - "748": 0.5749183893203735, - "749": 0.5902183651924133, - "750": 0.591609001159668, - "751": 0.5643320679664612, - "752": 0.5647693276405334, - "753": 0.5534801483154297, - "754": 0.5389439463615417, - "755": 0.5387442708015442 - }, - "loss": { - "702": 2.068603038787842, - "703": 2.0600123405456543, - "704": 2.0609488487243652, - "705": 2.0514752864837646, - "706": 2.0525529384613037, - "707": 2.0297815799713135, - "708": 2.043025016784668, - "709": 2.0269765853881836, - "710": 2.041564702987671, - "711": 2.035750389099121, - "712": 2.015636920928955, - "713": 2.041550636291504, - "714": 2.0596487522125244, - "715": 2.0353753566741943, - "716": 2.0464487075805664, - "717": 2.0496344566345215, - "718": 2.0427510738372803, - "719": 2.0662004947662354, - "720": 2.044621706008911, - "721": 2.0307607650756836, - "722": 2.044048309326172, - "723": 2.0486457347869873, - "724": 2.0574636459350586, - "725": 2.022700786590576, - "726": 2.028663396835327, - "727": 2.0265889167785645, - "728": 2.050173282623291, - "729": 2.022919178009033, - "730": 2.0387704372406006, - "731": 2.04561185836792, - "732": 2.040342092514038, - "733": 2.025927782058716, - "734": 2.0442991256713867, - "735": 2.040170669555664, - "736": 2.0224528312683105, - "737": 2.0229549407958984, - "738": 2.013416290283203, - "739": 2.042023181915283, - "740": 2.0228073596954346, - "741": 2.013936758041382, - "742": 2.0341639518737793, - "743": 2.0218470096588135, - "744": 2.034235954284668, - "745": 2.01385235786438, - "746": 2.030132532119751, - "747": 2.009298086166382, - "748": 2.0243000984191895, - "749": 2.024648904800415, - "750": 2.03177547454834, - "751": 2.0019636154174805, - "752": 2.0308589935302734, - "753": 2.0112240314483643, - "754": 1.999527096748352, - "755": 2.010127067565918 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 2.82579, - 2.92688, - 3.24089, - 4.05595, - 4.3549, - 4.58214, - 4.0296, - 5.27631, - 5.32255, - 4.65393, - 5.37192, - 5.32806, - 4.95188, - 5.24136, - 5.06244, - 4.47649, - 4.68093, - 4.61687, - 3.88089, - 4.20719, - 4.5732, - 5.28775, - 4.81757, - 4.41988, - 4.83819, - 5.12872, - 5.54773, - 6.22652, - 5.09213, - 5.30859, - 4.41826, - 3.73144, - 4.48128, - 5.69268, - 6.4419, - 6.68904, - 6.56026, - 6.6844, - 6.03431, - 5.49986, - 6.25534, - 7.07153, - 6.10605, - 6.23444, - 6.27624, - 6.28213, - 6.12439, - 5.81198, - 5.80505, - 6.28618, - 6.36704, - 6.56533, - 6.88399, - 6.92561 - ], - "train_epoch_time": 5.052505970001221, - "train_loss": 2.010954659912131, - "train_score": 0.4025365404025725, - "val_loss": 2.1118211666834203, - "val_score": 0.3764754948810376 - }, - { - "epoch": 14, - "grad_norm": 0.4847679138183594, - "learning_rate": 0.07166666666666667, - "model_norm": 87.76786041259766, - "step_logs": { - "grad_norm": { - "756": 0.5058140158653259, - "757": 0.49769508838653564, - "758": 0.5300824046134949, - "759": 0.5167328119277954, - "760": 0.5182225108146667, - "761": 0.5222752690315247, - "762": 0.49985629320144653, - "763": 0.5261271595954895, - "764": 0.5339662432670593, - "765": 0.49722054600715637, - "766": 0.525741696357727, - "767": 0.5475974082946777, - "768": 0.48826518654823303, - "769": 0.5173293948173523, - "770": 0.5007962584495544, - "771": 0.5114091634750366, - "772": 0.4944199323654175, - "773": 0.49919113516807556, - "774": 0.5080803036689758, - "775": 0.5323089361190796, - "776": 0.513701856136322, - "777": 0.4728732705116272, - "778": 0.5287741422653198, - "779": 0.5132942199707031, - "780": 0.5144039988517761, - "781": 0.4901692271232605, - "782": 0.4993336498737335, - "783": 0.5102146863937378, - "784": 0.5094323754310608, - "785": 0.5016494989395142, - "786": 0.5456656217575073, - "787": 0.45594877004623413, - "788": 0.5241446495056152, - "789": 0.5235331058502197, - "790": 0.49963730573654175, - "791": 0.5246809124946594, - "792": 0.48514509201049805, - "793": 0.4656037986278534, - "794": 0.51222825050354, - "795": 0.48398905992507935, - "796": 0.48211216926574707, - "797": 0.4955475330352783, - "798": 0.455089271068573, - "799": 0.45145097374916077, - "800": 0.4879188537597656, - "801": 0.5031603574752808, - "802": 0.5076599717140198, - "803": 0.4612177908420563, - "804": 0.5085827112197876, - "805": 0.49353665113449097, - "806": 0.46145835518836975, - "807": 0.5111345648765564, - "808": 0.4644039273262024, - "809": 0.4847679138183594 - }, - "loss": { - "756": 1.9891791343688965, - "757": 2.003089189529419, - "758": 2.021998643875122, - "759": 1.998246669769287, - "760": 1.9814414978027344, - "761": 2.0237021446228027, - "762": 2.0245351791381836, - "763": 2.0070135593414307, - "764": 2.0204505920410156, - "765": 1.9956107139587402, - "766": 2.011631965637207, - "767": 2.02752947807312, - "768": 1.9936531782150269, - "769": 1.9787603616714478, - "770": 2.0311286449432373, - "771": 2.0049281120300293, - "772": 2.025374412536621, - "773": 1.9979968070983887, - "774": 2.014063835144043, - "775": 2.0084056854248047, - "776": 2.0069265365600586, - "777": 2.0142831802368164, - "778": 1.9831066131591797, - "779": 2.0081496238708496, - "780": 1.9759306907653809, - "781": 1.9884992837905884, - "782": 2.0142593383789062, - "783": 2.0046749114990234, - "784": 2.0277111530303955, - "785": 1.994141936302185, - "786": 2.0026023387908936, - "787": 2.0019803047180176, - "788": 2.0354251861572266, - "789": 2.0050806999206543, - "790": 1.9992761611938477, - "791": 2.0147576332092285, - "792": 2.006575345993042, - "793": 2.0002737045288086, - "794": 1.9919480085372925, - "795": 2.0168275833129883, - "796": 1.9870167970657349, - "797": 2.016627311706543, - "798": 2.0025057792663574, - "799": 1.9895964860916138, - "800": 2.0089383125305176, - "801": 2.0107169151306152, - "802": 1.982405185699463, - "803": 1.9838014841079712, - "804": 2.003615379333496, - "805": 1.9976394176483154, - "806": 1.9997005462646484, - "807": 2.0074102878570557, - "808": 1.979114294052124, - "809": 1.9938241243362427 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 7.77485, - 8.08674, - 7.19605, - 7.48371, - 7.37817, - 7.41904, - 8.1028, - 7.25052, - 7.08632, - 8.07194, - 7.27786, - 6.76152, - 8.36254, - 7.39365, - 8.0987, - 7.66588, - 8.2854, - 8.01791, - 7.80205, - 7.088, - 7.60517, - 9.00805, - 7.0926, - 7.6219, - 7.46729, - 8.27625, - 8.07856, - 7.70084, - 7.81327, - 7.9242, - 6.72576, - 9.63003, - 7.40888, - 7.31549, - 8.00872, - 7.31867, - 8.52535, - 9.22691, - 7.59191, - 8.60989, - 8.5488, - 8.21212, - 9.66898, - 9.76211, - 8.43862, - 7.94215, - 7.69213, - 9.3258, - 7.74624, - 8.20122, - 9.39074, - 7.68362, - 9.17654, - 8.48436 - ], - "train_epoch_time": 5.059843301773071, - "train_loss": 1.9970765826691856, - "train_score": 0.40541046454198393, - "val_loss": 2.1023791610989315, - "val_score": 0.3787582525908605 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:25:26.382728", - "final_model_norm": 87.76786041259766, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:23:41.528256", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 3.4020798206329346, - "learning_rate": 2.15e-11, - "model_norm": 87.4279556274414, - "step_logs": { - "grad_norm": { - "0": 22.7664794921875, - "1": 23.4499454498291, - "2": 6.6349334716796875, - "3": 7.6453537940979, - "4": 21.053424835205078, - "5": 7.058008670806885, - "6": 5.988020420074463, - "7": 4.229581832885742, - "8": 3.9983975887298584, - "9": 7.037342548370361, - "10": 5.494598388671875, - "11": 8.59819221496582, - "12": 2.925267457962036, - "13": 39.092185974121094, - "14": 4.42000150680542, - "15": 8.168949127197266, - "16": 16.293621063232422, - "17": 7.581535339355469, - "18": 9.173480033874512, - "19": 5.241852760314941, - "20": 4.280900001525879, - "21": 27.222410202026367, - "22": 5.295047760009766, - "23": 37.9439811706543, - "24": 8.8499116897583, - "25": 22.281017303466797, - "26": 4.42485237121582, - "27": 10.319413185119629, - "28": 3.462764024734497, - "29": 5.584513187408447, - "30": 6.203570365905762, - "31": 12.963118553161621, - "32": 4.502926349639893, - "33": 12.705103874206543, - "34": 4.057493686676025, - "35": 13.966697692871094, - "36": 5.8249006271362305, - "37": 5.485065937042236, - "38": 8.925680160522461, - "39": 5.118313312530518, - "40": 4.502964019775391, - "41": 3.303424596786499, - "42": 5.9380340576171875, - "43": 3.2785823345184326, - "44": 3.232649564743042, - "45": 10.542298316955566, - "46": 8.931904792785645, - "47": 2.3960790634155273, - "48": 3.548248767852783, - "49": 3.5594968795776367, - "50": 3.093522071838379, - "51": 7.152304172515869, - "52": 2.485452890396118, - "53": 3.4020798206329346 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.821899175643921, - "3": 3.692150592803955, - "4": 4.159581661224365, - "5": 4.066653251647949, - "6": 3.577277660369873, - "7": 3.644073009490967, - "8": 3.516756057739258, - "9": 3.527935743331909, - "10": 3.9001166820526123, - "11": 3.4582974910736084, - "12": 3.3248465061187744, - "13": 3.636115074157715, - "14": 3.3588008880615234, - "15": 3.5337278842926025, - "16": 5.811540126800537, - "17": 3.4106802940368652, - "18": 4.815016269683838, - "19": 4.148991107940674, - "20": 3.581756353378296, - "21": 5.044354438781738, - "22": 3.6478264331817627, - "23": 3.5692028999328613, - "24": 3.675647258758545, - "25": 4.464303016662598, - "26": 3.690401554107666, - "27": 3.540964126586914, - "28": 3.1019937992095947, - "29": 3.440617561340332, - "30": 3.246218204498291, - "31": 6.221119403839111, - "32": 3.552947998046875, - "33": 3.9883127212524414, - "34": 3.316122531890869, - "35": 4.133999824523926, - "36": 3.9942827224731445, - "37": 3.4199929237365723, - "38": 3.7397119998931885, - "39": 3.6552629470825195, - "40": 3.303652763366699, - "41": 3.5317206382751465, - "42": 3.6163101196289062, - "43": 3.2882792949676514, - "44": 3.2394442558288574, - "45": 3.826599597930908, - "46": 3.640352487564087, - "47": 3.0058960914611816, - "48": 3.12412166595459, - "49": 3.574035167694092, - "50": 3.350635290145874, - "51": 3.607766628265381, - "52": 3.1396865844726562, - "53": 3.1181774139404297 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 0.00874362, - 0.00824185, - 0.0868173, - 0.0631661, - 0.00938435, - 0.0816343, - 0.0997668, - 0.203701, - 0.219973, - 0.0712366, - 0.129183, - 0.0467787, - 0.388544, - 0.00237935, - 0.171925, - 0.0529542, - 0.0218905, - 0.0593372, - 0.0572176, - 0.150999, - 0.195446, - 0.00680695, - 0.130105, - 0.00247905, - 0.0469306, - 0.00899257, - 0.188485, - 0.0332515, - 0.258699, - 0.110323, - 0.0843518, - 0.0370211, - 0.175226, - 0.0247077, - 0.201426, - 0.0211925, - 0.117723, - 0.113674, - 0.0469413, - 0.139529, - 0.162929, - 0.323637, - 0.102561, - 0.305912, - 0.309994, - 0.0344304, - 0.0456305, - 0.523566, - 0.248142, - 0.282086, - 0.350123, - 0.0705255, - 0.508247, - 0.269409 - ], - "train_epoch_time": 5.055591344833374, - "train_loss": 3.3950779705512812, - "train_score": 0.18294364242163758, - "val_loss": 3.386549778017467, - "val_score": 0.17807746150805673 - }, - { - "epoch": 1, - "grad_norm": 1.2744216918945312, - "learning_rate": 0.215, - "model_norm": 87.44254302978516, - "step_logs": { - "grad_norm": { - "54": 2.9588420391082764, - "55": 4.551698684692383, - "56": 2.997354030609131, - "57": 2.449770927429199, - "58": 4.22332763671875, - "59": 5.804546356201172, - "60": 2.8611626625061035, - "61": 6.422741413116455, - "62": 2.121128797531128, - "63": 2.7550430297851562, - "64": 4.218456268310547, - "65": 3.298926591873169, - "66": 3.0421950817108154, - "67": 13.084441184997559, - "68": 2.878300905227661, - "69": 2.4003987312316895, - "70": 8.343585014343262, - "71": 2.248182535171509, - "72": 2.7363319396972656, - "73": 2.3988842964172363, - "74": 3.64451265335083, - "75": 2.167081594467163, - "76": 1.8511990308761597, - "77": 2.448065996170044, - "78": 1.6482326984405518, - "79": 1.3638733625411987, - "80": 1.453316569328308, - "81": 1.8655786514282227, - "82": 2.458991765975952, - "83": 2.3232791423797607, - "84": 2.0435938835144043, - "85": 1.2519172430038452, - "86": 1.0352760553359985, - "87": 1.4385895729064941, - "88": 1.5370991230010986, - "89": 1.914219617843628, - "90": 1.659883975982666, - "91": 1.1214090585708618, - "92": 1.267828106880188, - "93": 1.8239319324493408, - "94": 1.550571322441101, - "95": 1.0800126791000366, - "96": 1.2024586200714111, - "97": 1.4818787574768066, - "98": 2.308434009552002, - "99": 1.788259744644165, - "100": 1.1262332201004028, - "101": 1.1004700660705566, - "102": 1.3424655199050903, - "103": 1.4440902471542358, - "104": 1.3265347480773926, - "105": 1.2666053771972656, - "106": 1.3224139213562012, - "107": 1.2744216918945312 - }, - "loss": { - "54": 3.394242286682129, - "55": 3.3434767723083496, - "56": 3.417769193649292, - "57": 2.9999172687530518, - "58": 3.295624256134033, - "59": 3.4924838542938232, - "60": 3.0930306911468506, - "61": 3.2670469284057617, - "62": 2.8934435844421387, - "63": 2.8680472373962402, - "64": 3.2922112941741943, - "65": 2.977743625640869, - "66": 3.392702102661133, - "67": 3.770906448364258, - "68": 2.9260621070861816, - "69": 3.0121850967407227, - "70": 3.5799572467803955, - "71": 2.991973638534546, - "72": 2.9055001735687256, - "73": 3.136777877807617, - "74": 2.968360185623169, - "75": 3.387712001800537, - "76": 2.925246238708496, - "77": 2.841588020324707, - "78": 2.9415340423583984, - "79": 2.723445415496826, - "80": 2.7015414237976074, - "81": 2.7253923416137695, - "82": 2.829561233520508, - "83": 2.9352569580078125, - "84": 2.9875247478485107, - "85": 2.701293468475342, - "86": 2.61785626411438, - "87": 2.6081013679504395, - "88": 2.7314963340759277, - "89": 2.6647980213165283, - "90": 2.8540682792663574, - "91": 2.642575740814209, - "92": 2.6175789833068848, - "93": 2.6791069507598877, - "94": 2.755443811416626, - "95": 2.6256582736968994, - "96": 2.6011414527893066, - "97": 2.6784753799438477, - "98": 2.6998097896575928, - "99": 2.9472389221191406, - "100": 2.6260838508605957, - "101": 2.6080284118652344, - "102": 2.620018482208252, - "103": 2.6422929763793945, - "104": 2.653263568878174, - "105": 2.6121649742126465, - "106": 2.6141340732574463, - "107": 2.6211490631103516 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.387703, - 0.161381, - 0.380423, - 0.499871, - 0.184769, - 0.103657, - 0.377832, - 0.0791981, - 0.643104, - 0.377859, - 0.185004, - 0.273616, - 0.366582, - 0.022026, - 0.353192, - 0.522775, - 0.0514248, - 0.591963, - 0.388046, - 0.545086, - 0.223479, - 0.721366, - 0.853604, - 0.474149, - 1.08277, - 1.4641, - 1.27906, - 0.783072, - 0.467956, - 0.543805, - 0.715356, - 1.72354, - 2.44249, - 1.26023, - 1.1561, - 0.727245, - 1.03588, - 2.10135, - 1.62847, - 0.805328, - 1.14606, - 2.25103, - 1.79897, - 1.21973, - 0.506639, - 0.921625, - 2.07039, - 2.15355, - 1.45378, - 1.26705, - 1.5078, - 1.62824, - 1.49483, - 1.61386 - ], - "train_epoch_time": 5.053322792053223, - "train_loss": 2.6014165839300607, - "train_score": 0.24673601150769242, - "val_loss": 2.6329931763912864, - "val_score": 0.2444254088757643 - }, - { - "epoch": 2, - "grad_norm": 1.1091136932373047, - "learning_rate": 0.215, - "model_norm": 87.45811462402344, - "step_logs": { - "grad_norm": { - "108": 1.3537793159484863, - "109": 1.5923566818237305, - "110": 2.557115316390991, - "111": 1.9047725200653076, - "112": 1.3393384218215942, - "113": 1.1124268770217896, - "114": 1.282261610031128, - "115": 1.4741277694702148, - "116": 1.5760860443115234, - "117": 1.3580493927001953, - "118": 1.3671592473983765, - "119": 1.5843846797943115, - "120": 2.2419321537017822, - "121": 1.587849497795105, - "122": 1.1053768396377563, - "123": 0.9108991026878357, - "124": 0.9168004393577576, - "125": 1.0370756387710571, - "126": 1.1651623249053955, - "127": 1.484588623046875, - "128": 1.6852039098739624, - "129": 1.5998611450195312, - "130": 1.4381487369537354, - "131": 1.3622264862060547, - "132": 1.3824046850204468, - "133": 1.436284065246582, - "134": 1.4441611766815186, - "135": 1.2695380449295044, - "136": 1.2396032810211182, - "137": 1.3548280000686646, - "138": 1.272579312324524, - "139": 1.1266461610794067, - "140": 1.1946275234222412, - "141": 1.5436118841171265, - "142": 1.408092975616455, - "143": 1.094925045967102, - "144": 1.0771726369857788, - "145": 1.228016972541809, - "146": 1.2998939752578735, - "147": 1.3817087411880493, - "148": 1.3504518270492554, - "149": 1.2477402687072754, - "150": 1.1859843730926514, - "151": 1.156193494796753, - "152": 1.15631103515625, - "153": 1.1957241296768188, - "154": 1.2543359994888306, - "155": 1.1964917182922363, - "156": 1.1030373573303223, - "157": 1.2358675003051758, - "158": 1.2464865446090698, - "159": 1.3116198778152466, - "160": 1.2531845569610596, - "161": 1.1091136932373047 - }, - "loss": { - "108": 2.610424518585205, - "109": 2.6543350219726562, - "110": 2.728701591491699, - "111": 2.9849181175231934, - "112": 2.641246795654297, - "113": 2.5753979682922363, - "114": 2.6039133071899414, - "115": 2.6467647552490234, - "116": 2.6306467056274414, - "117": 2.642721176147461, - "118": 2.576988697052002, - "119": 2.6863064765930176, - "120": 2.6805765628814697, - "121": 2.8543100357055664, - "122": 2.6050119400024414, - "123": 2.5657882690429688, - "124": 2.5764946937561035, - "125": 2.564035415649414, - "126": 2.5523557662963867, - "127": 2.5930418968200684, - "128": 2.6834757328033447, - "129": 2.67267107963562, - "130": 2.655188798904419, - "131": 2.6000471115112305, - "132": 2.6280689239501953, - "133": 2.588824510574341, - "134": 2.648529052734375, - "135": 2.5908961296081543, - "136": 2.5875887870788574, - "137": 2.5850300788879395, - "138": 2.621670961380005, - "139": 2.570456027984619, - "140": 2.5660643577575684, - "141": 2.5941407680511475, - "142": 2.652806282043457, - "143": 2.5749118328094482, - "144": 2.5511906147003174, - "145": 2.5554089546203613, - "146": 2.5818586349487305, - "147": 2.5662527084350586, - "148": 2.6069812774658203, - "149": 2.5674352645874023, - "150": 2.5896189212799072, - "151": 2.565016269683838, - "152": 2.561311721801758, - "153": 2.5388855934143066, - "154": 2.5716705322265625, - "155": 2.5705032348632812, - "156": 2.5561070442199707, - "157": 2.5741374492645264, - "158": 2.5815210342407227, - "159": 2.5703485012054443, - "160": 2.6151628494262695, - "161": 2.538440227508545 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 1.42435, - 1.04683, - 0.417307, - 0.822709, - 1.47241, - 2.08114, - 1.5837, - 1.21799, - 1.05902, - 1.43291, - 1.37871, - 1.07012, - 0.533314, - 1.13209, - 2.13201, - 3.09229, - 3.06535, - 2.38398, - 1.88005, - 1.17651, - 0.944915, - 1.04419, - 1.28377, - 1.40114, - 1.3752, - 1.25494, - 1.26991, - 1.60753, - 1.68395, - 1.40831, - 1.61886, - 2.02505, - 1.79805, - 1.08872, - 1.33796, - 2.1478, - 2.19873, - 1.69454, - 1.52798, - 1.34421, - 1.42949, - 1.64912, - 1.8411, - 1.9188, - 1.91564, - 1.77575, - 1.63451, - 1.79556, - 2.10087, - 1.68534, - 1.6615, - 1.49409, - 1.66521, - 2.06355 - ], - "train_epoch_time": 5.052363157272339, - "train_loss": 2.5611019456061603, - "train_score": 0.23936849891113243, - "val_loss": 2.598254974345526, - "val_score": 0.23617339989621658 - }, - { - "epoch": 3, - "grad_norm": 1.1446658372879028, - "learning_rate": 0.215, - "model_norm": 87.47420501708984, - "step_logs": { - "grad_norm": { - "162": 1.0795036554336548, - "163": 1.1948630809783936, - "164": 1.313206672668457, - "165": 1.3505170345306396, - "166": 1.2679893970489502, - "167": 0.9987155795097351, - "168": 0.9981672763824463, - "169": 1.0860553979873657, - "170": 1.3168418407440186, - "171": 1.5022683143615723, - "172": 1.4740920066833496, - "173": 1.2219743728637695, - "174": 1.0660455226898193, - "175": 0.9779313206672668, - "176": 0.9448350071907043, - "177": 1.0835472345352173, - "178": 1.2158117294311523, - "179": 1.3276386260986328, - "180": 1.3913034200668335, - "181": 1.284597635269165, - "182": 1.0919030904769897, - "183": 1.0397491455078125, - "184": 1.2926994562149048, - "185": 1.3351250886917114, - "186": 1.1868009567260742, - "187": 1.0680104494094849, - "188": 1.1032730340957642, - "189": 1.1434894800186157, - "190": 1.0390969514846802, - "191": 0.9297041296958923, - "192": 0.9079300165176392, - "193": 1.0789446830749512, - "194": 1.2187674045562744, - "195": 1.5853400230407715, - "196": 1.4148362874984741, - "197": 1.177739143371582, - "198": 1.0950336456298828, - "199": 1.253442406654358, - "200": 1.3364614248275757, - "201": 1.11751127243042, - "202": 1.0730769634246826, - "203": 1.3222599029541016, - "204": 1.2656381130218506, - "205": 1.0810356140136719, - "206": 1.1039373874664307, - "207": 1.1099750995635986, - "208": 1.1167949438095093, - "209": 1.25467050075531, - "210": 1.2758673429489136, - "211": 1.0997803211212158, - "212": 1.0883212089538574, - "213": 1.2073808908462524, - "214": 1.289371132850647, - "215": 1.1446658372879028 - }, - "loss": { - "162": 2.555593490600586, - "163": 2.5555341243743896, - "164": 2.5946128368377686, - "165": 2.557882070541382, - "166": 2.5956897735595703, - "167": 2.5656960010528564, - "168": 2.513338327407837, - "169": 2.528416156768799, - "170": 2.5558478832244873, - "171": 2.597238063812256, - "172": 2.6018199920654297, - "173": 2.557772397994995, - "174": 2.5404086112976074, - "175": 2.5234920978546143, - "176": 2.495227813720703, - "177": 2.5375282764434814, - "178": 2.5435545444488525, - "179": 2.582265853881836, - "180": 2.5569159984588623, - "181": 2.605103015899658, - "182": 2.5344672203063965, - "183": 2.523806571960449, - "184": 2.5452044010162354, - "185": 2.5882067680358887, - "186": 2.5537099838256836, - "187": 2.5519070625305176, - "188": 2.5399184226989746, - "189": 2.5384864807128906, - "190": 2.53171968460083, - "191": 2.509122371673584, - "192": 2.5117125511169434, - "193": 2.513850212097168, - "194": 2.538954019546509, - "195": 2.5605599880218506, - "196": 2.6236510276794434, - "197": 2.5396456718444824, - "198": 2.5503299236297607, - "199": 2.5266995429992676, - "200": 2.5790867805480957, - "201": 2.530428409576416, - "202": 2.539207935333252, - "203": 2.536179304122925, - "204": 2.5748157501220703, - "205": 2.5254759788513184, - "206": 2.5426840782165527, - "207": 2.5105419158935547, - "208": 2.528724193572998, - "209": 2.5420632362365723, - "210": 2.576490879058838, - "211": 2.5136027336120605, - "212": 2.5271058082580566, - "213": 2.530677318572998, - "214": 2.571873188018799, - "215": 2.538135528564453 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 2.19302, - 1.78997, - 1.50455, - 1.40243, - 1.61444, - 2.5723, - 2.52258, - 2.1436, - 1.4739, - 1.15084, - 1.19737, - 1.71292, - 2.23538, - 2.63867, - 2.79511, - 2.1613, - 1.72071, - 1.46501, - 1.32091, - 1.57867, - 2.12578, - 2.33453, - 1.5231, - 1.45196, - 1.81308, - 2.23725, - 2.08667, - 1.94138, - 2.34479, - 2.9029, - 3.04695, - 2.15944, - 1.70928, - 1.0188, - 1.31067, - 1.83094, - 2.12687, - 1.60822, - 1.44395, - 2.02624, - 2.20514, - 1.4506, - 1.60741, - 2.16104, - 2.08643, - 2.0377, - 2.02747, - 1.61483, - 1.58277, - 2.07819, - 2.13358, - 1.73599, - 1.54701, - 1.93712 - ], - "train_epoch_time": 5.053134441375732, - "train_loss": 2.5162249922239286, - "train_score": 0.24222673056286412, - "val_loss": 2.556968991162446, - "val_score": 0.23407900437763993 - }, - { - "epoch": 4, - "grad_norm": 1.060787320137024, - "learning_rate": 0.215, - "model_norm": 87.49299621582031, - "step_logs": { - "grad_norm": { - "216": 0.9633231163024902, - "217": 1.0106887817382812, - "218": 1.0738520622253418, - "219": 1.2391701936721802, - "220": 1.1446963548660278, - "221": 0.955525279045105, - "222": 1.0772483348846436, - "223": 1.2479866743087769, - "224": 1.2095543146133423, - "225": 1.1579902172088623, - "226": 1.0227599143981934, - "227": 1.0297197103500366, - "228": 0.9692851901054382, - "229": 0.8947920799255371, - "230": 0.9824771881103516, - "231": 1.0812528133392334, - "232": 1.198651671409607, - "233": 1.1882545948028564, - "234": 1.1107783317565918, - "235": 1.1343162059783936, - "236": 1.2151082754135132, - "237": 1.3301852941513062, - "238": 1.3763662576675415, - "239": 1.3176782131195068, - "240": 1.2533372640609741, - "241": 1.0426181554794312, - "242": 0.9654722213745117, - "243": 0.986422061920166, - "244": 1.1485326290130615, - "245": 1.2290763854980469, - "246": 1.1783108711242676, - "247": 1.0896384716033936, - "248": 1.0137453079223633, - "249": 0.9478561878204346, - "250": 1.0650352239608765, - "251": 1.2701523303985596, - "252": 0.9598528742790222, - "253": 0.9563704133033752, - "254": 1.3311500549316406, - "255": 1.385802984237671, - "256": 1.1627907752990723, - "257": 1.1118957996368408, - "258": 1.090212345123291, - "259": 1.122296690940857, - "260": 1.2211205959320068, - "261": 1.1986427307128906, - "262": 1.0584967136383057, - "263": 1.1139775514602661, - "264": 1.1434112787246704, - "265": 1.111130714416504, - "266": 1.1608188152313232, - "267": 1.2084766626358032, - "268": 1.1846649646759033, - "269": 1.060787320137024 - }, - "loss": { - "216": 2.5163536071777344, - "217": 2.4914841651916504, - "218": 2.5015335083007812, - "219": 2.5429632663726807, - "220": 2.568941593170166, - "221": 2.49544095993042, - "222": 2.532780170440674, - "223": 2.5202486515045166, - "224": 2.56973934173584, - "225": 2.503185272216797, - "226": 2.5275704860687256, - "227": 2.4948208332061768, - "228": 2.5221123695373535, - "229": 2.481478214263916, - "230": 2.499135971069336, - "231": 2.5088982582092285, - "232": 2.5080246925354004, - "233": 2.526634693145752, - "234": 2.529207706451416, - "235": 2.512040853500366, - "236": 2.546560764312744, - "237": 2.5465004444122314, - "238": 2.5607752799987793, - "239": 2.549367904663086, - "240": 2.5440845489501953, - "241": 2.5321335792541504, - "242": 2.4821934700012207, - "243": 2.500823974609375, - "244": 2.4934394359588623, - "245": 2.537851095199585, - "246": 2.4922428131103516, - "247": 2.515509605407715, - "248": 2.5096993446350098, - "249": 2.492447853088379, - "250": 2.4846715927124023, - "251": 2.5338258743286133, - "252": 2.53157639503479, - "253": 2.499506950378418, - "254": 2.4946110248565674, - "255": 2.57424259185791, - "256": 2.506796360015869, - "257": 2.5065479278564453, - "258": 2.4947104454040527, - "259": 2.499516487121582, - "260": 2.5047781467437744, - "261": 2.5250051021575928, - "262": 2.4777143001556396, - "263": 2.512481689453125, - "264": 2.5152482986450195, - "265": 2.5075490474700928, - "266": 2.4944794178009033, - "267": 2.510552406311035, - "268": 2.50632381439209, - "269": 2.4979054927825928 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 2.71161, - 2.43906, - 2.16929, - 1.65607, - 1.96053, - 2.73315, - 2.18256, - 1.61817, - 1.75646, - 1.86674, - 2.41633, - 2.35289, - 2.68449, - 3.09932, - 2.58908, - 2.14599, - 1.7456, - 1.78947, - 2.04989, - 1.95235, - 1.72474, - 1.43919, - 1.35177, - 1.4683, - 1.61955, - 2.32936, - 2.66291, - 2.57014, - 1.89022, - 1.68, - 1.79503, - 2.11866, - 2.4421, - 2.77422, - 2.19049, - 1.5706, - 2.74778, - 2.73276, - 1.40783, - 1.34044, - 1.85403, - 2.02744, - 2.09893, - 1.98445, - 1.67978, - 1.75745, - 2.21142, - 2.02465, - 1.92387, - 2.03104, - 1.85119, - 1.71907, - 1.78585, - 2.21983 - ], - "train_epoch_time": 5.052665948867798, - "train_loss": 2.47574853609761, - "train_score": 0.25657057017301726, - "val_loss": 2.5230224606090244, - "val_score": 0.25065029451001253 - }, - { - "epoch": 5, - "grad_norm": 1.9789291620254517, - "learning_rate": 0.215, - "model_norm": 87.51495361328125, - "step_logs": { - "grad_norm": { - "270": 1.0404844284057617, - "271": 1.054120659828186, - "272": 1.0576982498168945, - "273": 1.0886740684509277, - "274": 1.2677284479141235, - "275": 1.4281651973724365, - "276": 1.227057933807373, - "277": 1.1613863706588745, - "278": 1.0496907234191895, - "279": 1.0507171154022217, - "280": 1.2304166555404663, - "281": 1.2998121976852417, - "282": 1.2435519695281982, - "283": 1.1221468448638916, - "284": 1.0892112255096436, - "285": 1.1592762470245361, - "286": 1.5829782485961914, - "287": 1.3556159734725952, - "288": 1.3460111618041992, - "289": 1.287331461906433, - "290": 1.2054649591445923, - "291": 1.216613531112671, - "292": 1.2182255983352661, - "293": 1.2035558223724365, - "294": 1.3098182678222656, - "295": 1.1993869543075562, - "296": 1.1196050643920898, - "297": 1.1932202577590942, - "298": 1.3162342309951782, - "299": 1.2452771663665771, - "300": 1.1721611022949219, - "301": 1.389076828956604, - "302": 1.4930850267410278, - "303": 1.3326239585876465, - "304": 1.0249804258346558, - "305": 1.2022465467453003, - "306": 1.3210219144821167, - "307": 1.347138524055481, - "308": 1.3398345708847046, - "309": 1.3874322175979614, - "310": 1.4049382209777832, - "311": 1.266074776649475, - "312": 1.2584120035171509, - "313": 1.1598255634307861, - "314": 1.0752277374267578, - "315": 1.1054651737213135, - "316": 1.2830973863601685, - "317": 1.3031442165374756, - "318": 1.2562724351882935, - "319": 1.3067193031311035, - "320": 1.253655195236206, - "321": 1.1800053119659424, - "322": 1.389737844467163, - "323": 1.9789291620254517 - }, - "loss": { - "270": 2.481053113937378, - "271": 2.4931485652923584, - "272": 2.47379469871521, - "273": 2.4876770973205566, - "274": 2.4740099906921387, - "275": 2.5152814388275146, - "276": 2.506864547729492, - "277": 2.4897401332855225, - "278": 2.4641270637512207, - "279": 2.4755802154541016, - "280": 2.463754653930664, - "281": 2.5001280307769775, - "282": 2.4866793155670166, - "283": 2.494428873062134, - "284": 2.440568447113037, - "285": 2.464487075805664, - "286": 2.4941728115081787, - "287": 2.567190170288086, - "288": 2.4898314476013184, - "289": 2.5082783699035645, - "290": 2.459127902984619, - "291": 2.4648776054382324, - "292": 2.4492084980010986, - "293": 2.4543380737304688, - "294": 2.4661977291107178, - "295": 2.4770710468292236, - "296": 2.4436912536621094, - "297": 2.458850383758545, - "298": 2.457784652709961, - "299": 2.4822587966918945, - "300": 2.445441484451294, - "301": 2.4586124420166016, - "302": 2.5083487033843994, - "303": 2.4750165939331055, - "304": 2.436406135559082, - "305": 2.4328064918518066, - "306": 2.4731924533843994, - "307": 2.4277968406677246, - "308": 2.4908194541931152, - "309": 2.4814276695251465, - "310": 2.494682788848877, - "311": 2.4678120613098145, - "312": 2.4374711513519287, - "313": 2.445601463317871, - "314": 2.4159693717956543, - "315": 2.407942295074463, - "316": 2.392904043197632, - "317": 2.4680347442626953, - "318": 2.426144599914551, - "319": 2.4157941341400146, - "320": 2.4545459747314453, - "321": 2.4055252075195312, - "322": 2.4295005798339844, - "323": 2.484586715698242 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 2.29174, - 2.24371, - 2.21126, - 2.09893, - 1.53939, - 1.23319, - 1.66495, - 1.84587, - 2.23635, - 2.24236, - 1.6274, - 1.47979, - 1.60802, - 1.98094, - 2.05715, - 1.8338, - 0.995352, - 1.39696, - 1.37427, - 1.51354, - 1.69228, - 1.66529, - 1.65033, - 1.69435, - 1.43749, - 1.72195, - 1.94947, - 1.72699, - 1.41866, - 1.60072, - 1.77985, - 1.2742, - 1.12517, - 1.39368, - 2.31909, - 1.68314, - 1.41722, - 1.33779, - 1.38752, - 1.28907, - 1.26387, - 1.53955, - 1.5392, - 1.81803, - 2.08973, - 1.97041, - 1.45347, - 1.45334, - 1.53727, - 1.4148, - 1.56176, - 1.7276, - 1.25791, - 0.634445 - ], - "train_epoch_time": 5.052392244338989, - "train_loss": 2.5304164905630193, - "train_score": 0.2597583392685763, - "val_loss": 2.5800164375458463, - "val_score": 0.25274917507281125 - }, - { - "epoch": 6, - "grad_norm": 1.1846872568130493, - "learning_rate": 0.215, - "model_norm": 87.54618835449219, - "step_logs": { - "grad_norm": { - "324": 1.3573976755142212, - "325": 1.3110039234161377, - "326": 1.3333898782730103, - "327": 1.2504231929779053, - "328": 1.1230586767196655, - "329": 1.0541247129440308, - "330": 1.1783971786499023, - "331": 1.3918724060058594, - "332": 1.244531273841858, - "333": 1.0875149965286255, - "334": 1.1421831846237183, - "335": 1.2019091844558716, - "336": 1.159886360168457, - "337": 1.1704378128051758, - "338": 1.1401429176330566, - "339": 0.9999255537986755, - "340": 1.0166006088256836, - "341": 1.082255482673645, - "342": 1.1286462545394897, - "343": 1.2869051694869995, - "344": 1.3348263502120972, - "345": 1.2356014251708984, - "346": 1.1513980627059937, - "347": 1.095396876335144, - "348": 1.2476483583450317, - "349": 1.450863242149353, - "350": 1.7418878078460693, - "351": 1.4917750358581543, - "352": 1.3195232152938843, - "353": 1.393325924873352, - "354": 1.245233178138733, - "355": 1.117339849472046, - "356": 1.1995913982391357, - "357": 1.1889171600341797, - "358": 1.1929136514663696, - "359": 1.3817616701126099, - "360": 1.3203043937683105, - "361": 1.130623459815979, - "362": 1.0869464874267578, - "363": 1.0876266956329346, - "364": 1.0385804176330566, - "365": 1.1228587627410889, - "366": 1.2124336957931519, - "367": 1.2343508005142212, - "368": 1.1473438739776611, - "369": 0.9730501174926758, - "370": 1.0877901315689087, - "371": 1.2865344285964966, - "372": 1.4649385213851929, - "373": 1.4533758163452148, - "374": 1.3768742084503174, - "375": 1.2845958471298218, - "376": 1.2969180345535278, - "377": 1.1846872568130493 - }, - "loss": { - "324": 2.5348188877105713, - "325": 2.517831802368164, - "326": 2.469292163848877, - "327": 2.4391627311706543, - "328": 2.432940721511841, - "329": 2.39764404296875, - "330": 2.4189252853393555, - "331": 2.4243788719177246, - "332": 2.4466824531555176, - "333": 2.3837130069732666, - "334": 2.3925302028656006, - "335": 2.4191460609436035, - "336": 2.4181969165802, - "337": 2.38248872756958, - "338": 2.4586009979248047, - "339": 2.376631259918213, - "340": 2.3857100009918213, - "341": 2.381657361984253, - "342": 2.38150954246521, - "343": 2.3754851818084717, - "344": 2.4282846450805664, - "345": 2.3892898559570312, - "346": 2.3803272247314453, - "347": 2.3713722229003906, - "348": 2.391658306121826, - "349": 2.415534019470215, - "350": 2.4277889728546143, - "351": 2.4933362007141113, - "352": 2.4133195877075195, - "353": 2.4069058895111084, - "354": 2.4253978729248047, - "355": 2.3844547271728516, - "356": 2.401344060897827, - "357": 2.390613079071045, - "358": 2.389570713043213, - "359": 2.3708393573760986, - "360": 2.404020309448242, - "361": 2.3591580390930176, - "362": 2.3566761016845703, - "363": 2.348598003387451, - "364": 2.3539350032806396, - "365": 2.3588826656341553, - "366": 2.3553085327148438, - "367": 2.3789544105529785, - "368": 2.380356788635254, - "369": 2.3502817153930664, - "370": 2.346113920211792, - "371": 2.3712704181671143, - "372": 2.3816232681274414, - "373": 2.4218852519989014, - "374": 2.3745689392089844, - "375": 2.371290445327759, - "376": 2.356919288635254, - "377": 2.3802976608276367 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 1.37573, - 1.46494, - 1.38886, - 1.56001, - 1.92898, - 2.15775, - 1.74196, - 1.25142, - 1.57967, - 2.0155, - 1.83394, - 1.67463, - 1.79747, - 1.73914, - 1.89134, - 2.37699, - 2.30843, - 2.03339, - 1.86955, - 1.43436, - 1.36286, - 1.56499, - 1.7955, - 1.97632, - 1.53644, - 1.14752, - 0.800148, - 1.1204, - 1.38606, - 1.23981, - 1.56416, - 1.90993, - 1.66874, - 1.69124, - 1.6792, - 1.24175, - 1.37908, - 1.84553, - 1.99473, - 1.9854, - 2.1823, - 1.87092, - 1.60226, - 1.56138, - 1.80823, - 2.48227, - 1.98271, - 1.43264, - 1.10977, - 1.14656, - 1.25255, - 1.43698, - 1.40126, - 1.69599 - ], - "train_epoch_time": 5.0534539222717285, - "train_loss": 2.324898044142867, - "train_score": 0.3233926650783424, - "val_loss": 2.37294156707387, - "val_score": 0.3126345435253651 - }, - { - "epoch": 7, - "grad_norm": 1.1531764268875122, - "learning_rate": 0.215, - "model_norm": 87.57366943359375, - "step_logs": { - "grad_norm": { - "378": 1.1289430856704712, - "379": 1.243653416633606, - "380": 1.3153750896453857, - "381": 1.3431339263916016, - "382": 1.3326919078826904, - "383": 1.3011196851730347, - "384": 1.2887794971466064, - "385": 1.31019926071167, - "386": 1.1913291215896606, - "387": 1.1776820421218872, - "388": 1.1971888542175293, - "389": 1.3472508192062378, - "390": 1.363603115081787, - "391": 1.162752389907837, - "392": 1.0115514993667603, - "393": 1.0417031049728394, - "394": 1.1638439893722534, - "395": 1.1206867694854736, - "396": 0.9458897709846497, - "397": 0.9332041144371033, - "398": 0.9492629766464233, - "399": 1.1382262706756592, - "400": 1.2862460613250732, - "401": 1.3330069780349731, - "402": 1.309767246246338, - "403": 1.2581886053085327, - "404": 1.23268723487854, - "405": 1.2677524089813232, - "406": 1.2518386840820312, - "407": 1.2291858196258545, - "408": 1.206500768661499, - "409": 1.1809968948364258, - "410": 1.2421176433563232, - "411": 1.1774324178695679, - "412": 1.0919493436813354, - "413": 1.2882260084152222, - "414": 1.288912296295166, - "415": 1.2558650970458984, - "416": 1.156387448310852, - "417": 1.1074306964874268, - "418": 1.2333728075027466, - "419": 1.3118255138397217, - "420": 1.2340978384017944, - "421": 1.3532516956329346, - "422": 1.341996192932129, - "423": 1.3310952186584473, - "424": 1.3048169612884521, - "425": 1.2553791999816895, - "426": 1.0900002717971802, - "427": 0.8893017172813416, - "428": 1.004921555519104, - "429": 1.3000060319900513, - "430": 1.3357574939727783, - "431": 1.1531764268875122 - }, - "loss": { - "378": 2.3454017639160156, - "379": 2.3728151321411133, - "380": 2.3513553142547607, - "381": 2.3730781078338623, - "382": 2.352410316467285, - "383": 2.38809871673584, - "384": 2.3483800888061523, - "385": 2.3708133697509766, - "386": 2.3452467918395996, - "387": 2.32724928855896, - "388": 2.3073418140411377, - "389": 2.350141763687134, - "390": 2.4108469486236572, - "391": 2.361056327819824, - "392": 2.3009684085845947, - "393": 2.312511444091797, - "394": 2.322472095489502, - "395": 2.345559597015381, - "396": 2.3028035163879395, - "397": 2.30960750579834, - "398": 2.2892653942108154, - "399": 2.325058937072754, - "400": 2.3363466262817383, - "401": 2.3556876182556152, - "402": 2.358995199203491, - "403": 2.351083755493164, - "404": 2.350637674331665, - "405": 2.3232100009918213, - "406": 2.3398730754852295, - "407": 2.3386454582214355, - "408": 2.3467605113983154, - "409": 2.315364360809326, - "410": 2.318145751953125, - "411": 2.3347935676574707, - "412": 2.3186306953430176, - "413": 2.303685188293457, - "414": 2.3645644187927246, - "415": 2.318037509918213, - "416": 2.3407301902770996, - "417": 2.310563087463379, - "418": 2.308903217315674, - "419": 2.341179370880127, - "420": 2.3423972129821777, - "421": 2.3010945320129395, - "422": 2.374690055847168, - "423": 2.324794054031372, - "424": 2.348637104034424, - "425": 2.3122482299804688, - "426": 2.3057942390441895, - "427": 2.269679546356201, - "428": 2.284641742706299, - "429": 2.3086280822753906, - "430": 2.349426746368408, - "431": 2.304624557495117 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 1.84023, - 1.53414, - 1.359, - 1.31545, - 1.3245, - 1.41065, - 1.41388, - 1.38109, - 1.65244, - 1.67798, - 1.60985, - 1.29478, - 1.29656, - 1.74635, - 2.24872, - 2.13106, - 1.71459, - 1.86757, - 2.57381, - 2.65207, - 2.54052, - 1.79464, - 1.41218, - 1.32572, - 1.37511, - 1.48517, - 1.54696, - 1.4455, - 1.49312, - 1.54785, - 1.61218, - 1.66005, - 1.5025, - 1.68413, - 1.94458, - 1.38816, - 1.42333, - 1.46972, - 1.75043, - 1.88402, - 1.51781, - 1.36045, - 1.53802, - 1.25654, - 1.31857, - 1.3121, - 1.37948, - 1.46718, - 1.94074, - 2.8699, - 2.26232, - 1.36604, - 1.31676, - 1.73304 - ], - "train_epoch_time": 5.05295467376709, - "train_loss": 2.3001526758694752, - "train_score": 0.3114732783186213, - "val_loss": 2.3597924676472504, - "val_score": 0.299180181134582 - }, - { - "epoch": 8, - "grad_norm": 1.2681738138198853, - "learning_rate": 0.215, - "model_norm": 87.60327911376953, - "step_logs": { - "grad_norm": { - "432": 1.1657077074050903, - "433": 1.0624216794967651, - "434": 0.9508992433547974, - "435": 1.0236607789993286, - "436": 1.0205790996551514, - "437": 1.1780050992965698, - "438": 1.1690641641616821, - "439": 0.9694347381591797, - "440": 0.855843722820282, - "441": 0.9146633744239807, - "442": 0.9712487459182739, - "443": 1.1102378368377686, - "444": 1.2463276386260986, - "445": 1.3122589588165283, - "446": 1.3411824703216553, - "447": 1.3111793994903564, - "448": 1.1978651285171509, - "449": 1.1386679410934448, - "450": 1.1126952171325684, - "451": 1.1515297889709473, - "452": 1.554506778717041, - "453": 1.7724924087524414, - "454": 1.4600499868392944, - "455": 1.2319329977035522, - "456": 1.2554680109024048, - "457": 1.1865328550338745, - "458": 1.2981535196304321, - "459": 1.3265959024429321, - "460": 1.3346232175827026, - "461": 1.337964653968811, - "462": 1.0969263315200806, - "463": 0.9672433137893677, - "464": 1.0292184352874756, - "465": 1.225926399230957, - "466": 1.2225052118301392, - "467": 1.1589717864990234, - "468": 1.1485871076583862, - "469": 1.1403145790100098, - "470": 1.1448750495910645, - "471": 1.07486891746521, - "472": 1.0754045248031616, - "473": 1.0469359159469604, - "474": 0.9945337176322937, - "475": 1.0120408535003662, - "476": 1.1361474990844727, - "477": 1.2197010517120361, - "478": 1.2575366497039795, - "479": 1.2691034078598022, - "480": 1.3010587692260742, - "481": 1.2214558124542236, - "482": 1.2115081548690796, - "483": 1.3336796760559082, - "484": 1.3091402053833008, - "485": 1.2681738138198853 - }, - "loss": { - "432": 2.3262267112731934, - "433": 2.289912223815918, - "434": 2.285169839859009, - "435": 2.2982277870178223, - "436": 2.2939398288726807, - "437": 2.2875099182128906, - "438": 2.2896578311920166, - "439": 2.269970655441284, - "440": 2.2595419883728027, - "441": 2.2395036220550537, - "442": 2.298501491546631, - "443": 2.263517379760742, - "444": 2.332143545150757, - "445": 2.3122246265411377, - "446": 2.3136560916900635, - "447": 2.3255043029785156, - "448": 2.3160669803619385, - "449": 2.2910187244415283, - "450": 2.2740893363952637, - "451": 2.2703871726989746, - "452": 2.3054635524749756, - "453": 2.380159378051758, - "454": 2.3471803665161133, - "455": 2.3172059059143066, - "456": 2.279041051864624, - "457": 2.317080020904541, - "458": 2.2779951095581055, - "459": 2.3112294673919678, - "460": 2.286576747894287, - "461": 2.330930709838867, - "462": 2.2930221557617188, - "463": 2.2537410259246826, - "464": 2.254612922668457, - "465": 2.26353120803833, - "466": 2.2835307121276855, - "467": 2.2856826782226562, - "468": 2.297455310821533, - "469": 2.271739959716797, - "470": 2.3129823207855225, - "471": 2.2743937969207764, - "472": 2.263763904571533, - "473": 2.257481813430786, - "474": 2.246896266937256, - "475": 2.265903949737549, - "476": 2.2495508193969727, - "477": 2.257024049758911, - "478": 2.291917562484741, - "479": 2.2819509506225586, - "480": 2.291085720062256, - "481": 2.2643344402313232, - "482": 2.2799153327941895, - "483": 2.2721385955810547, - "484": 2.281658172607422, - "485": 2.279491424560547 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 1.71188, - 2.02873, - 2.52726, - 2.19321, - 2.20236, - 1.64842, - 1.67531, - 2.41537, - 3.08483, - 2.67688, - 2.4366, - 1.83633, - 1.50138, - 1.34274, - 1.28624, - 1.35267, - 1.61412, - 1.76699, - 1.83677, - 1.71218, - 0.954054, - 0.757595, - 1.10106, - 1.52683, - 1.44591, - 1.64582, - 1.35176, - 1.31331, - 1.28371, - 1.30209, - 1.90569, - 2.40898, - 2.12842, - 1.50611, - 1.52794, - 1.70165, - 1.74148, - 1.74707, - 1.76464, - 1.96859, - 1.95744, - 2.05961, - 2.27166, - 2.21231, - 1.74272, - 1.51715, - 1.4493, - 1.41681, - 1.35347, - 1.5177, - 1.55334, - 1.27741, - 1.33131, - 1.41736 - ], - "train_epoch_time": 5.059791326522827, - "train_loss": 2.278086079415495, - "train_score": 0.32514235111356976, - "val_loss": 2.3398839945634387, - "val_score": 0.3114057124413798 - }, - { - "epoch": 9, - "grad_norm": 1.0351953506469727, - "learning_rate": 0.215, - "model_norm": 87.63327026367188, - "step_logs": { - "grad_norm": { - "486": 1.1684287786483765, - "487": 1.0308504104614258, - "488": 1.0464164018630981, - "489": 1.0797393321990967, - "490": 1.0152900218963623, - "491": 0.9935247898101807, - "492": 1.0372517108917236, - "493": 1.0761717557907104, - "494": 1.056221604347229, - "495": 1.1687315702438354, - "496": 1.201019287109375, - "497": 1.1341582536697388, - "498": 1.0773943662643433, - "499": 1.104231357574463, - "500": 1.2758419513702393, - "501": 1.1967161893844604, - "502": 1.0560189485549927, - "503": 1.0503727197647095, - "504": 1.089692234992981, - "505": 1.169783115386963, - "506": 1.1873635053634644, - "507": 1.1956232786178589, - "508": 1.1792436838150024, - "509": 1.166176676750183, - "510": 1.1644901037216187, - "511": 1.114013910293579, - "512": 1.1182492971420288, - "513": 1.0733829736709595, - "514": 1.024760127067566, - "515": 1.0335193872451782, - "516": 1.029309868812561, - "517": 1.1361197233200073, - "518": 1.2207305431365967, - "519": 1.1736738681793213, - "520": 1.063452959060669, - "521": 1.191510558128357, - "522": 1.1893848180770874, - "523": 1.1825417280197144, - "524": 1.288385033607483, - "525": 1.2547293901443481, - "526": 1.448888897895813, - "527": 1.4466297626495361, - "528": 1.411918044090271, - "529": 1.2146190404891968, - "530": 0.8395581841468811, - "531": 0.7797887325286865, - "532": 0.8696569204330444, - "533": 0.8927386999130249, - "534": 0.8359286189079285, - "535": 0.876964807510376, - "536": 0.9378746151924133, - "537": 1.0051822662353516, - "538": 1.0174076557159424, - "539": 1.0351953506469727 - }, - "loss": { - "486": 2.289457082748413, - "487": 2.258662700653076, - "488": 2.242375373840332, - "489": 2.2333269119262695, - "490": 2.227339506149292, - "491": 2.235215425491333, - "492": 2.2564926147460938, - "493": 2.261200428009033, - "494": 2.2278871536254883, - "495": 2.229905128479004, - "496": 2.2912046909332275, - "497": 2.24263072013855, - "498": 2.264322519302368, - "499": 2.2308273315429688, - "500": 2.2664594650268555, - "501": 2.264845371246338, - "502": 2.2422800064086914, - "503": 2.220738410949707, - "504": 2.2208306789398193, - "505": 2.243136167526245, - "506": 2.261786460876465, - "507": 2.223053455352783, - "508": 2.2746758460998535, - "509": 2.269198179244995, - "510": 2.230855941772461, - "511": 2.254364013671875, - "512": 2.251873016357422, - "513": 2.254303216934204, - "514": 2.2264745235443115, - "515": 2.226069927215576, - "516": 2.2261905670166016, - "517": 2.235260009765625, - "518": 2.2614593505859375, - "519": 2.260850429534912, - "520": 2.2187652587890625, - "521": 2.267563819885254, - "522": 2.2565879821777344, - "523": 2.258462905883789, - "524": 2.284813165664673, - "525": 2.260769844055176, - "526": 2.264866352081299, - "527": 2.297207832336426, - "528": 2.2487082481384277, - "529": 2.2696166038513184, - "530": 2.215879440307617, - "531": 2.190382242202759, - "532": 2.2044198513031006, - "533": 2.2123496532440186, - "534": 2.214869737625122, - "535": 2.216726541519165, - "536": 2.2274861335754395, - "537": 2.209041118621826, - "538": 2.2008519172668457, - "539": 2.2481071949005127 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 1.67698, - 2.12549, - 2.04786, - 1.91564, - 2.16076, - 2.26445, - 2.09732, - 1.95243, - 1.99702, - 1.63251, - 1.58841, - 1.74345, - 1.95069, - 1.82956, - 1.39237, - 1.58145, - 2.0107, - 2.01285, - 1.87028, - 1.63925, - 1.60429, - 1.55511, - 1.63573, - 1.66857, - 1.64513, - 1.81653, - 1.8008, - 1.9566, - 2.12018, - 2.08402, - 2.10121, - 1.73173, - 1.51757, - 1.64126, - 1.96189, - 1.59722, - 1.59517, - 1.61503, - 1.37645, - 1.43601, - 1.07888, - 1.0977, - 1.12801, - 1.53841, - 3.14372, - 3.60218, - 2.91473, - 2.77591, - 3.16964, - 2.88236, - 2.53236, - 2.18632, - 2.12618, - 2.09784 - ], - "train_epoch_time": 5.053409814834595, - "train_loss": 2.2223256973830323, - "train_score": 0.34378474719897234, - "val_loss": 2.2957989156725214, - "val_score": 0.3247748635598083 - }, - { - "epoch": 10, - "grad_norm": 1.1790107488632202, - "learning_rate": 0.215, - "model_norm": 87.6618423461914, - "step_logs": { - "grad_norm": { - "540": 1.1793591976165771, - "541": 1.2544864416122437, - "542": 1.3156967163085938, - "543": 1.3263134956359863, - "544": 1.2598274946212769, - "545": 1.2719249725341797, - "546": 1.3314683437347412, - "547": 1.3810806274414062, - "548": 1.2629423141479492, - "549": 1.1641008853912354, - "550": 1.079391360282898, - "551": 0.9686326384544373, - "552": 0.9809894561767578, - "553": 1.082485556602478, - "554": 1.3549513816833496, - "555": 1.5087933540344238, - "556": 1.3560024499893188, - "557": 1.0993787050247192, - "558": 1.0621819496154785, - "559": 1.0812126398086548, - "560": 1.0281181335449219, - "561": 1.03694486618042, - "562": 1.1006512641906738, - "563": 1.0757622718811035, - "564": 0.9926916360855103, - "565": 1.0858652591705322, - "566": 1.0707896947860718, - "567": 1.0707064867019653, - "568": 1.0729668140411377, - "569": 1.0329747200012207, - "570": 0.9695006012916565, - "571": 0.9426378011703491, - "572": 0.9705097079277039, - "573": 0.9889181852340698, - "574": 1.0636265277862549, - "575": 1.1128720045089722, - "576": 1.1323602199554443, - "577": 1.126505970954895, - "578": 1.0679641962051392, - "579": 1.0324246883392334, - "580": 1.00874662399292, - "581": 1.0246080160140991, - "582": 1.121915340423584, - "583": 1.1432573795318604, - "584": 1.168141484260559, - "585": 1.1352272033691406, - "586": 1.1535664796829224, - "587": 1.191074252128601, - "588": 1.13308584690094, - "589": 1.096796989440918, - "590": 1.1544077396392822, - "591": 1.2780288457870483, - "592": 1.2854341268539429, - "593": 1.1790107488632202 - }, - "loss": { - "540": 2.2375879287719727, - "541": 2.2494208812713623, - "542": 2.232557773590088, - "543": 2.268441677093506, - "544": 2.2331626415252686, - "545": 2.2606444358825684, - "546": 2.2736902236938477, - "547": 2.265397071838379, - "548": 2.2437784671783447, - "549": 2.237973213195801, - "550": 2.2194788455963135, - "551": 2.2054667472839355, - "552": 2.2070603370666504, - "553": 2.2249860763549805, - "554": 2.2266693115234375, - "555": 2.3066585063934326, - "556": 2.258350133895874, - "557": 2.2342143058776855, - "558": 2.187175989151001, - "559": 2.222107410430908, - "560": 2.2103919982910156, - "561": 2.1974058151245117, - "562": 2.223552942276001, - "563": 2.252666473388672, - "564": 2.1936118602752686, - "565": 2.1940817832946777, - "566": 2.229010581970215, - "567": 2.203260660171509, - "568": 2.1924338340759277, - "569": 2.214766502380371, - "570": 2.2222049236297607, - "571": 2.1961467266082764, - "572": 2.192488193511963, - "573": 2.173339366912842, - "574": 2.1902012825012207, - "575": 2.220437526702881, - "576": 2.173555374145508, - "577": 2.213186264038086, - "578": 2.193436622619629, - "579": 2.205944538116455, - "580": 2.1946370601654053, - "581": 2.196967601776123, - "582": 2.181394577026367, - "583": 2.1926252841949463, - "584": 2.2116217613220215, - "585": 2.202755928039551, - "586": 2.2120208740234375, - "587": 2.240696907043457, - "588": 2.2186598777770996, - "589": 2.1977717876434326, - "590": 2.2106475830078125, - "591": 2.215855360031128, - "592": 2.2250189781188965, - "593": 2.1973719596862793 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 1.60875, - 1.42935, - 1.28971, - 1.28954, - 1.40701, - 1.39736, - 1.28254, - 1.1877, - 1.40674, - 1.65148, - 1.90499, - 2.35062, - 2.29343, - 1.89882, - 1.21285, - 1.01327, - 1.2282, - 1.84855, - 1.93859, - 1.90083, - 2.09114, - 2.04361, - 1.83547, - 1.94654, - 2.22603, - 1.86081, - 1.94403, - 1.92187, - 1.90438, - 2.07562, - 2.36422, - 2.47156, - 2.32776, - 2.22232, - 1.936, - 1.79287, - 1.69512, - 1.74402, - 1.92314, - 2.06956, - 2.15674, - 2.09271, - 1.73306, - 1.67755, - 1.62076, - 1.70923, - 1.66228, - 1.57945, - 1.72808, - 1.82696, - 1.65883, - 1.35663, - 1.34659, - 1.58077 - ], - "train_epoch_time": 5.052105665206909, - "train_loss": 2.2066850610921853, - "train_score": 0.34228725790806447, - "val_loss": 2.2768776690508274, - "val_score": 0.32542964275477265 - }, - { - "epoch": 11, - "grad_norm": 1.0811924934387207, - "learning_rate": 0.215, - "model_norm": 87.69290161132812, - "step_logs": { - "grad_norm": { - "594": 1.0722501277923584, - "595": 0.9539421796798706, - "596": 0.8895746469497681, - "597": 0.9736768007278442, - "598": 1.1487165689468384, - "599": 1.2902697324752808, - "600": 1.2751232385635376, - "601": 1.2577886581420898, - "602": 1.2878049612045288, - "603": 1.3745046854019165, - "604": 1.474057912826538, - "605": 1.3831669092178345, - "606": 1.188003420829773, - "607": 1.0976040363311768, - "608": 1.110308289527893, - "609": 1.1269159317016602, - "610": 1.09621262550354, - "611": 1.125464677810669, - "612": 1.1130430698394775, - "613": 1.1233985424041748, - "614": 1.2097125053405762, - "615": 1.1646780967712402, - "616": 1.0226004123687744, - "617": 1.0091562271118164, - "618": 0.9841094613075256, - "619": 1.052068829536438, - "620": 1.039048194885254, - "621": 0.9284295439720154, - "622": 0.930082380771637, - "623": 0.9472982883453369, - "624": 1.0345968008041382, - "625": 1.1250677108764648, - "626": 1.133701205253601, - "627": 1.1700854301452637, - "628": 1.1502556800842285, - "629": 1.1888290643692017, - "630": 1.3324466943740845, - "631": 1.4147093296051025, - "632": 1.409759283065796, - "633": 1.324647307395935, - "634": 1.196407437324524, - "635": 1.1432737112045288, - "636": 1.0334744453430176, - "637": 1.0733283758163452, - "638": 1.0704445838928223, - "639": 1.1816291809082031, - "640": 1.2707067728042603, - "641": 1.4233193397521973, - "642": 1.510169506072998, - "643": 1.4230880737304688, - "644": 1.5449576377868652, - "645": 1.3111509084701538, - "646": 1.1607167720794678, - "647": 1.0811924934387207 - }, - "loss": { - "594": 2.197610378265381, - "595": 2.1663661003112793, - "596": 2.1908884048461914, - "597": 2.1659035682678223, - "598": 2.1972155570983887, - "599": 2.1681125164031982, - "600": 2.222126007080078, - "601": 2.2042784690856934, - "602": 2.209777593612671, - "603": 2.2139716148376465, - "604": 2.2378392219543457, - "605": 2.250018835067749, - "606": 2.227654218673706, - "607": 2.2098264694213867, - "608": 2.2035820484161377, - "609": 2.1683459281921387, - "610": 2.1792078018188477, - "611": 2.1931228637695312, - "612": 2.1854279041290283, - "613": 2.1973283290863037, - "614": 2.193040132522583, - "615": 2.2128329277038574, - "616": 2.211945056915283, - "617": 2.1756386756896973, - "618": 2.191995620727539, - "619": 2.1691370010375977, - "620": 2.1774208545684814, - "621": 2.1583166122436523, - "622": 2.1612367630004883, - "623": 2.1501541137695312, - "624": 2.1886472702026367, - "625": 2.179947853088379, - "626": 2.1892282962799072, - "627": 2.1607470512390137, - "628": 2.1942410469055176, - "629": 2.1435461044311523, - "630": 2.2039132118225098, - "631": 2.18869948387146, - "632": 2.2016348838806152, - "633": 2.2277731895446777, - "634": 2.1970982551574707, - "635": 2.169689178466797, - "636": 2.167137622833252, - "637": 2.184201717376709, - "638": 2.1892950534820557, - "639": 2.166654348373413, - "640": 2.200949192047119, - "641": 2.1958212852478027, - "642": 2.2325479984283447, - "643": 2.2413299083709717, - "644": 2.231419801712036, - "645": 2.235685110092163, - "646": 2.2096056938171387, - "647": 2.195629596710205 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 1.91143, - 2.38061, - 2.76857, - 2.2846, - 1.66513, - 1.30233, - 1.36667, - 1.39332, - 1.33244, - 1.17187, - 1.02991, - 1.17608, - 1.57838, - 1.83428, - 1.78748, - 1.70744, - 1.81346, - 1.73141, - 1.76406, - 1.74111, - 1.49859, - 1.63131, - 2.11525, - 2.13634, - 2.26336, - 1.95974, - 2.01684, - 2.5039, - 2.49839, - 2.39605, - 2.04472, - 1.72222, - 1.70331, - 1.57822, - 1.65842, - 1.51668, - 1.24135, - 1.09358, - 1.10778, - 1.26961, - 1.53494, - 1.65996, - 2.02902, - 1.89595, - 1.91063, - 1.55177, - 1.36307, - 1.08391, - 0.978925, - 1.10673, - 0.934863, - 1.30048, - 1.64007, - 1.87825 - ], - "train_epoch_time": 5.052819490432739, - "train_loss": 2.1614820974287037, - "train_score": 0.3638685885084582, - "val_loss": 2.241458268171063, - "val_score": 0.34152554541860464 - }, - { - "epoch": 12, - "grad_norm": 0.7322170734405518, - "learning_rate": 0.215, - "model_norm": 87.72135162353516, - "step_logs": { - "grad_norm": { - "648": 0.9811676740646362, - "649": 0.9268051385879517, - "650": 0.9433252215385437, - "651": 1.0863995552062988, - "652": 1.1166901588439941, - "653": 1.0618934631347656, - "654": 1.0624192953109741, - "655": 1.0678328275680542, - "656": 1.0174188613891602, - "657": 1.0399116277694702, - "658": 1.1021108627319336, - "659": 1.0501272678375244, - "660": 1.0812186002731323, - "661": 1.113978385925293, - "662": 1.1203151941299438, - "663": 1.1313939094543457, - "664": 1.147111177444458, - "665": 1.193229079246521, - "666": 1.1617342233657837, - "667": 1.014167070388794, - "668": 0.9099019765853882, - "669": 0.8943407535552979, - "670": 0.9062685966491699, - "671": 0.9609559774398804, - "672": 1.0396455526351929, - "673": 1.0577466487884521, - "674": 0.9902991056442261, - "675": 0.8730791807174683, - "676": 0.7829840779304504, - "677": 0.7391128540039062, - "678": 0.726824164390564, - "679": 0.7180324196815491, - "680": 0.789458155632019, - "681": 0.7767745852470398, - "682": 0.7667245268821716, - "683": 0.747543215751648, - "684": 0.7859933376312256, - "685": 0.863521158695221, - "686": 0.8415840864181519, - "687": 0.7546394467353821, - "688": 0.7119998931884766, - "689": 0.6587206125259399, - "690": 0.649011492729187, - "691": 0.594395637512207, - "692": 0.607739269733429, - "693": 0.6333566308021545, - "694": 0.7254647612571716, - "695": 0.7646143436431885, - "696": 0.7775688767433167, - "697": 0.7838239073753357, - "698": 0.7927706837654114, - "699": 0.8128005862236023, - "700": 0.7663552165031433, - "701": 0.7322170734405518 - }, - "loss": { - "648": 2.1460654735565186, - "649": 2.1604654788970947, - "650": 2.1466236114501953, - "651": 2.152681589126587, - "652": 2.1853973865509033, - "653": 2.1641101837158203, - "654": 2.13713002204895, - "655": 2.157498598098755, - "656": 2.1578383445739746, - "657": 2.1604483127593994, - "658": 2.15325665473938, - "659": 2.1440532207489014, - "660": 2.136319160461426, - "661": 2.17118501663208, - "662": 2.1742184162139893, - "663": 2.168506622314453, - "664": 2.1592202186584473, - "665": 2.169806957244873, - "666": 2.1680572032928467, - "667": 2.1269326210021973, - "668": 2.1224851608276367, - "669": 2.14237642288208, - "670": 2.1322193145751953, - "671": 2.109828472137451, - "672": 2.133904457092285, - "673": 2.1641364097595215, - "674": 2.1350114345550537, - "675": 2.1151628494262695, - "676": 2.14634108543396, - "677": 2.1125316619873047, - "678": 2.111654281616211, - "679": 2.1284406185150146, - "680": 2.1131863594055176, - "681": 2.0897419452667236, - "682": 2.116541862487793, - "683": 2.1236226558685303, - "684": 2.111642599105835, - "685": 2.112362861633301, - "686": 2.1085355281829834, - "687": 2.093629837036133, - "688": 2.109797477722168, - "689": 2.0911669731140137, - "690": 2.084482192993164, - "691": 2.1057138442993164, - "692": 2.0671777725219727, - "693": 2.0651800632476807, - "694": 2.086766242980957, - "695": 2.0856857299804688, - "696": 2.1058921813964844, - "697": 2.084188938140869, - "698": 2.0881385803222656, - "699": 2.0810317993164062, - "700": 2.105480670928955, - "701": 2.092444896697998 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 2.22924, - 2.51519, - 2.41231, - 1.8239, - 1.75253, - 1.91919, - 1.89339, - 1.8921, - 2.08458, - 1.9978, - 1.77274, - 1.94425, - 1.82742, - 1.74962, - 1.7323, - 1.69408, - 1.64091, - 1.52396, - 1.60641, - 2.06792, - 2.56363, - 2.67849, - 2.59608, - 2.28476, - 1.97426, - 1.93429, - 2.17705, - 2.77483, - 3.50101, - 3.86707, - 3.99728, - 4.12832, - 3.39062, - 3.4634, - 3.60038, - 3.80019, - 3.41808, - 2.83284, - 2.97705, - 3.67638, - 4.1618, - 4.81932, - 4.94873, - 5.96003, - 5.59684, - 5.14827, - 3.96498, - 3.5675, - 3.48304, - 3.39235, - 3.32249, - 3.15, - 3.58502, - 3.90278 - ], - "train_epoch_time": 5.051898241043091, - "train_loss": 2.0858033783637637, - "train_score": 0.38239105089836173, - "val_loss": 2.176868250668391, - "val_score": 0.3553252372758123 - }, - { - "epoch": 13, - "grad_norm": 0.5446042418479919, - "learning_rate": 0.14333333333333334, - "model_norm": 87.7396240234375, - "step_logs": { - "grad_norm": { - "702": 0.7106263637542725, - "703": 0.7023528814315796, - "704": 0.7346948981285095, - "705": 0.7453676462173462, - "706": 0.6841105818748474, - "707": 0.6793532371520996, - "708": 0.7205890417098999, - "709": 0.7325961589813232, - "710": 0.739450216293335, - "711": 0.6701182723045349, - "712": 0.6422028541564941, - "713": 0.625672459602356, - "714": 0.6575274467468262, - "715": 0.728140652179718, - "716": 0.7375804781913757, - "717": 0.6998003125190735, - "718": 0.640377402305603, - "719": 0.6396369338035583, - "720": 0.6164888739585876, - "721": 0.6106836199760437, - "722": 0.6095193028450012, - "723": 0.57877516746521, - "724": 0.5720216035842896, - "725": 0.5472336411476135, - "726": 0.5757449865341187, - "727": 0.540033757686615, - "728": 0.5368126630783081, - "729": 0.5741632580757141, - "730": 0.5675048828125, - "731": 0.5887230038642883, - "732": 0.6098219156265259, - "733": 0.5937517881393433, - "734": 0.530218243598938, - "735": 0.5037831664085388, - "736": 0.5180294513702393, - "737": 0.5833373665809631, - "738": 0.5581859350204468, - "739": 0.46934571862220764, - "740": 0.4947464168071747, - "741": 0.5710729956626892, - "742": 0.5602770447731018, - "743": 0.5280522704124451, - "744": 0.5501554608345032, - "745": 0.5401403903961182, - "746": 0.5467321872711182, - "747": 0.5148298144340515, - "748": 0.5362414121627808, - "749": 0.5347321033477783, - "750": 0.5060610771179199, - "751": 0.47633087635040283, - "752": 0.4503069519996643, - "753": 0.4799935519695282, - "754": 0.46059876680374146, - "755": 0.5446042418479919 - }, - "loss": { - "702": 2.082811117172241, - "703": 2.0821421146392822, - "704": 2.084503173828125, - "705": 2.0871059894561768, - "706": 2.071005344390869, - "707": 2.081273078918457, - "708": 2.075662136077881, - "709": 2.0994250774383545, - "710": 2.08886981010437, - "711": 2.081561803817749, - "712": 2.0550527572631836, - "713": 2.081940174102783, - "714": 2.0638108253479004, - "715": 2.1018009185791016, - "716": 2.0605666637420654, - "717": 2.040350914001465, - "718": 2.064635992050171, - "719": 2.0932199954986572, - "720": 2.041184425354004, - "721": 2.053959846496582, - "722": 2.0766549110412598, - "723": 2.0492029190063477, - "724": 2.0703554153442383, - "725": 2.096932888031006, - "726": 2.0610103607177734, - "727": 2.062887191772461, - "728": 2.0565528869628906, - "729": 2.0642240047454834, - "730": 2.0523338317871094, - "731": 2.0799221992492676, - "732": 2.049804925918579, - "733": 2.0646867752075195, - "734": 2.0541014671325684, - "735": 2.0829780101776123, - "736": 2.0456130504608154, - "737": 2.0479836463928223, - "738": 2.0667760372161865, - "739": 2.069713830947876, - "740": 2.0498552322387695, - "741": 2.089901924133301, - "742": 2.043478012084961, - "743": 2.041771411895752, - "744": 2.0558619499206543, - "745": 2.0418753623962402, - "746": 2.0727176666259766, - "747": 2.060298442840576, - "748": 2.041529655456543, - "749": 2.0278160572052, - "750": 2.0660321712493896, - "751": 2.0380566120147705, - "752": 2.071171760559082, - "753": 2.065018653869629, - "754": 2.056190013885498, - "755": 2.0613954067230225 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 4.12446, - 4.22085, - 3.86179, - 3.75667, - 4.42516, - 4.5096, - 3.99743, - 3.91175, - 3.82027, - 4.63539, - 4.98286, - 5.31832, - 4.77356, - 3.96425, - 3.78763, - 4.16636, - 5.03468, - 5.1162, - 5.37071, - 5.50756, - 5.58971, - 6.11738, - 6.32732, - 7.00227, - 6.21756, - 7.07349, - 7.13665, - 6.26161, - 6.37249, - 6.00102, - 5.51197, - 5.85658, - 7.30655, - 8.20724, - 7.6228, - 6.01848, - 6.63339, - 9.3956, - 8.37448, - 6.4083, - 6.50975, - 7.3224, - 6.7924, - 6.99868, - 6.93411, - 7.77325, - 7.09962, - 7.09179, - 8.06736, - 8.98253, - 10.2141, - 8.963, - 9.6921, - 6.95023 - ], - "train_epoch_time": 5.058979749679565, - "train_loss": 2.045722785331255, - "train_score": 0.39361437410329986, - "val_loss": 2.140639336867393, - "val_score": 0.3675283445419592 - }, - { - "epoch": 14, - "grad_norm": 0.44176968932151794, - "learning_rate": 0.07166666666666667, - "model_norm": 87.74591064453125, - "step_logs": { - "grad_norm": { - "756": 0.4906125068664551, - "757": 0.5034204721450806, - "758": 0.5374881029129028, - "759": 0.5053876638412476, - "760": 0.5014017820358276, - "761": 0.494512677192688, - "762": 0.4802466630935669, - "763": 0.4998571276664734, - "764": 0.516618013381958, - "765": 0.46685874462127686, - "766": 0.4829357862472534, - "767": 0.4740074574947357, - "768": 0.4797145128250122, - "769": 0.5078187584877014, - "770": 0.5147002339363098, - "771": 0.4584689438343048, - "772": 0.45266568660736084, - "773": 0.48620399832725525, - "774": 0.48905470967292786, - "775": 0.5051054954528809, - "776": 0.4816376268863678, - "777": 0.42868202924728394, - "778": 0.4547256827354431, - "779": 0.5143576860427856, - "780": 0.4641590118408203, - "781": 0.480656236410141, - "782": 0.43557465076446533, - "783": 0.4607539474964142, - "784": 0.4573465883731842, - "785": 0.44665324687957764, - "786": 0.45431259274482727, - "787": 0.46867701411247253, - "788": 0.4666171073913574, - "789": 0.4550279378890991, - "790": 0.46211832761764526, - "791": 0.4761371910572052, - "792": 0.42929545044898987, - "793": 0.4465051591396332, - "794": 0.45080915093421936, - "795": 0.42503631114959717, - "796": 0.4344958961009979, - "797": 0.4685482084751129, - "798": 0.4361921548843384, - "799": 0.43827101588249207, - "800": 0.47122254967689514, - "801": 0.47953304648399353, - "802": 0.429753839969635, - "803": 0.4799472391605377, - "804": 0.451869934797287, - "805": 0.439447820186615, - "806": 0.43792325258255005, - "807": 0.4672706127166748, - "808": 0.4483935236930847, - "809": 0.44176968932151794 - }, - "loss": { - "756": 2.0622403621673584, - "757": 2.0562312602996826, - "758": 2.058318614959717, - "759": 2.063681125640869, - "760": 2.0369791984558105, - "761": 2.0508737564086914, - "762": 2.0467677116394043, - "763": 2.062342643737793, - "764": 2.0008726119995117, - "765": 2.0311782360076904, - "766": 2.051344633102417, - "767": 2.0478522777557373, - "768": 2.0625946521759033, - "769": 2.017423391342163, - "770": 2.0727338790893555, - "771": 2.017502784729004, - "772": 2.0456480979919434, - "773": 2.0360159873962402, - "774": 2.041134834289551, - "775": 2.025242328643799, - "776": 2.0499439239501953, - "777": 2.0376076698303223, - "778": 2.0448663234710693, - "779": 2.0544586181640625, - "780": 2.0328807830810547, - "781": 2.005777359008789, - "782": 2.0554420948028564, - "783": 2.007248878479004, - "784": 2.0513405799865723, - "785": 2.044337749481201, - "786": 2.0244295597076416, - "787": 2.0545248985290527, - "788": 2.049156665802002, - "789": 1.9959447383880615, - "790": 2.015410900115967, - "791": 2.0546536445617676, - "792": 2.061164379119873, - "793": 2.021644115447998, - "794": 2.0457210540771484, - "795": 2.0294415950775146, - "796": 2.0455422401428223, - "797": 1.9957976341247559, - "798": 2.0266478061676025, - "799": 2.0487868785858154, - "800": 2.0228381156921387, - "801": 2.0152206420898438, - "802": 2.0473623275756836, - "803": 2.0365805625915527, - "804": 2.057499408721924, - "805": 2.0447020530700684, - "806": 2.041956901550293, - "807": 2.0415797233581543, - "808": 2.0305683612823486, - "809": 2.017944574356079 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 8.56766, - 8.11354, - 7.12484, - 8.07966, - 8.10242, - 8.38656, - 8.87442, - 8.25409, - 7.49688, - 9.31917, - 8.79549, - 9.1144, - 8.96289, - 7.82311, - 7.82411, - 9.5983, - 9.98334, - 8.61279, - 8.53408, - 7.93803, - 8.83692, - 11.0879, - 9.88931, - 7.76546, - 9.43579, - 8.68188, - 10.8338, - 9.45503, - 9.80724, - 10.2474, - 9.80829, - 9.35328, - 9.41139, - 9.6399, - 9.43751, - 9.06305, - 11.1841, - 10.1403, - 10.0661, - 11.2337, - 10.8352, - 9.09092, - 10.6518, - 10.6662, - 9.1098, - 8.76366, - 11.0855, - 8.84127, - 10.0766, - 10.588, - 10.6476, - 9.35038, - 10.0995, - 10.3399 - ], - "train_epoch_time": 5.0532004833221436, - "train_loss": 2.0328163226331495, - "train_score": 0.3969411315158587, - "val_loss": 2.131256647142833, - "val_score": 0.3697393443499586 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:27:11.321219", - "final_model_norm": 87.74591064453125, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:25:26.519759", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 8.82536792755127, - "learning_rate": 4.64e-11, - "model_norm": 87.40768432617188, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.300286769866943, - "3": 8.448266983032227, - "4": 16.9385929107666, - "5": 5.521918296813965, - "6": 3.9200997352600098, - "7": 3.599931001663208, - "8": 5.704758644104004, - "9": 5.834868431091309, - "10": 5.412257194519043, - "11": 12.190019607543945, - "12": 10.025042533874512, - "13": 6.54962682723999, - "14": 28.818603515625, - "15": 4.159427165985107, - "16": 36.858619689941406, - "17": 4.056296348571777, - "18": 7.1390380859375, - "19": 9.596250534057617, - "20": 4.975948333740234, - "21": 7.607447624206543, - "22": 15.746626853942871, - "23": 4.523006439208984, - "24": 20.08755874633789, - "25": 25.93598175048828, - "26": 5.108270168304443, - "27": 8.370865821838379, - "28": 13.677657127380371, - "29": 7.280508995056152, - "30": 3.9361441135406494, - "31": 13.632987022399902, - "32": 9.50937271118164, - "33": 4.104121208190918, - "34": 7.985281467437744, - "35": 6.304378509521484, - "36": 12.142670631408691, - "37": 3.0258395671844482, - "38": 11.962531089782715, - "39": 7.152475833892822, - "40": 5.270654678344727, - "41": 7.082338809967041, - "42": 3.53645920753479, - "43": 7.500590801239014, - "44": 12.024969100952148, - "45": 6.4233293533325195, - "46": 6.324449062347412, - "47": 3.1137545108795166, - "48": 10.045760154724121, - "49": 5.26921272277832, - "50": 4.198294639587402, - "51": 5.467690467834473, - "52": 8.350181579589844, - "53": 8.82536792755127 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.8053386211395264, - "3": 3.8723838329315186, - "4": 4.444986343383789, - "5": 4.119450569152832, - "6": 3.5325722694396973, - "7": 3.6846227645874023, - "8": 3.6134226322174072, - "9": 4.427856922149658, - "10": 4.080832004547119, - "11": 4.590441703796387, - "12": 5.791167259216309, - "13": 5.108011722564697, - "14": 4.309202194213867, - "15": 3.5151607990264893, - "16": 4.652713775634766, - "17": 3.707624912261963, - "18": 3.6932244300842285, - "19": 3.9449338912963867, - "20": 3.7334671020507812, - "21": 5.518805980682373, - "22": 4.755866050720215, - "23": 4.560586929321289, - "24": 5.46610689163208, - "25": 4.7802863121032715, - "26": 3.646597385406494, - "27": 4.823602676391602, - "28": 4.182075023651123, - "29": 3.780635118484497, - "30": 4.1041646003723145, - "31": 4.27890157699585, - "32": 4.1620893478393555, - "33": 3.46358585357666, - "34": 5.6740546226501465, - "35": 3.9396560192108154, - "36": 5.884576797485352, - "37": 3.360581159591675, - "38": 4.597044944763184, - "39": 4.134245872497559, - "40": 3.536829710006714, - "41": 4.366776466369629, - "42": 3.3245744705200195, - "43": 4.319300651550293, - "44": 4.683053016662598, - "45": 3.8527963161468506, - "46": 4.030945777893066, - "47": 3.3693461418151855, - "48": 4.704986572265625, - "49": 3.9191548824310303, - "50": 3.571779727935791, - "51": 5.254242420196533, - "52": 4.4730963706970215, - "53": 4.387752532958984 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 0.00839976, - 0.00857642, - 0.0958678, - 0.0542554, - 0.0154923, - 0.135101, - 0.229878, - 0.284318, - 0.111031, - 0.130056, - 0.139313, - 0.030892, - 0.0576227, - 0.119075, - 0.00518861, - 0.203179, - 0.00342474, - 0.225339, - 0.0724647, - 0.0428387, - 0.150786, - 0.0953602, - 0.0191803, - 0.222929, - 0.0135464, - 0.00710638, - 0.139746, - 0.0688384, - 0.0223547, - 0.0713249, - 0.264901, - 0.0230224, - 0.0460265, - 0.20563, - 0.0889842, - 0.0991228, - 0.0399105, - 0.367048, - 0.0321242, - 0.0808134, - 0.127317, - 0.0870578, - 0.265827, - 0.0767755, - 0.0323863, - 0.0933804, - 0.100777, - 0.347518, - 0.0466222, - 0.141157, - 0.202646, - 0.175753, - 0.0641529, - 0.0563348 - ], - "train_epoch_time": 5.05444073677063, - "train_loss": 4.594941640550129, - "train_score": 0.07883227225099096, - "val_loss": 4.602305918144988, - "val_score": 0.07776173228990334 - }, - { - "epoch": 1, - "grad_norm": 3.006953239440918, - "learning_rate": 0.464, - "model_norm": 87.36406707763672, - "step_logs": { - "grad_norm": { - "54": 4.710721969604492, - "55": 6.39650297164917, - "56": 5.843128681182861, - "57": 3.7431066036224365, - "58": 10.560835838317871, - "59": 4.017078876495361, - "60": 10.557452201843262, - "61": 8.319095611572266, - "62": 4.053964614868164, - "63": 9.202042579650879, - "64": 4.659931182861328, - "65": 12.972692489624023, - "66": 3.0958261489868164, - "67": 6.036882400512695, - "68": 5.621063232421875, - "69": 3.284179210662842, - "70": 5.690109729766846, - "71": 4.0513787269592285, - "72": 4.812616348266602, - "73": 11.357001304626465, - "74": 3.325056552886963, - "75": 3.4481284618377686, - "76": 5.75864839553833, - "77": 5.69787073135376, - "78": 2.842493772506714, - "79": 5.701220512390137, - "80": 6.768085479736328, - "81": 3.201327085494995, - "82": 6.8929667472839355, - "83": 6.23445463180542, - "84": 3.2729835510253906, - "85": 3.375899076461792, - "86": 5.4267473220825195, - "87": 4.822790145874023, - "88": 2.607414722442627, - "89": 4.681608200073242, - "90": 3.2702584266662598, - "91": 10.841818809509277, - "92": 3.875659942626953, - "93": 4.503223896026611, - "94": 3.666914224624634, - "95": 3.1708147525787354, - "96": 9.237199783325195, - "97": 3.3563873767852783, - "98": 9.311427116394043, - "99": 3.0541188716888428, - "100": 5.967686653137207, - "101": 2.543062210083008, - "102": 10.831096649169922, - "103": 8.295923233032227, - "104": 6.005552291870117, - "105": 3.4593918323516846, - "106": 4.312873840332031, - "107": 3.006953239440918 - }, - "loss": { - "54": 4.604720115661621, - "55": 4.238933086395264, - "56": 3.8177826404571533, - "57": 3.3083527088165283, - "58": 4.512346267700195, - "59": 3.6355762481689453, - "60": 4.134289741516113, - "61": 3.8532001972198486, - "62": 3.693216323852539, - "63": 4.743278503417969, - "64": 3.793639659881592, - "65": 5.383148193359375, - "66": 3.3732831478118896, - "67": 3.5797066688537598, - "68": 4.2783660888671875, - "69": 3.5555572509765625, - "70": 4.0370612144470215, - "71": 3.8692026138305664, - "72": 3.649075984954834, - "73": 5.1226959228515625, - "74": 3.494163751602173, - "75": 3.435516595840454, - "76": 4.286025047302246, - "77": 3.748389482498169, - "78": 3.6159849166870117, - "79": 3.966820240020752, - "80": 4.193337440490723, - "81": 3.1364057064056396, - "82": 4.423172950744629, - "83": 3.787367343902588, - "84": 3.4963293075561523, - "85": 3.8199548721313477, - "86": 4.016044616699219, - "87": 4.007709503173828, - "88": 3.229917049407959, - "89": 4.222854137420654, - "90": 3.899428129196167, - "91": 5.141974449157715, - "92": 3.5960309505462646, - "93": 3.7712361812591553, - "94": 3.76520037651062, - "95": 3.376819133758545, - "96": 4.155144214630127, - "97": 3.5652108192443848, - "98": 4.161769866943359, - "99": 3.2475061416625977, - "100": 4.360395908355713, - "101": 3.0919129848480225, - "102": 5.570976734161377, - "103": 4.026264190673828, - "104": 3.7307796478271484, - "105": 3.5170247554779053, - "106": 3.3530328273773193, - "107": 3.46724534034729 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.207505, - 0.103603, - 0.11182, - 0.236128, - 0.0404581, - 0.225296, - 0.0370922, - 0.0556762, - 0.224722, - 0.0560157, - 0.174702, - 0.0319872, - 0.351965, - 0.098225, - 0.135407, - 0.329651, - 0.124688, - 0.23573, - 0.157551, - 0.0397165, - 0.316042, - 0.288952, - 0.129245, - 0.115457, - 0.447536, - 0.122041, - 0.0915436, - 0.306036, - 0.093094, - 0.0974407, - 0.326381, - 0.335181, - 0.13637, - 0.172306, - 0.475085, - 0.192671, - 0.364617, - 0.0437447, - 0.239404, - 0.185967, - 0.280018, - 0.335866, - 0.0486974, - 0.316476, - 0.0480005, - 0.348159, - 0.122437, - 0.478094, - 0.0474883, - 0.0585023, - 0.103441, - 0.293884, - 0.180262, - 0.38347 - ], - "train_epoch_time": 5.0525500774383545, - "train_loss": 4.562017924611161, - "train_score": 0.18486145980645458, - "val_loss": 4.528683683765468, - "val_score": 0.1867689794500441 - }, - { - "epoch": 2, - "grad_norm": 2.1464927196502686, - "learning_rate": 0.464, - "model_norm": 87.26619720458984, - "step_logs": { - "grad_norm": { - "108": 8.169489860534668, - "109": 4.130614280700684, - "110": 5.649086952209473, - "111": 2.59594988822937, - "112": 4.144364356994629, - "113": 2.533385992050171, - "114": 3.6351304054260254, - "115": 3.638923406600952, - "116": 5.83383846282959, - "117": 2.0029103755950928, - "118": 3.1203951835632324, - "119": 7.157638072967529, - "120": 2.707301139831543, - "121": 8.3097505569458, - "122": 2.2818338871002197, - "123": 3.313976526260376, - "124": 3.3985631465911865, - "125": 2.8124184608459473, - "126": 5.011419773101807, - "127": 3.0676419734954834, - "128": 5.723419666290283, - "129": 2.9939911365509033, - "130": 2.884540557861328, - "131": 3.2379729747772217, - "132": 4.2943267822265625, - "133": 2.302279233932495, - "134": 4.502897262573242, - "135": 2.9996230602264404, - "136": 3.02902889251709, - "137": 4.490320682525635, - "138": 2.1783552169799805, - "139": 3.811655282974243, - "140": 2.590667247772217, - "141": 3.2014966011047363, - "142": 6.4773993492126465, - "143": 2.298563003540039, - "144": 5.1533002853393555, - "145": 1.9343879222869873, - "146": 2.8834545612335205, - "147": 2.548436403274536, - "148": 3.1128015518188477, - "149": 2.6498889923095703, - "150": 3.1814181804656982, - "151": 2.720179557800293, - "152": 10.12575626373291, - "153": 2.546569585800171, - "154": 5.204450607299805, - "155": 3.207531452178955, - "156": 2.602947950363159, - "157": 2.8123319149017334, - "158": 2.6033408641815186, - "159": 2.2779171466827393, - "160": 2.269800901412964, - "161": 2.1464927196502686 - }, - "loss": { - "108": 4.580419540405273, - "109": 3.487710952758789, - "110": 3.9428746700286865, - "111": 3.545706033706665, - "112": 3.683506488800049, - "113": 3.107248306274414, - "114": 3.8686742782592773, - "115": 3.341705799102783, - "116": 4.002868175506592, - "117": 3.345228672027588, - "118": 3.464733839035034, - "119": 4.078404426574707, - "120": 3.628619909286499, - "121": 4.028453826904297, - "122": 3.2160584926605225, - "123": 3.4308042526245117, - "124": 3.641704559326172, - "125": 3.5471291542053223, - "126": 3.857940196990967, - "127": 3.4921181201934814, - "128": 4.167585372924805, - "129": 3.240133762359619, - "130": 3.1896281242370605, - "131": 3.7312583923339844, - "132": 3.6839218139648438, - "133": 3.2522432804107666, - "134": 3.948781967163086, - "135": 3.5439393520355225, - "136": 3.437025547027588, - "137": 3.670748710632324, - "138": 3.453272819519043, - "139": 3.491819381713867, - "140": 3.388313055038452, - "141": 3.403357982635498, - "142": 3.9253406524658203, - "143": 3.3482656478881836, - "144": 3.880375862121582, - "145": 3.125821113586426, - "146": 3.3218133449554443, - "147": 3.2574081420898438, - "148": 3.747436761856079, - "149": 3.327773094177246, - "150": 3.937485456466675, - "151": 3.4126486778259277, - "152": 4.637243270874023, - "153": 3.2354354858398438, - "154": 4.095819473266602, - "155": 3.3196070194244385, - "156": 3.292613983154297, - "157": 3.6738102436065674, - "158": 3.430298328399658, - "159": 3.264312744140625, - "160": 3.2965221405029297, - "161": 3.218731164932251 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.0686302, - 0.204414, - 0.123554, - 0.526151, - 0.21446, - 0.484143, - 0.292767, - 0.252361, - 0.117615, - 0.833878, - 0.355837, - 0.0796069, - 0.495072, - 0.0583395, - 0.617669, - 0.31239, - 0.315293, - 0.448453, - 0.153615, - 0.37109, - 0.127225, - 0.361461, - 0.383342, - 0.355884, - 0.199765, - 0.613574, - 0.194751, - 0.39387, - 0.374607, - 0.182054, - 0.727735, - 0.240339, - 0.504848, - 0.332049, - 0.0935569, - 0.633734, - 0.146118, - 0.835366, - 0.39953, - 0.501562, - 0.386751, - 0.473913, - 0.389025, - 0.461208, - 0.0452277, - 0.498909, - 0.151214, - 0.32266, - 0.48597, - 0.464498, - 0.506139, - 0.629095, - 0.639853, - 0.698596 - ], - "train_epoch_time": 5.051715135574341, - "train_loss": 3.2515053442595168, - "train_score": 0.15544409075033033, - "val_loss": 3.2648093703146235, - "val_score": 0.1537699128162163 - }, - { - "epoch": 3, - "grad_norm": 0.7859781384468079, - "learning_rate": 0.464, - "model_norm": 87.29029846191406, - "step_logs": { - "grad_norm": { - "162": 2.9996471405029297, - "163": 1.7292149066925049, - "164": 1.6292093992233276, - "165": 1.736675500869751, - "166": 1.515742540359497, - "167": 1.1343401670455933, - "168": 1.131856918334961, - "169": 1.1080681085586548, - "170": 1.2317570447921753, - "171": 1.1229251623153687, - "172": 1.0073604583740234, - "173": 1.0934464931488037, - "174": 1.1346526145935059, - "175": 0.8990886807441711, - "176": 0.9201200008392334, - "177": 1.358350157737732, - "178": 1.1711368560791016, - "179": 0.750200092792511, - "180": 0.6113574504852295, - "181": 0.6565418243408203, - "182": 0.7366262078285217, - "183": 1.0335490703582764, - "184": 1.047764539718628, - "185": 1.078670620918274, - "186": 1.0676854848861694, - "187": 0.9073778986930847, - "188": 1.008515477180481, - "189": 1.1846050024032593, - "190": 0.9940149188041687, - "191": 0.7675840854644775, - "192": 0.8644276857376099, - "193": 1.1324049234390259, - "194": 0.9839430451393127, - "195": 0.6516187787055969, - "196": 0.7045209407806396, - "197": 1.100314974784851, - "198": 1.2486379146575928, - "199": 0.9664126634597778, - "200": 0.8384034633636475, - "201": 0.8335612416267395, - "202": 0.8664008975028992, - "203": 0.9978999495506287, - "204": 0.954193651676178, - "205": 0.8018909692764282, - "206": 0.7958046793937683, - "207": 0.9081897139549255, - "208": 0.9873657822608948, - "209": 1.0146435499191284, - "210": 0.9648124575614929, - "211": 0.8621169328689575, - "212": 0.820178747177124, - "213": 0.8205053806304932, - "214": 0.8169720768928528, - "215": 0.7859781384468079 - }, - "loss": { - "162": 3.2603979110717773, - "163": 3.1458051204681396, - "164": 3.2391433715820312, - "165": 2.882030963897705, - "166": 3.206644058227539, - "167": 2.762540340423584, - "168": 2.8386037349700928, - "169": 2.7174291610717773, - "170": 2.8106064796447754, - "171": 2.7681338787078857, - "172": 2.7565886974334717, - "173": 2.6932692527770996, - "174": 2.7876548767089844, - "175": 2.6847944259643555, - "176": 2.6747021675109863, - "177": 2.700319528579712, - "178": 2.8367528915405273, - "179": 2.682593822479248, - "180": 2.5992677211761475, - "181": 2.624390125274658, - "182": 2.5951452255249023, - "183": 2.6522347927093506, - "184": 2.749772071838379, - "185": 2.666172504425049, - "186": 2.729923725128174, - "187": 2.6606221199035645, - "188": 2.669532299041748, - "189": 2.6987133026123047, - "190": 2.765218734741211, - "191": 2.6127982139587402, - "192": 2.611755132675171, - "193": 2.6538138389587402, - "194": 2.712456703186035, - "195": 2.596526622772217, - "196": 2.5897269248962402, - "197": 2.6425256729125977, - "198": 2.7527050971984863, - "199": 2.6624763011932373, - "200": 2.655541181564331, - "201": 2.600940227508545, - "202": 2.646070718765259, - "203": 2.6062302589416504, - "204": 2.6927528381347656, - "205": 2.6068761348724365, - "206": 2.6212356090545654, - "207": 2.5866236686706543, - "208": 2.6545803546905518, - "209": 2.623156785964966, - "210": 2.655985116958618, - "211": 2.611877918243408, - "212": 2.6071112155914307, - "213": 2.5948870182037354, - "214": 2.625883102416992, - "215": 2.586777687072754 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 0.362352, - 1.05204, - 1.22033, - 0.955567, - 1.39573, - 2.14695, - 2.21575, - 2.21322, - 1.85246, - 2.19526, - 2.71645, - 2.2526, - 2.16528, - 3.32128, - 3.15927, - 1.4635, - 2.06827, - 4.76651, - 6.95441, - 6.08841, - 4.78264, - 2.48285, - 2.50478, - 2.29145, - 2.39477, - 3.23152, - 2.62464, - 1.92313, - 2.79862, - 4.43459, - 3.49522, - 2.06951, - 2.80171, - 6.11513, - 5.21754, - 2.18266, - 1.76558, - 2.85076, - 3.77787, - 3.74331, - 3.52504, - 2.61721, - 2.95749, - 4.05406, - 4.13898, - 3.13603, - 2.72295, - 2.54799, - 2.85325, - 3.51415, - 3.87563, - 3.85439, - 3.93424, - 4.18734 - ], - "train_epoch_time": 5.054364204406738, - "train_loss": 2.599224467941131, - "train_score": 0.23703707853382938, - "val_loss": 2.6324636979984497, - "val_score": 0.22956282321265317 - }, - { - "epoch": 4, - "grad_norm": 0.7366073727607727, - "learning_rate": 0.464, - "model_norm": 87.323486328125, - "step_logs": { - "grad_norm": { - "216": 0.7751213312149048, - "217": 0.8250643610954285, - "218": 0.8395969271659851, - "219": 0.8602483868598938, - "220": 0.9384766817092896, - "221": 0.9298914670944214, - "222": 0.8511123657226562, - "223": 0.7777748703956604, - "224": 0.7834213972091675, - "225": 0.8141236305236816, - "226": 0.7789618372917175, - "227": 0.7421006560325623, - "228": 0.7813637852668762, - "229": 0.8658016920089722, - "230": 0.8998185396194458, - "231": 0.8682911992073059, - "232": 0.777773380279541, - "233": 0.769269585609436, - "234": 0.8563226461410522, - "235": 0.8227230906486511, - "236": 0.8421036005020142, - "237": 0.9573317170143127, - "238": 0.9203121662139893, - "239": 0.8030405044555664, - "240": 0.7765429615974426, - "241": 0.772923469543457, - "242": 0.7743852138519287, - "243": 0.8424039483070374, - "244": 0.8016358017921448, - "245": 0.7169095873832703, - "246": 0.7090617418289185, - "247": 0.705998957157135, - "248": 0.7767693996429443, - "249": 0.8481795787811279, - "250": 0.8440188765525818, - "251": 0.7840527296066284, - "252": 0.7568413615226746, - "253": 0.7544783353805542, - "254": 0.8051284551620483, - "255": 0.8347389101982117, - "256": 0.8167509436607361, - "257": 0.7878116369247437, - "258": 0.7775871753692627, - "259": 0.7840420007705688, - "260": 0.8864841461181641, - "261": 0.8494069576263428, - "262": 0.712838888168335, - "263": 0.6982460618019104, - "264": 0.6957581639289856, - "265": 0.7341987490653992, - "266": 0.8037201166152954, - "267": 0.9084228873252869, - "268": 0.8392064571380615, - "269": 0.7366073727607727 - }, - "loss": { - "216": 2.611741542816162, - "217": 2.5883846282958984, - "218": 2.6176917552948, - "219": 2.5711255073547363, - "220": 2.6345291137695312, - "221": 2.6057045459747314, - "222": 2.6083571910858154, - "223": 2.5951972007751465, - "224": 2.587825059890747, - "225": 2.572554111480713, - "226": 2.6032943725585938, - "227": 2.5650768280029297, - "228": 2.5970864295959473, - "229": 2.57080340385437, - "230": 2.601482391357422, - "231": 2.600564956665039, - "232": 2.6188318729400635, - "233": 2.552961826324463, - "234": 2.6209750175476074, - "235": 2.5775938034057617, - "236": 2.574636459350586, - "237": 2.5973219871520996, - "238": 2.6412911415100098, - "239": 2.5575971603393555, - "240": 2.5757863521575928, - "241": 2.561328411102295, - "242": 2.5778720378875732, - "243": 2.558349132537842, - "244": 2.600175380706787, - "245": 2.5513784885406494, - "246": 2.551772117614746, - "247": 2.5409252643585205, - "248": 2.5703318119049072, - "249": 2.5635628700256348, - "250": 2.6016480922698975, - "251": 2.5579848289489746, - "252": 2.562375545501709, - "253": 2.5555100440979004, - "254": 2.5685741901397705, - "255": 2.5679335594177246, - "256": 2.5884275436401367, - "257": 2.56412410736084, - "258": 2.5438132286071777, - "259": 2.5496699810028076, - "260": 2.5670204162597656, - "261": 2.595435857772827, - "262": 2.565859794616699, - "263": 2.5291213989257812, - "264": 2.532837390899658, - "265": 2.527437925338745, - "266": 2.552936553955078, - "267": 2.534872055053711, - "268": 2.5888028144836426, - "269": 2.5330333709716797 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 4.34701, - 3.80236, - 3.71344, - 3.47437, - 2.99127, - 3.01343, - 3.60075, - 4.29005, - 4.21642, - 3.88136, - 4.29033, - 4.65773, - 4.25383, - 3.42951, - 3.213, - 3.44935, - 4.32914, - 4.31407, - 3.57428, - 3.80809, - 3.63065, - 2.83401, - 3.1185, - 3.96604, - 4.27148, - 4.28738, - 4.2988, - 3.60511, - 4.04621, - 4.96416, - 5.07544, - 5.09781, - 4.25995, - 3.56343, - 3.65211, - 4.16109, - 4.47335, - 4.48936, - 3.96243, - 3.68538, - 3.88022, - 4.13137, - 4.20714, - 4.14768, - 3.26654, - 3.59732, - 5.04952, - 5.18744, - 5.23228, - 4.68871, - 3.95212, - 3.07171, - 3.67588, - 4.66841 - ], - "train_epoch_time": 5.053727149963379, - "train_loss": 2.5606365554130917, - "train_score": 0.23850990851093057, - "val_loss": 2.5998535243843506, - "val_score": 0.23252278278944277 - }, - { - "epoch": 5, - "grad_norm": 0.7789791822433472, - "learning_rate": 0.464, - "model_norm": 87.36167907714844, - "step_logs": { - "grad_norm": { - "270": 0.7879934310913086, - "271": 0.8151271343231201, - "272": 0.8245845437049866, - "273": 0.7975809574127197, - "274": 0.826008141040802, - "275": 0.8231139779090881, - "276": 0.7795182466506958, - "277": 0.7362702488899231, - "278": 0.7211339473724365, - "279": 0.7161098718643188, - "280": 0.7130730152130127, - "281": 0.7160906195640564, - "282": 0.7397487759590149, - "283": 0.7289568185806274, - "284": 0.6959989070892334, - "285": 0.7171751856803894, - "286": 0.7552029490470886, - "287": 0.875968337059021, - "288": 0.9072176814079285, - "289": 0.7873736619949341, - "290": 0.7143775224685669, - "291": 0.7785099744796753, - "292": 0.8036198019981384, - "293": 0.7516964673995972, - "294": 0.6911051869392395, - "295": 0.7114232778549194, - "296": 0.7727879285812378, - "297": 0.756477415561676, - "298": 0.8052388429641724, - "299": 0.8475075364112854, - "300": 0.953319251537323, - "301": 0.8589737415313721, - "302": 0.6984808444976807, - "303": 0.5516240000724792, - "304": 0.5574950575828552, - "305": 0.5726935863494873, - "306": 0.6276155710220337, - "307": 0.6959097385406494, - "308": 0.709283709526062, - "309": 0.655981719493866, - "310": 0.6116015315055847, - "311": 0.6479474306106567, - "312": 0.6818233132362366, - "313": 0.7761996388435364, - "314": 0.823245108127594, - "315": 0.8815235495567322, - "316": 0.8640546202659607, - "317": 0.7447691559791565, - "318": 0.7237235903739929, - "319": 0.7106289267539978, - "320": 0.7508794665336609, - "321": 0.8465685248374939, - "322": 0.8888311982154846, - "323": 0.7789791822433472 - }, - "loss": { - "270": 2.5435686111450195, - "271": 2.5349855422973633, - "272": 2.587954044342041, - "273": 2.548557758331299, - "274": 2.5630228519439697, - "275": 2.544992446899414, - "276": 2.570162296295166, - "277": 2.5358524322509766, - "278": 2.5405116081237793, - "279": 2.540127754211426, - "280": 2.5498757362365723, - "281": 2.5221893787384033, - "282": 2.542597532272339, - "283": 2.5179834365844727, - "284": 2.5406641960144043, - "285": 2.5211777687072754, - "286": 2.544454574584961, - "287": 2.530001163482666, - "288": 2.5910000801086426, - "289": 2.5405683517456055, - "290": 2.533108711242676, - "291": 2.5320348739624023, - "292": 2.537235736846924, - "293": 2.5382938385009766, - "294": 2.510328769683838, - "295": 2.529064655303955, - "296": 2.5307998657226562, - "297": 2.5230109691619873, - "298": 2.5292470455169678, - "299": 2.5291075706481934, - "300": 2.5657010078430176, - "301": 2.5819036960601807, - "302": 2.546079158782959, - "303": 2.495604991912842, - "304": 2.499277114868164, - "305": 2.488581895828247, - "306": 2.485379457473755, - "307": 2.5085036754608154, - "308": 2.52060604095459, - "309": 2.4949469566345215, - "310": 2.4939627647399902, - "311": 2.5050272941589355, - "312": 2.4954237937927246, - "313": 2.4973912239074707, - "314": 2.5299363136291504, - "315": 2.5229904651641846, - "316": 2.5666661262512207, - "317": 2.5081264972686768, - "318": 2.510794162750244, - "319": 2.504138946533203, - "320": 2.492478847503662, - "321": 2.51399827003479, - "322": 2.5444583892822266, - "323": 2.520963430404663 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 4.09636, - 3.81527, - 3.80615, - 4.00631, - 3.7565, - 3.75635, - 4.22968, - 4.67788, - 4.88528, - 4.95332, - 5.01477, - 4.9186, - 4.64632, - 4.73859, - 5.24481, - 4.90177, - 4.46136, - 3.29719, - 3.14807, - 4.09797, - 4.96362, - 4.17774, - 3.9288, - 4.49218, - 5.25584, - 4.99694, - 4.23777, - 4.40887, - 3.90069, - 3.52111, - 2.82312, - 3.49929, - 5.21871, - 8.20143, - 8.04141, - 7.58765, - 6.30965, - 5.17975, - 5.01031, - 5.798, - 6.66734, - 5.96668, - 5.36785, - 4.14514, - 3.73294, - 3.24674, - 3.43785, - 4.52174, - 4.79364, - 4.95876, - 4.4207, - 3.50785, - 3.22075, - 4.15446 - ], - "train_epoch_time": 5.054446697235107, - "train_loss": 2.4865550700013914, - "train_score": 0.2752634057783157, - "val_loss": 2.5258698274566442, - "val_score": 0.26641432295984296 - }, - { - "epoch": 6, - "grad_norm": 0.7137139439582825, - "learning_rate": 0.464, - "model_norm": 87.4074478149414, - "step_logs": { - "grad_norm": { - "324": 0.6271835565567017, - "325": 0.5162777900695801, - "326": 0.45463573932647705, - "327": 0.48428601026535034, - "328": 0.5417293310165405, - "329": 0.5998635292053223, - "330": 0.7049420475959778, - "331": 1.0166071653366089, - "332": 0.9228875041007996, - "333": 0.691796600818634, - "334": 0.6814365386962891, - "335": 0.6703628897666931, - "336": 0.7252383232116699, - "337": 0.8871849179267883, - "338": 0.879819929599762, - "339": 0.757430911064148, - "340": 0.6660841107368469, - "341": 0.6151532530784607, - "342": 0.670251727104187, - "343": 0.7393510937690735, - "344": 0.7295030355453491, - "345": 0.7082820534706116, - "346": 0.7068021297454834, - "347": 0.6874851584434509, - "348": 0.7131623029708862, - "349": 0.8159708380699158, - "350": 0.8107249736785889, - "351": 0.7199427485466003, - "352": 0.7256243228912354, - "353": 0.6989573240280151, - "354": 0.7428707480430603, - "355": 0.7518157362937927, - "356": 0.6944369077682495, - "357": 0.709665834903717, - "358": 0.8446700572967529, - "359": 0.766253650188446, - "360": 0.6400922536849976, - "361": 0.6063502430915833, - "362": 0.6322147250175476, - "363": 0.7301038503646851, - "364": 0.935541033744812, - "365": 0.9347594976425171, - "366": 0.8508215546607971, - "367": 1.0017222166061401, - "368": 0.8770847916603088, - "369": 0.8094735741615295, - "370": 0.8130273818969727, - "371": 0.8326973915100098, - "372": 0.805537760257721, - "373": 0.7913177609443665, - "374": 0.7684414386749268, - "375": 0.7492300868034363, - "376": 0.8499711751937866, - "377": 0.7137139439582825 - }, - "loss": { - "324": 2.4609580039978027, - "325": 2.486044406890869, - "326": 2.4552671909332275, - "327": 2.456817865371704, - "328": 2.468639612197876, - "329": 2.439281940460205, - "330": 2.4610490798950195, - "331": 2.5160906314849854, - "332": 2.5812013149261475, - "333": 2.487825393676758, - "334": 2.4659054279327393, - "335": 2.457551956176758, - "336": 2.4987754821777344, - "337": 2.497960090637207, - "338": 2.5343010425567627, - "339": 2.4975650310516357, - "340": 2.482572317123413, - "341": 2.4568405151367188, - "342": 2.476325035095215, - "343": 2.4544246196746826, - "344": 2.4693198204040527, - "345": 2.4664196968078613, - "346": 2.4847562313079834, - "347": 2.475238084793091, - "348": 2.4955506324768066, - "349": 2.4817051887512207, - "350": 2.5186798572540283, - "351": 2.4716553688049316, - "352": 2.469747543334961, - "353": 2.458035707473755, - "354": 2.455958127975464, - "355": 2.457535743713379, - "356": 2.4468395709991455, - "357": 2.435042381286621, - "358": 2.4795143604278564, - "359": 2.466731071472168, - "360": 2.4289627075195312, - "361": 2.4095346927642822, - "362": 2.4092440605163574, - "363": 2.430191993713379, - "364": 2.440250873565674, - "365": 2.496913194656372, - "366": 2.4840247631073, - "367": 2.447493553161621, - "368": 2.5323054790496826, - "369": 2.428744316101074, - "370": 2.4415202140808105, - "371": 2.4556589126586914, - "372": 2.4467263221740723, - "373": 2.421767234802246, - "374": 2.431647777557373, - "375": 2.406076669692993, - "376": 2.4108290672302246, - "377": 2.4239096641540527 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 6.25626, - 9.327, - 11.8788, - 10.4754, - 8.41188, - 6.77887, - 4.95237, - 2.43456, - 3.03057, - 5.19832, - 5.31038, - 5.46868, - 4.75079, - 3.17364, - 3.27394, - 4.35342, - 5.59556, - 6.49248, - 5.51229, - 4.49002, - 4.64005, - 4.91648, - 4.9738, - 5.23709, - 4.9067, - 3.72736, - 3.832, - 4.76861, - 4.69061, - 5.03138, - 4.45035, - 4.34787, - 5.07388, - 4.83503, - 3.4753, - 4.20123, - 5.92838, - 6.55369, - 6.0277, - 4.55902, - 2.7881, - 2.85761, - 3.43146, - 2.43909, - 3.2918, - 3.70661, - 3.6936, - 3.54155, - 3.77063, - 3.8675, - 4.11793, - 4.28627, - 3.33701, - 4.75848 - ], - "train_epoch_time": 5.054785490036011, - "train_loss": 2.387372040919628, - "train_score": 0.287844108603093, - "val_loss": 2.4200390195463335, - "val_score": 0.28176126547850644 - }, - { - "epoch": 7, - "grad_norm": 0.7057314515113831, - "learning_rate": 0.464, - "model_norm": 87.4571304321289, - "step_logs": { - "grad_norm": { - "378": 0.6375489234924316, - "379": 0.8574041724205017, - "380": 0.9480672478675842, - "381": 1.0023635625839233, - "382": 0.8999370336532593, - "383": 0.7423200607299805, - "384": 0.7463111877441406, - "385": 0.8375042676925659, - "386": 0.9662033319473267, - "387": 0.8519492745399475, - "388": 0.7708650231361389, - "389": 0.8508585691452026, - "390": 0.7910328507423401, - "391": 0.8769818544387817, - "392": 0.7972829937934875, - "393": 0.649968147277832, - "394": 0.6651025414466858, - "395": 0.7307669520378113, - "396": 0.879423975944519, - "397": 0.9184058308601379, - "398": 1.015202283859253, - "399": 0.9219325184822083, - "400": 0.8425905704498291, - "401": 0.9114477038383484, - "402": 0.8427307605743408, - "403": 0.6758178472518921, - "404": 0.6432573795318604, - "405": 0.6658771634101868, - "406": 0.7152196168899536, - "407": 0.7890692353248596, - "408": 1.0599589347839355, - "409": 0.8242237567901611, - "410": 0.9556509852409363, - "411": 1.124696969985962, - "412": 1.0104082822799683, - "413": 0.8460323214530945, - "414": 0.7274619936943054, - "415": 0.8518417477607727, - "416": 1.1824973821640015, - "417": 1.078783631324768, - "418": 0.7930795550346375, - "419": 0.6923499703407288, - "420": 0.7985798716545105, - "421": 0.7141899466514587, - "422": 0.5975543856620789, - "423": 0.6306990385055542, - "424": 0.803292989730835, - "425": 0.9265243411064148, - "426": 1.0233440399169922, - "427": 0.7907835245132446, - "428": 0.5657371282577515, - "429": 0.5865322947502136, - "430": 0.6819055676460266, - "431": 0.7057314515113831 - }, - "loss": { - "378": 2.388603687286377, - "379": 2.380326271057129, - "380": 2.4764041900634766, - "381": 2.406127691268921, - "382": 2.4708681106567383, - "383": 2.38834810256958, - "384": 2.3860855102539062, - "385": 2.380411148071289, - "386": 2.4289848804473877, - "387": 2.437363386154175, - "388": 2.4080100059509277, - "389": 2.3924927711486816, - "390": 2.4215683937072754, - "391": 2.381993293762207, - "392": 2.427151679992676, - "393": 2.3666839599609375, - "394": 2.3613901138305664, - "395": 2.342604398727417, - "396": 2.4052934646606445, - "397": 2.409193992614746, - "398": 2.4350767135620117, - "399": 2.414005756378174, - "400": 2.4030091762542725, - "401": 2.3974595069885254, - "402": 2.4292213916778564, - "403": 2.3631086349487305, - "404": 2.3518929481506348, - "405": 2.3557896614074707, - "406": 2.370086193084717, - "407": 2.3623900413513184, - "408": 2.3709654808044434, - "409": 2.4475955963134766, - "410": 2.4127299785614014, - "411": 2.4374356269836426, - "412": 2.445082187652588, - "413": 2.4067864418029785, - "414": 2.368389129638672, - "415": 2.3836231231689453, - "416": 2.405555248260498, - "417": 2.504934787750244, - "418": 2.368527889251709, - "419": 2.375561237335205, - "420": 2.331961154937744, - "421": 2.3814916610717773, - "422": 2.328709363937378, - "423": 2.329042673110962, - "424": 2.345515251159668, - "425": 2.390723705291748, - "426": 2.384331703186035, - "427": 2.4061336517333984, - "428": 2.3169169425964355, - "429": 2.314420700073242, - "430": 2.3164196014404297, - "431": 2.3172173500061035 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 5.87648, - 3.23791, - 2.75514, - 2.39479, - 3.05088, - 4.33426, - 4.28397, - 3.39373, - 2.60188, - 3.35809, - 4.0523, - 3.30473, - 3.86997, - 3.09713, - 3.81832, - 5.60217, - 5.33815, - 4.38673, - 3.11008, - 2.85629, - 2.36269, - 2.84014, - 3.38472, - 2.88594, - 3.4205, - 5.17398, - 5.68392, - 5.3131, - 4.63325, - 3.79421, - 2.11031, - 3.60288, - 2.64186, - 1.92691, - 2.39497, - 3.36251, - 4.47541, - 3.28488, - 1.72034, - 2.15242, - 3.76569, - 4.95581, - 3.65666, - 4.66898, - 6.52169, - 5.85508, - 3.63488, - 2.78494, - 2.27679, - 3.84773, - 7.23904, - 6.72757, - 4.9816, - 4.65252 - ], - "train_epoch_time": 5.0608069896698, - "train_loss": 2.3236168134161184, - "train_score": 0.3147282997368056, - "val_loss": 2.370218958564522, - "val_score": 0.30544991440532127 - }, - { - "epoch": 8, - "grad_norm": 0.7709482312202454, - "learning_rate": 0.464, - "model_norm": 87.51288604736328, - "step_logs": { - "grad_norm": { - "432": 0.7742019891738892, - "433": 0.8048451542854309, - "434": 0.8584392070770264, - "435": 0.9014288783073425, - "436": 0.9329814910888672, - "437": 0.7818324565887451, - "438": 0.6735311150550842, - "439": 0.7207943797111511, - "440": 0.8359708189964294, - "441": 1.0091593265533447, - "442": 0.9774025082588196, - "443": 0.9620204567909241, - "444": 1.1691254377365112, - "445": 0.9814362525939941, - "446": 0.8438099026679993, - "447": 0.676818311214447, - "448": 0.754360556602478, - "449": 0.7340006232261658, - "450": 0.734951376914978, - "451": 0.7800947427749634, - "452": 0.7486482262611389, - "453": 0.7244609594345093, - "454": 0.7954311966896057, - "455": 0.8835601210594177, - "456": 0.8508532047271729, - "457": 0.8095985651016235, - "458": 0.855453610420227, - "459": 0.8926812410354614, - "460": 0.8820421099662781, - "461": 0.820753276348114, - "462": 0.7099539637565613, - "463": 0.6721706390380859, - "464": 0.7528849840164185, - "465": 0.8814135193824768, - "466": 0.8815662860870361, - "467": 0.7790881395339966, - "468": 0.7338931560516357, - "469": 0.7409322261810303, - "470": 0.7974900603294373, - "471": 0.8311624526977539, - "472": 0.8229005336761475, - "473": 0.8401023745536804, - "474": 0.8360876441001892, - "475": 0.8216034173965454, - "476": 0.8300091624259949, - "477": 0.8260737657546997, - "478": 1.0198419094085693, - "479": 0.8955154418945312, - "480": 0.8397411108016968, - "481": 1.089308738708496, - "482": 0.9894914627075195, - "483": 0.8322555422782898, - "484": 0.8540805578231812, - "485": 0.7709482312202454 - }, - "loss": { - "432": 2.3349180221557617, - "433": 2.3573460578918457, - "434": 2.3331007957458496, - "435": 2.385509967803955, - "436": 2.362790107727051, - "437": 2.3727855682373047, - "438": 2.3391332626342773, - "439": 2.3124806880950928, - "440": 2.353797674179077, - "441": 2.3675384521484375, - "442": 2.386098861694336, - "443": 2.3554799556732178, - "444": 2.3905320167541504, - "445": 2.422302007675171, - "446": 2.3350419998168945, - "447": 2.3125033378601074, - "448": 2.2823407649993896, - "449": 2.3381686210632324, - "450": 2.318347930908203, - "451": 2.322920083999634, - "452": 2.290309429168701, - "453": 2.3206136226654053, - "454": 2.3229427337646484, - "455": 2.345212459564209, - "456": 2.3333492279052734, - "457": 2.3499529361724854, - "458": 2.3212950229644775, - "459": 2.3592188358306885, - "460": 2.3115272521972656, - "461": 2.342606544494629, - "462": 2.287214994430542, - "463": 2.299330949783325, - "464": 2.286092758178711, - "465": 2.3428871631622314, - "466": 2.3253884315490723, - "467": 2.3341774940490723, - "468": 2.284053087234497, - "469": 2.3118135929107666, - "470": 2.2840709686279297, - "471": 2.315932512283325, - "472": 2.3071727752685547, - "473": 2.3133482933044434, - "474": 2.297257900238037, - "475": 2.2959394454956055, - "476": 2.2795228958129883, - "477": 2.307526111602783, - "478": 2.3175907135009766, - "479": 2.36483097076416, - "480": 2.3247885704040527, - "481": 2.3374457359313965, - "482": 2.3501124382019043, - "483": 2.3068530559539795, - "484": 2.3033838272094727, - "485": 2.3385672569274902 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 3.8955, - 3.63914, - 3.16603, - 2.93574, - 2.71443, - 3.88178, - 5.15632, - 4.45098, - 3.36811, - 2.32476, - 2.49771, - 2.54513, - 1.74893, - 2.5148, - 3.27948, - 5.04822, - 4.01072, - 4.33993, - 4.29202, - 3.81715, - 4.08638, - 4.42153, - 3.67141, - 3.00407, - 3.22308, - 3.58525, - 3.17203, - 2.96057, - 2.97112, - 3.47756, - 4.53781, - 5.08912, - 4.03308, - 3.01573, - 2.99217, - 3.84557, - 4.24073, - 4.2111, - 3.59136, - 3.35239, - 3.4071, - 3.27776, - 3.28629, - 3.40123, - 3.30886, - 3.3815, - 2.22829, - 2.94886, - 3.2968, - 1.96988, - 2.40029, - 3.33048, - 3.15768, - 3.93459 - ], - "train_epoch_time": 5.0537073612213135, - "train_loss": 2.280266422520751, - "train_score": 0.33841911751878484, - "val_loss": 2.3427316233703928, - "val_score": 0.324007965393373 - }, - { - "epoch": 9, - "grad_norm": 1.2686938047409058, - "learning_rate": 0.464, - "model_norm": 87.5705795288086, - "step_logs": { - "grad_norm": { - "486": 0.7851659059524536, - "487": 0.8902822732925415, - "488": 0.9662135243415833, - "489": 0.9832495450973511, - "490": 0.8648109436035156, - "491": 0.7941068410873413, - "492": 0.7695399522781372, - "493": 0.7362545132637024, - "494": 0.7355393171310425, - "495": 0.8485167622566223, - "496": 0.8725716471672058, - "497": 0.7784942984580994, - "498": 0.7393256425857544, - "499": 0.7618834376335144, - "500": 0.7379514575004578, - "501": 0.763367235660553, - "502": 0.7525189518928528, - "503": 0.6701223850250244, - "504": 0.721518874168396, - "505": 0.7934356331825256, - "506": 0.8603341579437256, - "507": 0.9990874528884888, - "508": 1.0042060613632202, - "509": 0.8474621772766113, - "510": 0.749873161315918, - "511": 0.6934238076210022, - "512": 0.7034761309623718, - "513": 0.8546044826507568, - "514": 0.8088599443435669, - "515": 0.6858404874801636, - "516": 0.6820484399795532, - "517": 0.7325760722160339, - "518": 0.8107496500015259, - "519": 0.8114433884620667, - "520": 0.7677037715911865, - "521": 0.7214632034301758, - "522": 0.6353814005851746, - "523": 0.7185224294662476, - "524": 0.8090643286705017, - "525": 0.7631515860557556, - "526": 0.7186344861984253, - "527": 0.8085775375366211, - "528": 0.9327294826507568, - "529": 0.9190691709518433, - "530": 0.9887126088142395, - "531": 0.9865207672119141, - "532": 0.9222756028175354, - "533": 0.8012776374816895, - "534": 0.6898269653320312, - "535": 0.6873668432235718, - "536": 0.7523880004882812, - "537": 0.8484282493591309, - "538": 1.0232796669006348, - "539": 1.2686938047409058 - }, - "loss": { - "486": 2.2811126708984375, - "487": 2.3104922771453857, - "488": 2.311701536178589, - "489": 2.3457655906677246, - "490": 2.2997727394104004, - "491": 2.3074018955230713, - "492": 2.2888741493225098, - "493": 2.26469087600708, - "494": 2.2830967903137207, - "495": 2.292107582092285, - "496": 2.310253620147705, - "497": 2.282397985458374, - "498": 2.2857651710510254, - "499": 2.224403142929077, - "500": 2.279804229736328, - "501": 2.271515130996704, - "502": 2.291240930557251, - "503": 2.265056848526001, - "504": 2.2835335731506348, - "505": 2.2513251304626465, - "506": 2.279625177383423, - "507": 2.294560432434082, - "508": 2.319828510284424, - "509": 2.2903213500976562, - "510": 2.287503719329834, - "511": 2.233738899230957, - "512": 2.2568113803863525, - "513": 2.2923905849456787, - "514": 2.3041679859161377, - "515": 2.2597708702087402, - "516": 2.233388900756836, - "517": 2.2521467208862305, - "518": 2.2564029693603516, - "519": 2.276836395263672, - "520": 2.2647783756256104, - "521": 2.255136251449585, - "522": 2.237454891204834, - "523": 2.2669026851654053, - "524": 2.258902072906494, - "525": 2.272812843322754, - "526": 2.254998207092285, - "527": 2.226747989654541, - "528": 2.261782169342041, - "529": 2.2746810913085938, - "530": 2.3026933670043945, - "531": 2.3066163063049316, - "532": 2.287996292114258, - "533": 2.289806842803955, - "534": 2.2100539207458496, - "535": 2.2068634033203125, - "536": 2.2271270751953125, - "537": 2.24106502532959, - "538": 2.2470970153808594, - "539": 2.2986319065093994 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 3.70019, - 2.91507, - 2.4762, - 2.42637, - 3.07498, - 3.65902, - 3.86509, - 4.17785, - 4.22, - 3.18357, - 3.03429, - 3.766, - 4.18177, - 3.8321, - 4.18641, - 3.89806, - 4.04609, - 5.04395, - 4.38644, - 3.57614, - 3.07985, - 2.29875, - 2.30044, - 3.18901, - 4.06805, - 4.64553, - 4.56033, - 3.13876, - 3.52182, - 4.80417, - 4.80102, - 4.19654, - 3.43276, - 3.45792, - 3.84272, - 4.33256, - 5.54224, - 4.39089, - 3.45089, - 3.90249, - 4.36647, - 3.40587, - 2.5998, - 2.69292, - 2.35557, - 2.37008, - 2.68989, - 3.56642, - 4.64432, - 4.67088, - 3.93424, - 3.11332, - 2.14602, - 1.42809 - ], - "train_epoch_time": 5.053997039794922, - "train_loss": 2.3445621073844616, - "train_score": 0.32579582139443464, - "val_loss": 2.4186753881785132, - "val_score": 0.3054364599398713 - }, - { - "epoch": 10, - "grad_norm": 0.9750382900238037, - "learning_rate": 0.464, - "model_norm": 87.63328552246094, - "step_logs": { - "grad_norm": { - "540": 1.1835196018218994, - "541": 0.94350665807724, - "542": 0.9540534019470215, - "543": 1.0197839736938477, - "544": 1.2302792072296143, - "545": 1.2041865587234497, - "546": 1.120396375656128, - "547": 0.8945721983909607, - "548": 0.7712628245353699, - "549": 0.7412620782852173, - "550": 0.7946042418479919, - "551": 0.8093903064727783, - "552": 0.7556217908859253, - "553": 0.7436901926994324, - "554": 0.7568050026893616, - "555": 0.7321219444274902, - "556": 0.7226343750953674, - "557": 0.7480760216712952, - "558": 0.7279438972473145, - "559": 0.7623870372772217, - "560": 0.8186307549476624, - "561": 0.7554833889007568, - "562": 0.7386659383773804, - "563": 0.7012243866920471, - "564": 0.6587784290313721, - "565": 0.7076345086097717, - "566": 0.7385809421539307, - "567": 0.7746321558952332, - "568": 0.7839486002922058, - "569": 0.7919442653656006, - "570": 0.7336534261703491, - "571": 0.6945879459381104, - "572": 0.7922754287719727, - "573": 0.8719609975814819, - "574": 0.8545740246772766, - "575": 0.8933283090591431, - "576": 0.9756830334663391, - "577": 0.9009069204330444, - "578": 0.8197300434112549, - "579": 0.7380638122558594, - "580": 0.6490558981895447, - "581": 0.6385520100593567, - "582": 0.7874158620834351, - "583": 0.8764427304267883, - "584": 0.8372117877006531, - "585": 0.7458502650260925, - "586": 0.7517497539520264, - "587": 0.7369568347930908, - "588": 0.7382214069366455, - "589": 0.7276107668876648, - "590": 0.7043472528457642, - "591": 0.766448974609375, - "592": 0.8357090353965759, - "593": 0.9750382900238037 - }, - "loss": { - "540": 2.3539493083953857, - "541": 2.2981619834899902, - "542": 2.2958292961120605, - "543": 2.322713613510132, - "544": 2.2743759155273438, - "545": 2.34297776222229, - "546": 2.334174156188965, - "547": 2.297511100769043, - "548": 2.248453140258789, - "549": 2.232665538787842, - "550": 2.2525105476379395, - "551": 2.2676198482513428, - "552": 2.2209653854370117, - "553": 2.232907772064209, - "554": 2.2223243713378906, - "555": 2.2527878284454346, - "556": 2.2135627269744873, - "557": 2.2180428504943848, - "558": 2.1899607181549072, - "559": 2.2403485774993896, - "560": 2.1900129318237305, - "561": 2.2425734996795654, - "562": 2.2281856536865234, - "563": 2.2081387042999268, - "564": 2.2237088680267334, - "565": 2.1757054328918457, - "566": 2.1926674842834473, - "567": 2.2095744609832764, - "568": 2.2260971069335938, - "569": 2.2344472408294678, - "570": 2.2290022373199463, - "571": 2.2139875888824463, - "572": 2.2216098308563232, - "573": 2.253282308578491, - "574": 2.2164113521575928, - "575": 2.213837146759033, - "576": 2.2379891872406006, - "577": 2.257227897644043, - "578": 2.2344202995300293, - "579": 2.2295289039611816, - "580": 2.1947102546691895, - "581": 2.185819625854492, - "582": 2.213284492492676, - "583": 2.2145490646362305, - "584": 2.2354774475097656, - "585": 2.1978392601013184, - "586": 2.1795501708984375, - "587": 2.1730775833129883, - "588": 2.193183183670044, - "589": 2.1779942512512207, - "590": 2.195094585418701, - "591": 2.188117504119873, - "592": 2.2057876586914062, - "593": 2.2147738933563232 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 1.68053, - 2.58161, - 2.52229, - 2.23347, - 1.50264, - 1.61577, - 1.85947, - 2.87096, - 3.77989, - 4.06331, - 3.56751, - 3.46142, - 3.88985, - 4.03726, - 3.88007, - 4.20294, - 4.23891, - 3.9635, - 4.13276, - 3.85447, - 3.26791, - 3.92913, - 4.08371, - 4.49068, - 5.12388, - 4.34492, - 4.01954, - 3.68229, - 3.62217, - 3.56271, - 4.14122, - 4.58903, - 3.53928, - 2.96361, - 3.03495, - 2.77411, - 2.35093, - 2.78109, - 3.32524, - 4.09284, - 5.2097, - 5.3607, - 3.56968, - 2.88296, - 3.18933, - 3.95087, - 3.85674, - 4.00121, - 4.0244, - 4.11395, - 4.42466, - 3.72481, - 3.1583, - 2.32963 - ], - "train_epoch_time": 5.053994417190552, - "train_loss": 2.26130206978304, - "train_score": 0.32888495330317974, - "val_loss": 2.342782899514953, - "val_score": 0.30547233848018845 - }, - { - "epoch": 11, - "grad_norm": 0.8273022174835205, - "learning_rate": 0.464, - "model_norm": 87.70013427734375, - "step_logs": { - "grad_norm": { - "594": 1.0164622068405151, - "595": 0.978763222694397, - "596": 0.9974223971366882, - "597": 0.9824926257133484, - "598": 0.9369617700576782, - "599": 0.9179539680480957, - "600": 0.9198263883590698, - "601": 0.9800050854682922, - "602": 0.9910167455673218, - "603": 1.0632083415985107, - "604": 0.9760513305664062, - "605": 0.9206082224845886, - "606": 0.9623233079910278, - "607": 0.944540798664093, - "608": 0.7439101338386536, - "609": 0.679442822933197, - "610": 0.7358267903327942, - "611": 0.8010754585266113, - "612": 0.7818032503128052, - "613": 0.7321988344192505, - "614": 0.8337776064872742, - "615": 0.9823498129844666, - "616": 0.9329684972763062, - "617": 0.975547194480896, - "618": 0.9363654851913452, - "619": 0.7973529696464539, - "620": 0.7889009714126587, - "621": 0.8207709789276123, - "622": 0.7937414646148682, - "623": 0.7389844059944153, - "624": 0.7226306200027466, - "625": 0.7109134197235107, - "626": 0.7178105711936951, - "627": 0.7838050127029419, - "628": 0.8162763118743896, - "629": 0.775202751159668, - "630": 0.6732639074325562, - "631": 0.6817336082458496, - "632": 0.7645159363746643, - "633": 0.8073098063468933, - "634": 0.931305468082428, - "635": 0.9899008274078369, - "636": 0.9269702434539795, - "637": 0.8649294972419739, - "638": 0.7406699657440186, - "639": 0.7121018767356873, - "640": 0.7555528879165649, - "641": 0.777006208896637, - "642": 0.7744395732879639, - "643": 0.8030886054039001, - "644": 0.8008630275726318, - "645": 0.7854599952697754, - "646": 0.805654764175415, - "647": 0.8273022174835205 - }, - "loss": { - "594": 2.2600948810577393, - "595": 2.260560989379883, - "596": 2.2460503578186035, - "597": 2.2132647037506104, - "598": 2.213008403778076, - "599": 2.1883063316345215, - "600": 2.2109174728393555, - "601": 2.2285168170928955, - "602": 2.251828670501709, - "603": 2.2240641117095947, - "604": 2.242880344390869, - "605": 2.2348902225494385, - "606": 2.2197818756103516, - "607": 2.222428560256958, - "608": 2.182300567626953, - "609": 2.184536933898926, - "610": 2.1794681549072266, - "611": 2.1533432006835938, - "612": 2.201977491378784, - "613": 2.161073684692383, - "614": 2.1864712238311768, - "615": 2.2045974731445312, - "616": 2.236830234527588, - "617": 2.22475528717041, - "618": 2.2685189247131348, - "619": 2.211355686187744, - "620": 2.182935953140259, - "621": 2.196930408477783, - "622": 2.1796860694885254, - "623": 2.153675079345703, - "624": 2.172628402709961, - "625": 2.1341538429260254, - "626": 2.159029483795166, - "627": 2.189436197280884, - "628": 2.1797616481781006, - "629": 2.1593220233917236, - "630": 2.1281826496124268, - "631": 2.1284921169281006, - "632": 2.1576924324035645, - "633": 2.1742377281188965, - "634": 2.159294605255127, - "635": 2.198235034942627, - "636": 2.2072763442993164, - "637": 2.197878837585449, - "638": 2.1755714416503906, - "639": 2.115135669708252, - "640": 2.150275707244873, - "641": 2.1557679176330566, - "642": 2.154815673828125, - "643": 2.165224552154541, - "644": 2.148066759109497, - "645": 2.156641960144043, - "646": 2.171790838241577, - "647": 2.1523656845092773 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 2.18748, - 2.35972, - 2.25767, - 2.29285, - 2.52081, - 2.59697, - 2.61313, - 2.32038, - 2.29284, - 1.96748, - 2.35429, - 2.63698, - 2.397, - 2.49107, - 3.94343, - 4.7321, - 4.02531, - 3.35557, - 3.60262, - 4.03099, - 3.14516, - 2.28453, - 2.5698, - 2.33768, - 2.58733, - 3.47822, - 3.50749, - 3.26116, - 3.45968, - 3.94375, - 4.16057, - 4.22272, - 4.19024, - 3.56382, - 3.27141, - 3.59324, - 4.69503, - 4.57976, - 3.69162, - 3.336, - 2.48959, - 2.24332, - 2.56877, - 2.93794, - 3.96574, - 4.17113, - 3.76673, - 3.5707, - 3.59282, - 3.35719, - 3.34912, - 3.49567, - 3.34595, - 3.14476 - ], - "train_epoch_time": 5.0541229248046875, - "train_loss": 2.1574668594207105, - "train_score": 0.3588986280487805, - "val_loss": 2.25482671898624, - "val_score": 0.33570877648248465 - }, - { - "epoch": 12, - "grad_norm": 0.5213214159011841, - "learning_rate": 0.464, - "model_norm": 87.76070404052734, - "step_logs": { - "grad_norm": { - "648": 0.7569950819015503, - "649": 0.7735424637794495, - "650": 0.792700469493866, - "651": 0.768092930316925, - "652": 0.7337979078292847, - "653": 0.6889377236366272, - "654": 0.714954674243927, - "655": 0.7085017561912537, - "656": 0.6625272631645203, - "657": 0.5941329002380371, - "658": 0.5833716988563538, - "659": 0.6013264656066895, - "660": 0.6253089308738708, - "661": 0.6521468162536621, - "662": 0.6336640119552612, - "663": 0.6292108297348022, - "664": 0.6899257898330688, - "665": 0.7523852586746216, - "666": 0.7956793308258057, - "667": 0.7607225179672241, - "668": 0.7084807753562927, - "669": 0.6430822610855103, - "670": 0.6105523705482483, - "671": 0.6632283926010132, - "672": 0.6920416355133057, - "673": 0.6887260675430298, - "674": 0.6674964427947998, - "675": 0.6178245544433594, - "676": 0.6420121192932129, - "677": 0.7029278874397278, - "678": 0.6824374794960022, - "679": 0.6836779713630676, - "680": 0.6397414207458496, - "681": 0.5898133516311646, - "682": 0.6011072993278503, - "683": 0.6327524185180664, - "684": 0.6570475697517395, - "685": 0.653169572353363, - "686": 0.5740808248519897, - "687": 0.5248444676399231, - "688": 0.52183598279953, - "689": 0.5088307857513428, - "690": 0.5654743313789368, - "691": 0.5853663682937622, - "692": 0.5990533828735352, - "693": 0.596141517162323, - "694": 0.6164954304695129, - "695": 0.6716930866241455, - "696": 0.6152611374855042, - "697": 0.5288187861442566, - "698": 0.5166124701499939, - "699": 0.5450300574302673, - "700": 0.5482425689697266, - "701": 0.5213214159011841 - }, - "loss": { - "648": 2.1507115364074707, - "649": 2.142016887664795, - "650": 2.1239571571350098, - "651": 2.1406073570251465, - "652": 2.1432909965515137, - "653": 2.1183032989501953, - "654": 2.1332972049713135, - "655": 2.136540412902832, - "656": 2.130563735961914, - "657": 2.1043756008148193, - "658": 2.102400779724121, - "659": 2.1108832359313965, - "660": 2.112490177154541, - "661": 2.0898590087890625, - "662": 2.1311960220336914, - "663": 2.1345791816711426, - "664": 2.114157199859619, - "665": 2.1389048099517822, - "666": 2.105990409851074, - "667": 2.086348056793213, - "668": 2.120601177215576, - "669": 2.1081249713897705, - "670": 2.0916836261749268, - "671": 2.1078715324401855, - "672": 2.1157338619232178, - "673": 2.07753324508667, - "674": 2.1002273559570312, - "675": 2.0919601917266846, - "676": 2.053832530975342, - "677": 2.0911052227020264, - "678": 2.0771384239196777, - "679": 2.106577157974243, - "680": 2.091855525970459, - "681": 2.091642379760742, - "682": 2.0712103843688965, - "683": 2.0842442512512207, - "684": 2.0857014656066895, - "685": 2.081791400909424, - "686": 2.0966548919677734, - "687": 2.110111713409424, - "688": 2.063055992126465, - "689": 2.052750587463379, - "690": 2.0683670043945312, - "691": 2.07271146774292, - "692": 2.038562297821045, - "693": 2.030576229095459, - "694": 2.0623843669891357, - "695": 2.085425853729248, - "696": 2.0528316497802734, - "697": 2.0866293907165527, - "698": 2.0487313270568848, - "699": 2.0785648822784424, - "700": 2.0680387020111084, - "701": 2.0458428859710693 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 3.75315, - 3.57977, - 3.38008, - 3.62835, - 3.98041, - 4.46301, - 4.17344, - 4.25627, - 4.85386, - 5.96151, - 6.17767, - 5.83772, - 5.40263, - 4.9139, - 5.30769, - 5.39163, - 4.44153, - 3.77843, - 3.32644, - 3.60524, - 4.22477, - 5.09757, - 5.61113, - 4.79201, - 4.4177, - 4.37981, - 4.71377, - 5.48054, - 4.98286, - 4.23208, - 4.46005, - 4.50686, - 5.1112, - 6.01254, - 5.73219, - 5.20572, - 4.83124, - 4.87961, - 6.36181, - 7.66027, - 7.57605, - 7.92847, - 6.46847, - 6.049, - 5.68058, - 5.71374, - 5.42638, - 4.62225, - 5.42293, - 7.46159, - 7.67636, - 6.99717, - 6.88039, - 7.52768 - ], - "train_epoch_time": 5.060282945632935, - "train_loss": 2.0488108048651106, - "train_score": 0.3966710007173601, - "val_loss": 2.156653976057208, - "val_score": 0.36659102342022 - }, - { - "epoch": 13, - "grad_norm": 0.3689773678779602, - "learning_rate": 0.3093333333333334, - "model_norm": 87.79572296142578, - "step_logs": { - "grad_norm": { - "702": 0.5364833474159241, - "703": 0.5259773135185242, - "704": 0.5002303719520569, - "705": 0.4888346791267395, - "706": 0.5394003391265869, - "707": 0.5595197081565857, - "708": 0.5669735074043274, - "709": 0.5195918679237366, - "710": 0.47284796833992004, - "711": 0.43037548661231995, - "712": 0.4000774323940277, - "713": 0.4237956404685974, - "714": 0.436426043510437, - "715": 0.46994078159332275, - "716": 0.43116238713264465, - "717": 0.4097455143928528, - "718": 0.3929866850376129, - "719": 0.39943069219589233, - "720": 0.3972277343273163, - "721": 0.38414469361305237, - "722": 0.41446948051452637, - "723": 0.4122876822948456, - "724": 0.3981236517429352, - "725": 0.43609535694122314, - "726": 0.4336734414100647, - "727": 0.42594480514526367, - "728": 0.4029075503349304, - "729": 0.3956875503063202, - "730": 0.3929194509983063, - "731": 0.409662663936615, - "732": 0.42363542318344116, - "733": 0.4122909605503082, - "734": 0.4097675085067749, - "735": 0.3712646961212158, - "736": 0.36579084396362305, - "737": 0.3617793917655945, - "738": 0.3636852204799652, - "739": 0.39375123381614685, - "740": 0.34693223237991333, - "741": 0.36366233229637146, - "742": 0.3651773929595947, - "743": 0.3666030466556549, - "744": 0.39372679591178894, - "745": 0.38121268153190613, - "746": 0.3903146982192993, - "747": 0.3828539252281189, - "748": 0.4013423025608063, - "749": 0.3672022223472595, - "750": 0.3636907935142517, - "751": 0.35521090030670166, - "752": 0.40065255761146545, - "753": 0.35991808772087097, - "754": 0.3769945502281189, - "755": 0.3689773678779602 - }, - "loss": { - "702": 2.034242868423462, - "703": 2.041623115539551, - "704": 2.015681266784668, - "705": 2.019620418548584, - "706": 2.045684576034546, - "707": 2.0498275756835938, - "708": 2.0647382736206055, - "709": 2.036111831665039, - "710": 2.041393280029297, - "711": 2.04914927482605, - "712": 2.036518096923828, - "713": 2.046377658843994, - "714": 2.0181939601898193, - "715": 2.0533552169799805, - "716": 2.0251524448394775, - "717": 2.048600196838379, - "718": 2.0302419662475586, - "719": 2.028761863708496, - "720": 2.031588554382324, - "721": 2.032290458679199, - "722": 2.0233216285705566, - "723": 2.0069386959075928, - "724": 2.020716905593872, - "725": 2.009347915649414, - "726": 2.027273178100586, - "727": 2.0027408599853516, - "728": 2.023124933242798, - "729": 2.033870220184326, - "730": 2.041031837463379, - "731": 1.9998836517333984, - "732": 2.045276403427124, - "733": 2.0177018642425537, - "734": 2.018979072570801, - "735": 2.0090126991271973, - "736": 2.0201165676116943, - "737": 1.996307373046875, - "738": 2.008244276046753, - "739": 2.030090093612671, - "740": 2.0360255241394043, - "741": 2.0037169456481934, - "742": 2.0351085662841797, - "743": 2.038628578186035, - "744": 2.0207865238189697, - "745": 2.034184455871582, - "746": 2.0122597217559814, - "747": 2.012249231338501, - "748": 2.008070468902588, - "749": 1.9660768508911133, - "750": 1.9985742568969727, - "751": 1.9919071197509766, - "752": 1.9824224710464478, - "753": 1.9904437065124512, - "754": 1.9913536310195923, - "755": 1.9934518337249756 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 7.0679, - 7.37975, - 8.0553, - 8.45173, - 7.03099, - 6.54767, - 6.42302, - 7.54183, - 9.13027, - 11.0631, - 12.7233, - 11.3939, - 10.596, - 9.29775, - 10.8937, - 12.2019, - 13.146, - 12.7159, - 12.8753, - 13.772, - 11.7782, - 11.8068, - 12.7488, - 10.5656, - 10.7792, - 11.0387, - 12.4627, - 12.9903, - 13.2203, - 11.9166, - 11.3964, - 11.87, - 12.0242, - 14.5752, - 15.0977, - 15.2525, - 15.1833, - 13.094, - 16.9159, - 15.1509, - 15.2609, - 15.1686, - 13.0356, - 13.9977, - 13.2085, - 13.7283, - 12.4666, - 14.5811, - 15.1097, - 15.7869, - 12.3498, - 15.3654, - 14.0113, - 14.6422 - ], - "train_epoch_time": 5.053859710693359, - "train_loss": 2.000632954191101, - "train_score": 0.4104801827985553, - "val_loss": 2.117544219907198, - "val_score": 0.3790273394020773 - }, - { - "epoch": 14, - "grad_norm": 0.3107752501964569, - "learning_rate": 0.1546666666666667, - "model_norm": 87.80721282958984, - "step_logs": { - "grad_norm": { - "756": 0.36391544342041016, - "757": 0.34061774611473083, - "758": 0.3373683989048004, - "759": 0.368813157081604, - "760": 0.37175610661506653, - "761": 0.3224388062953949, - "762": 0.30784475803375244, - "763": 0.3142504096031189, - "764": 0.3739141821861267, - "765": 0.3637954890727997, - "766": 0.34604036808013916, - "767": 0.3342316150665283, - "768": 0.35025209188461304, - "769": 0.33368217945098877, - "770": 0.35340237617492676, - "771": 0.3525720536708832, - "772": 0.3336862325668335, - "773": 0.3644021153450012, - "774": 0.320532888174057, - "775": 0.3363337516784668, - "776": 0.33224961161613464, - "777": 0.33082783222198486, - "778": 0.3233364522457123, - "779": 0.33373716473579407, - "780": 0.34446826577186584, - "781": 0.33136895298957825, - "782": 0.3461458683013916, - "783": 0.3803730607032776, - "784": 0.3377346694469452, - "785": 0.3358157277107239, - "786": 0.32283443212509155, - "787": 0.3645469546318054, - "788": 0.3507978618144989, - "789": 0.3115021884441376, - "790": 0.31787246465682983, - "791": 0.3238396644592285, - "792": 0.3129032850265503, - "793": 0.3185950517654419, - "794": 0.34452077746391296, - "795": 0.329192191362381, - "796": 0.3147062063217163, - "797": 0.3022286295890808, - "798": 0.3186566233634949, - "799": 0.3093697130680084, - "800": 0.3028464913368225, - "801": 0.30358558893203735, - "802": 0.3344113230705261, - "803": 0.29959699511528015, - "804": 0.3260193467140198, - "805": 0.34442007541656494, - "806": 0.3120967447757721, - "807": 0.3306928873062134, - "808": 0.34221121668815613, - "809": 0.3107752501964569 - }, - "loss": { - "756": 2.008708953857422, - "757": 1.992264986038208, - "758": 1.9933116436004639, - "759": 1.99192476272583, - "760": 1.9767273664474487, - "761": 2.0058794021606445, - "762": 2.0074713230133057, - "763": 1.9907655715942383, - "764": 1.9953372478485107, - "765": 1.9932351112365723, - "766": 2.018752098083496, - "767": 1.9948519468307495, - "768": 2.0240020751953125, - "769": 1.9772720336914062, - "770": 1.9919686317443848, - "771": 1.9680653810501099, - "772": 2.0073580741882324, - "773": 1.99609375, - "774": 2.01682710647583, - "775": 2.000037670135498, - "776": 1.9889997243881226, - "777": 1.9887484312057495, - "778": 2.012557029724121, - "779": 1.9965174198150635, - "780": 2.014468193054199, - "781": 1.9837143421173096, - "782": 1.9620920419692993, - "783": 2.006413221359253, - "784": 1.994457721710205, - "785": 2.009927272796631, - "786": 1.964850902557373, - "787": 1.9998902082443237, - "788": 2.0035266876220703, - "789": 2.0017571449279785, - "790": 1.9732887744903564, - "791": 1.9986650943756104, - "792": 1.9936249256134033, - "793": 2.0048770904541016, - "794": 2.004767894744873, - "795": 1.9931751489639282, - "796": 1.9738640785217285, - "797": 1.9997490644454956, - "798": 1.9952969551086426, - "799": 1.9724063873291016, - "800": 1.9803149700164795, - "801": 1.9830210208892822, - "802": 1.9854111671447754, - "803": 1.9689775705337524, - "804": 1.9684094190597534, - "805": 1.9876070022583008, - "806": 1.9952058792114258, - "807": 1.9629908800125122, - "808": 2.017622470855713, - "809": 2.006425380706787 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 15.1676, - 17.1717, - 17.5132, - 14.644, - 14.3031, - 19.2935, - 21.1829, - 20.159, - 14.2716, - 15.0607, - 16.8589, - 17.8573, - 16.4987, - 17.7583, - 15.9494, - 15.8323, - 18.028, - 15.0321, - 19.6301, - 17.6806, - 18.018, - 18.1709, - 19.2504, - 17.9252, - 16.977, - 18.0657, - 16.3757, - 13.8676, - 17.4853, - 17.8229, - 18.8525, - 15.0487, - 16.281, - 20.6295, - 19.5292, - 19.0581, - 20.3621, - 19.7519, - 16.8901, - 18.3927, - 19.93, - 21.893, - 19.65, - 20.6082, - 21.5918, - 21.5162, - 17.7537, - 21.9364, - 18.5195, - 16.7554, - 20.4837, - 17.9502, - 17.2287, - 20.7745 - ], - "train_epoch_time": 5.053775787353516, - "train_loss": 1.9869832968609233, - "train_score": 0.41368252341682293, - "val_loss": 2.108280881811639, - "val_score": 0.38268692625513306 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:28:56.414613", - "final_model_norm": 87.80721282958984, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:27:11.463805", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 9.828468322753906, - "learning_rate": 4.64e-11, - "model_norm": 87.36962890625, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.540498733520508, - "3": 8.915801048278809, - "4": 14.781766891479492, - "5": 5.608191013336182, - "6": 3.5889790058135986, - "7": 5.249990463256836, - "8": 3.8745484352111816, - "9": 4.497948169708252, - "10": 4.748721122741699, - "11": 4.648350715637207, - "12": 20.810983657836914, - "13": 6.429323673248291, - "14": 21.983104705810547, - "15": 3.9209253787994385, - "16": 6.288517951965332, - "17": 13.581155776977539, - "18": 5.620336055755615, - "19": 14.86163330078125, - "20": 9.477866172790527, - "21": 5.467764854431152, - "22": 3.5305988788604736, - "23": 10.763609886169434, - "24": 8.13704776763916, - "25": 5.859228610992432, - "26": 16.31954574584961, - "27": 7.764303207397461, - "28": 8.32188892364502, - "29": 10.810586929321289, - "30": 4.629059314727783, - "31": 7.128204345703125, - "32": 7.402172565460205, - "33": 8.087552070617676, - "34": 10.075281143188477, - "35": 3.894308090209961, - "36": 10.271742820739746, - "37": 4.768221378326416, - "38": 12.201920509338379, - "39": 7.188667297363281, - "40": 6.573385238647461, - "41": 12.998823165893555, - "42": 3.9970970153808594, - "43": 9.127283096313477, - "44": 10.654000282287598, - "45": 4.676833152770996, - "46": 12.401044845581055, - "47": 3.9859025478363037, - "48": 9.84560775756836, - "49": 7.28375768661499, - "50": 4.3989996910095215, - "51": 14.074361801147461, - "52": 5.319989204406738, - "53": 9.828468322753906 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.8002490997314453, - "3": 3.9038262367248535, - "4": 4.260810852050781, - "5": 4.260765075683594, - "6": 3.5228896141052246, - "7": 3.688533306121826, - "8": 4.044903755187988, - "9": 3.5027456283569336, - "10": 3.9566173553466797, - "11": 4.078937530517578, - "12": 4.282462120056152, - "13": 7.085672855377197, - "14": 4.148512840270996, - "15": 3.8898236751556396, - "16": 3.8796591758728027, - "17": 4.587935447692871, - "18": 3.8093762397766113, - "19": 4.920986652374268, - "20": 5.559451580047607, - "21": 4.693474292755127, - "22": 3.4429643154144287, - "23": 4.915096282958984, - "24": 5.12191915512085, - "25": 3.9754562377929688, - "26": 5.782840251922607, - "27": 4.330013275146484, - "28": 4.404635906219482, - "29": 4.462398052215576, - "30": 5.823443412780762, - "31": 4.089975357055664, - "32": 3.8508737087249756, - "33": 5.038377285003662, - "34": 4.2024688720703125, - "35": 3.9904799461364746, - "36": 6.611223220825195, - "37": 5.6425323486328125, - "38": 5.7696852684021, - "39": 4.86868953704834, - "40": 4.1282758712768555, - "41": 5.179817199707031, - "42": 4.502457618713379, - "43": 4.382997989654541, - "44": 5.242732524871826, - "45": 4.001960754394531, - "46": 6.762143135070801, - "47": 4.60350227355957, - "48": 6.042437553405762, - "49": 5.121825695037842, - "50": 4.212311744689941, - "51": 5.92720890045166, - "52": 4.657595157623291, - "53": 5.204258918762207 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 0.00869338, - 0.00874607, - 0.0888363, - 0.04911, - 0.0195002, - 0.13547, - 0.2735, - 0.133825, - 0.269442, - 0.173133, - 0.175457, - 0.188777, - 0.009888, - 0.171416, - 0.00858449, - 0.253019, - 0.0981063, - 0.0248739, - 0.120595, - 0.0222802, - 0.0618886, - 0.156991, - 0.276208, - 0.0424244, - 0.0773569, - 0.115799, - 0.0217133, - 0.0718264, - 0.0636013, - 0.038183, - 0.271766, - 0.0804934, - 0.0702815, - 0.0770294, - 0.041399, - 0.263126, - 0.0626605, - 0.248177, - 0.0387521, - 0.094214, - 0.0955412, - 0.0306554, - 0.281812, - 0.0526124, - 0.0461883, - 0.182965, - 0.0439711, - 0.289758, - 0.0623343, - 0.0965414, - 0.217677, - 0.0299222, - 0.164566, - 0.053875 - ], - "train_epoch_time": 5.054558753967285, - "train_loss": 4.656336928576958, - "train_score": 0.17824381281176804, - "val_loss": 4.700132458683544, - "val_score": 0.17385727603386245 - }, - { - "epoch": 1, - "grad_norm": 1.9219194650650024, - "learning_rate": 0.464, - "model_norm": 87.32701873779297, - "step_logs": { - "grad_norm": { - "54": 8.648448944091797, - "55": 4.578888416290283, - "56": 9.155689239501953, - "57": 4.598766326904297, - "58": 11.530369758605957, - "59": 11.030740737915039, - "60": 4.289649963378906, - "61": 10.072020530700684, - "62": 4.655407905578613, - "63": 6.099062442779541, - "64": 4.3996148109436035, - "65": 5.014732837677002, - "66": 6.065854549407959, - "67": 3.0276389122009277, - "68": 7.581371784210205, - "69": 2.718651533126831, - "70": 12.671808242797852, - "71": 5.8091840744018555, - "72": 5.4534735679626465, - "73": 2.60016131401062, - "74": 8.829605102539062, - "75": 4.44790506362915, - "76": 4.387836933135986, - "77": 5.095001697540283, - "78": 3.3428914546966553, - "79": 6.636009216308594, - "80": 7.214669704437256, - "81": 2.0348479747772217, - "82": 6.88657808303833, - "83": 2.659146785736084, - "84": 5.854611396789551, - "85": 3.974867105484009, - "86": 2.5644941329956055, - "87": 8.266683578491211, - "88": 4.98799467086792, - "89": 6.879557132720947, - "90": 2.684969902038574, - "91": 9.80031681060791, - "92": 7.215325832366943, - "93": 5.047442436218262, - "94": 5.031023025512695, - "95": 4.495306491851807, - "96": 4.4241228103637695, - "97": 3.1164910793304443, - "98": 9.714051246643066, - "99": 3.9311654567718506, - "100": 3.965953826904297, - "101": 3.0835049152374268, - "102": 6.171586990356445, - "103": 2.359495162963867, - "104": 1.8651447296142578, - "105": 2.323331832885742, - "106": 3.661193609237671, - "107": 1.9219194650650024 - }, - "loss": { - "54": 4.683562278747559, - "55": 4.135055065155029, - "56": 4.433767318725586, - "57": 3.9660756587982178, - "58": 6.08804988861084, - "59": 5.184176921844482, - "60": 3.7943224906921387, - "61": 4.71868896484375, - "62": 4.050958633422852, - "63": 4.403265476226807, - "64": 4.339849948883057, - "65": 3.511442184448242, - "66": 4.623457908630371, - "67": 3.6430869102478027, - "68": 4.774730682373047, - "69": 3.583538055419922, - "70": 6.695191383361816, - "71": 4.642904281616211, - "72": 3.9379208087921143, - "73": 4.109834671020508, - "74": 4.738090515136719, - "75": 3.473706007003784, - "76": 3.4843969345092773, - "77": 4.260203838348389, - "78": 3.5664355754852295, - "79": 4.1052961349487305, - "80": 5.061912536621094, - "81": 3.204319715499878, - "82": 4.00563383102417, - "83": 3.421499252319336, - "84": 4.289734840393066, - "85": 3.433037519454956, - "86": 3.4934334754943848, - "87": 4.548451900482178, - "88": 4.164238929748535, - "89": 3.838240623474121, - "90": 3.443056583404541, - "91": 4.547194480895996, - "92": 4.2838826179504395, - "93": 3.8870186805725098, - "94": 4.154099464416504, - "95": 3.80397629737854, - "96": 3.806995153427124, - "97": 3.9605796337127686, - "98": 4.141651153564453, - "99": 3.4924166202545166, - "100": 3.4166598320007324, - "101": 3.3425891399383545, - "102": 4.16901159286499, - "103": 3.720499277114868, - "104": 3.306309223175049, - "105": 3.304769277572632, - "106": 3.4789741039276123, - "107": 3.6573867797851562 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.0626181, - 0.197225, - 0.0528921, - 0.187533, - 0.0457922, - 0.042606, - 0.206201, - 0.0465145, - 0.186914, - 0.118372, - 0.224205, - 0.139634, - 0.125656, - 0.397431, - 0.0830717, - 0.484847, - 0.0416952, - 0.137581, - 0.13241, - 0.607888, - 0.0607744, - 0.175583, - 0.180978, - 0.164113, - 0.319146, - 0.0932246, - 0.0972482, - 0.773877, - 0.0844626, - 0.483873, - 0.125151, - 0.217287, - 0.531189, - 0.0665581, - 0.167372, - 0.0810982, - 0.477601, - 0.0473438, - 0.0822859, - 0.152572, - 0.164121, - 0.188243, - 0.194504, - 0.407781, - 0.0438907, - 0.225987, - 0.217223, - 0.351555, - 0.109456, - 0.668287, - 0.950426, - 0.612236, - 0.259541, - 0.990149 - ], - "train_epoch_time": 5.054613828659058, - "train_loss": 3.2655267616939683, - "train_score": 0.1486504662840746, - "val_loss": 3.273803014295383, - "val_score": 0.14700685266991298 - }, - { - "epoch": 2, - "grad_norm": 0.8656755089759827, - "learning_rate": 0.464, - "model_norm": 87.37055206298828, - "step_logs": { - "grad_norm": { - "108": 2.224764823913574, - "109": 2.4785454273223877, - "110": 1.7006232738494873, - "111": 2.4705331325531006, - "112": 2.2004644870758057, - "113": 1.3159208297729492, - "114": 1.4564001560211182, - "115": 1.6065329313278198, - "116": 1.8932063579559326, - "117": 1.3055025339126587, - "118": 0.9097657799720764, - "119": 1.0402640104293823, - "120": 1.3806099891662598, - "121": 2.1265838146209717, - "122": 1.08148193359375, - "123": 1.3173999786376953, - "124": 1.15640389919281, - "125": 0.6935372948646545, - "126": 0.6353476643562317, - "127": 0.8891350626945496, - "128": 2.159102201461792, - "129": 1.2500455379486084, - "130": 1.0583934783935547, - "131": 0.719110369682312, - "132": 1.1054965257644653, - "133": 1.209761381149292, - "134": 1.1139384508132935, - "135": 1.0394201278686523, - "136": 1.0191543102264404, - "137": 1.1278642416000366, - "138": 1.42232346534729, - "139": 1.1458137035369873, - "140": 0.4841899275779724, - "141": 0.37479013204574585, - "142": 0.43170198798179626, - "143": 0.5522638559341431, - "144": 0.9335963129997253, - "145": 1.1546589136123657, - "146": 1.5090208053588867, - "147": 1.112670660018921, - "148": 0.7939635515213013, - "149": 0.8073285818099976, - "150": 0.9377206563949585, - "151": 1.3339279890060425, - "152": 1.0855871438980103, - "153": 0.6903332471847534, - "154": 0.5326964259147644, - "155": 0.5710505247116089, - "156": 0.6821045875549316, - "157": 0.9781208038330078, - "158": 1.0031172037124634, - "159": 1.0019264221191406, - "160": 0.9743958711624146, - "161": 0.8656755089759827 - }, - "loss": { - "108": 3.2651774883270264, - "109": 3.5514795780181885, - "110": 3.45906925201416, - "111": 3.0252509117126465, - "112": 3.6742281913757324, - "113": 3.1361379623413086, - "114": 2.958592176437378, - "115": 3.0104382038116455, - "116": 2.9714958667755127, - "117": 3.229057788848877, - "118": 2.8667311668395996, - "119": 2.767115831375122, - "120": 2.8680105209350586, - "121": 2.9593679904937744, - "122": 3.189466953277588, - "123": 2.9810893535614014, - "124": 2.914921522140503, - "125": 2.728799343109131, - "126": 2.6786763668060303, - "127": 2.713738441467285, - "128": 2.8392176628112793, - "129": 3.252112627029419, - "130": 2.979386329650879, - "131": 2.6869688034057617, - "132": 2.6942973136901855, - "133": 2.8581767082214355, - "134": 2.738443613052368, - "135": 2.77777099609375, - "136": 2.6899850368499756, - "137": 2.7787766456604004, - "138": 2.7383408546447754, - "139": 2.908726215362549, - "140": 2.6322412490844727, - "141": 2.595581531524658, - "142": 2.5955400466918945, - "143": 2.6038382053375244, - "144": 2.6158382892608643, - "145": 2.7760329246520996, - "146": 2.745797634124756, - "147": 2.921092987060547, - "148": 2.6332855224609375, - "149": 2.639986038208008, - "150": 2.6682796478271484, - "151": 2.692105770111084, - "152": 2.8223981857299805, - "153": 2.6438698768615723, - "154": 2.5921244621276855, - "155": 2.597104787826538, - "156": 2.5918169021606445, - "157": 2.620973825454712, - "158": 2.7008495330810547, - "159": 2.6395912170410156, - "160": 2.702662467956543, - "161": 2.632098436355591 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.659688, - 0.578117, - 1.19603, - 0.495656, - 0.758818, - 1.81107, - 1.39484, - 1.16641, - 0.829047, - 1.89461, - 3.4636, - 2.55706, - 1.50466, - 0.654386, - 2.72696, - 1.71767, - 2.17976, - 5.67325, - 6.63586, - 3.43267, - 0.609049, - 2.0812, - 2.6597, - 5.19603, - 2.2046, - 1.95294, - 2.20689, - 2.57107, - 2.58982, - 2.18444, - 1.3536, - 2.21552, - 11.2278, - 18.4781, - 13.9271, - 8.5373, - 3.00118, - 2.08217, - 1.20581, - 2.35946, - 4.17731, - 4.05043, - 3.03448, - 1.51296, - 2.39491, - 5.54782, - 9.13474, - 7.96417, - 5.57061, - 2.73954, - 2.68409, - 2.62945, - 2.84656, - 3.5123 - ], - "train_epoch_time": 5.060021638870239, - "train_loss": 2.664649004436805, - "train_score": 0.23297278515492142, - "val_loss": 2.700881416022983, - "val_score": 0.2264369263577817 - }, - { - "epoch": 3, - "grad_norm": 0.8106290698051453, - "learning_rate": 0.464, - "model_norm": 87.40774536132812, - "step_logs": { - "grad_norm": { - "162": 0.8817408084869385, - "163": 1.0190868377685547, - "164": 1.0005944967269897, - "165": 0.8770706057548523, - "166": 0.9105992317199707, - "167": 1.1062794923782349, - "168": 1.010089635848999, - "169": 0.6871556639671326, - "170": 0.6717459559440613, - "171": 0.8958699703216553, - "172": 0.9576413035392761, - "173": 1.0030708312988281, - "174": 0.9145611524581909, - "175": 0.7132887244224548, - "176": 0.8106444478034973, - "177": 1.098758578300476, - "178": 1.0777448415756226, - "179": 0.8357540369033813, - "180": 0.7871129512786865, - "181": 0.8443882465362549, - "182": 0.9341241717338562, - "183": 1.0236402750015259, - "184": 0.8844297528266907, - "185": 0.6667125821113586, - "186": 0.7151057720184326, - "187": 0.9944509863853455, - "188": 0.9487119913101196, - "189": 0.7194960713386536, - "190": 0.7447572946548462, - "191": 0.9531127214431763, - "192": 1.022182583808899, - "193": 0.890994668006897, - "194": 0.8254366517066956, - "195": 0.8376187086105347, - "196": 0.8385922908782959, - "197": 0.786694347858429, - "198": 0.8559815287590027, - "199": 1.051024317741394, - "200": 1.0123729705810547, - "201": 0.9012372493743896, - "202": 0.9190186858177185, - "203": 1.050588607788086, - "204": 1.076776385307312, - "205": 0.9972522854804993, - "206": 0.898023247718811, - "207": 0.8465771079063416, - "208": 0.8197812438011169, - "209": 0.7887733578681946, - "210": 0.8892205357551575, - "211": 1.0172501802444458, - "212": 0.9609780311584473, - "213": 0.813028872013092, - "214": 0.7615267038345337, - "215": 0.8106290698051453 - }, - "loss": { - "162": 2.6643261909484863, - "163": 2.629546642303467, - "164": 2.7244205474853516, - "165": 2.6418588161468506, - "166": 2.6596574783325195, - "167": 2.610051155090332, - "168": 2.7604384422302246, - "169": 2.614164352416992, - "170": 2.6069884300231934, - "171": 2.591151237487793, - "172": 2.671731948852539, - "173": 2.6373350620269775, - "174": 2.697756767272949, - "175": 2.591090440750122, - "176": 2.5971221923828125, - "177": 2.642165422439575, - "178": 2.7219491004943848, - "179": 2.636932849884033, - "180": 2.6107211112976074, - "181": 2.6221394538879395, - "182": 2.638202667236328, - "183": 2.628596782684326, - "184": 2.6915245056152344, - "185": 2.5791103839874268, - "186": 2.585062026977539, - "187": 2.5909202098846436, - "188": 2.6705780029296875, - "189": 2.5907747745513916, - "190": 2.5837063789367676, - "191": 2.5773487091064453, - "192": 2.6718831062316895, - "193": 2.611485481262207, - "194": 2.6175007820129395, - "195": 2.5951855182647705, - "196": 2.620110511779785, - "197": 2.575089454650879, - "198": 2.6082868576049805, - "199": 2.6181271076202393, - "200": 2.6842989921569824, - "201": 2.5998175144195557, - "202": 2.6240413188934326, - "203": 2.6005783081054688, - "204": 2.672299385070801, - "205": 2.602001667022705, - "206": 2.6349525451660156, - "207": 2.5763492584228516, - "208": 2.617816925048828, - "209": 2.5412867069244385, - "210": 2.6208558082580566, - "211": 2.60310697555542, - "212": 2.64933180809021, - "213": 2.567422389984131, - "214": 2.581874370574951, - "215": 2.564702033996582 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 3.42693, - 2.53197, - 2.72118, - 3.43432, - 3.20753, - 2.13265, - 2.70557, - 5.53634, - 5.77735, - 3.22852, - 2.91331, - 2.62121, - 3.22535, - 5.09274, - 3.95213, - 2.18854, - 2.34341, - 3.77522, - 4.21392, - 3.67766, - 3.02342, - 2.50859, - 3.4409, - 5.8022, - 5.05511, - 2.61992, - 2.96713, - 5.00464, - 4.65815, - 2.83717, - 2.55718, - 3.28956, - 3.84166, - 3.69893, - 3.72579, - 4.16083, - 3.55981, - 2.37009, - 2.61909, - 3.20084, - 3.10686, - 2.35616, - 2.3048, - 2.61636, - 3.26736, - 3.59477, - 3.89532, - 4.0846, - 3.31455, - 2.51557, - 2.86886, - 3.88406, - 4.4521, - 3.90295 - ], - "train_epoch_time": 5.0556724071502686, - "train_loss": 2.600386273467558, - "train_score": 0.23735876973595477, - "val_loss": 2.641591264514507, - "val_score": 0.23300265490871894 - }, - { - "epoch": 4, - "grad_norm": 0.715764045715332, - "learning_rate": 0.464, - "model_norm": 87.44450378417969, - "step_logs": { - "grad_norm": { - "216": 0.8147035241127014, - "217": 0.7814739942550659, - "218": 0.8018308877944946, - "219": 0.8745168447494507, - "220": 0.848491907119751, - "221": 0.7085331082344055, - "222": 0.6883143186569214, - "223": 0.795415461063385, - "224": 0.8790597319602966, - "225": 0.9082624316215515, - "226": 0.822569727897644, - "227": 0.697720468044281, - "228": 0.7037535309791565, - "229": 0.8077332973480225, - "230": 0.8776323795318604, - "231": 0.9197269082069397, - "232": 0.914128839969635, - "233": 0.8946424126625061, - "234": 0.8728997707366943, - "235": 0.9009494781494141, - "236": 0.8744539022445679, - "237": 0.7535949349403381, - "238": 0.7415274381637573, - "239": 0.8407319784164429, - "240": 0.8422963619232178, - "241": 0.8557510375976562, - "242": 0.902815043926239, - "243": 0.8630161285400391, - "244": 0.7883111238479614, - "245": 0.6763781309127808, - "246": 0.6961337327957153, - "247": 0.7964216470718384, - "248": 0.7997011542320251, - "249": 0.7394702434539795, - "250": 0.7804990410804749, - "251": 0.8331275582313538, - "252": 0.8206280469894409, - "253": 0.7827075719833374, - "254": 0.7920497059822083, - "255": 0.8238981366157532, - "256": 0.7816749811172485, - "257": 0.7070029377937317, - "258": 0.6945733428001404, - "259": 0.7669711709022522, - "260": 0.8645933866500854, - "261": 0.9310718178749084, - "262": 0.9162704944610596, - "263": 0.9239433407783508, - "264": 0.9411354064941406, - "265": 0.9254506230354309, - "266": 0.8275494575500488, - "267": 0.7672973871231079, - "268": 0.7667270302772522, - "269": 0.715764045715332 - }, - "loss": { - "216": 2.6052346229553223, - "217": 2.555436372756958, - "218": 2.601337194442749, - "219": 2.5790903568267822, - "220": 2.6068601608276367, - "221": 2.5617146492004395, - "222": 2.5569586753845215, - "223": 2.555171489715576, - "224": 2.617776870727539, - "225": 2.58357310295105, - "226": 2.6194419860839844, - "227": 2.5379624366760254, - "228": 2.5633058547973633, - "229": 2.555050849914551, - "230": 2.6009440422058105, - "231": 2.5609793663024902, - "232": 2.626481056213379, - "233": 2.584388256072998, - "234": 2.57894229888916, - "235": 2.56315016746521, - "236": 2.6240882873535156, - "237": 2.549525737762451, - "238": 2.5621232986450195, - "239": 2.5661332607269287, - "240": 2.5973246097564697, - "241": 2.5558292865753174, - "242": 2.62863826751709, - "243": 2.5682787895202637, - "244": 2.5877485275268555, - "245": 2.541343927383423, - "246": 2.536112070083618, - "247": 2.5410687923431396, - "248": 2.568793535232544, - "249": 2.5299339294433594, - "250": 2.550713062286377, - "251": 2.5351595878601074, - "252": 2.567039966583252, - "253": 2.542923927307129, - "254": 2.560943603515625, - "255": 2.5632314682006836, - "256": 2.5640065670013428, - "257": 2.5342507362365723, - "258": 2.544525146484375, - "259": 2.5233750343322754, - "260": 2.568404197692871, - "261": 2.573936939239502, - "262": 2.599677324295044, - "263": 2.5530166625976562, - "264": 2.5886425971984863, - "265": 2.5661191940307617, - "266": 2.5861520767211914, - "267": 2.5155277252197266, - "268": 2.5421180725097656, - "269": 2.521008014678955 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 3.92507, - 4.18443, - 4.04605, - 3.37233, - 3.62095, - 5.10282, - 5.39697, - 4.03861, - 3.38763, - 3.13183, - 3.87136, - 5.21341, - 5.17558, - 3.91619, - 3.3768, - 3.02753, - 3.14311, - 3.22893, - 3.38464, - 3.15772, - 3.43166, - 4.48935, - 4.65957, - 3.63048, - 3.66097, - 3.49009, - 3.22503, - 3.44829, - 4.16415, - 5.555, - 5.23339, - 4.00618, - 4.01674, - 4.62666, - 4.18713, - 3.65243, - 3.81189, - 4.15082, - 4.08221, - 3.77608, - 4.1963, - 5.06999, - 5.27437, - 4.28967, - 3.43589, - 2.96915, - 3.09651, - 2.99063, - 2.92259, - 2.9962, - 3.7763, - 4.27269, - 4.32428, - 4.92079 - ], - "train_epoch_time": 5.055287599563599, - "train_loss": 2.541071497384924, - "train_score": 0.25027797703164895, - "val_loss": 2.5712752596792754, - "val_score": 0.24682925523490776 - }, - { - "epoch": 5, - "grad_norm": 0.8155317306518555, - "learning_rate": 0.464, - "model_norm": 87.49127197265625, - "step_logs": { - "grad_norm": { - "270": 0.709923267364502, - "271": 0.7353038787841797, - "272": 0.7703292369842529, - "273": 0.8287577629089355, - "274": 0.8209397792816162, - "275": 0.7596935629844666, - "276": 0.7621415853500366, - "277": 0.822248637676239, - "278": 0.8194732069969177, - "279": 0.794235348701477, - "280": 0.8863086700439453, - "281": 0.9842506647109985, - "282": 1.0085747241973877, - "283": 0.7977074384689331, - "284": 0.6794868111610413, - "285": 0.7212539911270142, - "286": 0.9283609390258789, - "287": 1.05335533618927, - "288": 1.0210649967193604, - "289": 0.8998480439186096, - "290": 0.8394947052001953, - "291": 0.8239033818244934, - "292": 0.8653450608253479, - "293": 0.91135573387146, - "294": 0.8895570039749146, - "295": 0.8021307587623596, - "296": 0.8022594451904297, - "297": 1.8957005739212036, - "298": 0.7364959716796875, - "299": 0.48802316188812256, - "300": 0.4164507985115051, - "301": 0.500688910484314, - "302": 0.6880447864532471, - "303": 0.986847996711731, - "304": 0.9654015302658081, - "305": 0.966780960559845, - "306": 0.9232786893844604, - "307": 0.7876191735267639, - "308": 0.7683576941490173, - "309": 0.8035704493522644, - "310": 0.8007235527038574, - "311": 0.8699837923049927, - "312": 0.8615723848342896, - "313": 0.7858172655105591, - "314": 0.7602621912956238, - "315": 0.7625511884689331, - "316": 0.7627614736557007, - "317": 0.8088719844818115, - "318": 0.8202798962593079, - "319": 0.7959516644477844, - "320": 0.7554711699485779, - "321": 0.7299551367759705, - "322": 0.7404099106788635, - "323": 0.8155317306518555 - }, - "loss": { - "270": 2.530697822570801, - "271": 2.520373821258545, - "272": 2.543203592300415, - "273": 2.5499415397644043, - "274": 2.5715713500976562, - "275": 2.5144195556640625, - "276": 2.5348362922668457, - "277": 2.524979591369629, - "278": 2.5430073738098145, - "279": 2.5175833702087402, - "280": 2.5496034622192383, - "281": 2.5477521419525146, - "282": 2.5816190242767334, - "283": 2.5505175590515137, - "284": 2.4904980659484863, - "285": 2.4714856147766113, - "286": 2.5006422996520996, - "287": 2.530031204223633, - "288": 2.5561769008636475, - "289": 2.517416477203369, - "290": 2.5242419242858887, - "291": 2.4763975143432617, - "292": 2.4675889015197754, - "293": 2.4743995666503906, - "294": 2.525834798812866, - "295": 2.482401132583618, - "296": 2.475626230239868, - "297": 2.535186767578125, - "298": 2.597148895263672, - "299": 2.498443126678467, - "300": 2.4709739685058594, - "301": 2.4736552238464355, - "302": 2.4779701232910156, - "303": 2.548572540283203, - "304": 2.567087173461914, - "305": 2.536877155303955, - "306": 2.5445680618286133, - "307": 2.5344479084014893, - "308": 2.533520221710205, - "309": 2.478400468826294, - "310": 2.520012855529785, - "311": 2.512167453765869, - "312": 2.542781352996826, - "313": 2.4930758476257324, - "314": 2.4958527088165283, - "315": 2.4869534969329834, - "316": 2.51597261428833, - "317": 2.495067834854126, - "318": 2.5288801193237305, - "319": 2.4858336448669434, - "320": 2.509807825088501, - "321": 2.469442367553711, - "322": 2.5032758712768555, - "323": 2.4647891521453857 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 5.02131, - 4.66156, - 4.28577, - 3.71257, - 3.81572, - 4.35673, - 4.36394, - 3.73466, - 3.78685, - 3.99103, - 3.24566, - 2.62994, - 2.53791, - 4.00812, - 5.39416, - 4.75096, - 2.90147, - 2.28022, - 2.45179, - 3.10897, - 3.58175, - 3.64811, - 3.29529, - 2.97916, - 3.19196, - 3.85817, - 3.84641, - 0.705457, - 4.78802, - 10.4903, - 14.2476, - 9.86741, - 5.23435, - 2.61696, - 2.75438, - 2.71421, - 2.98503, - 4.08555, - 4.29138, - 3.83816, - 3.93041, - 3.31915, - 3.42551, - 4.03731, - 4.31809, - 4.27691, - 4.32443, - 3.81349, - 3.75841, - 3.92373, - 4.39749, - 4.63454, - 4.5663, - 3.70594 - ], - "train_epoch_time": 5.05573034286499, - "train_loss": 2.5106289301917406, - "train_score": 0.25232021160840645, - "val_loss": 2.563950307607377, - "val_score": 0.24603544820600481 - }, - { - "epoch": 6, - "grad_norm": 0.6872848868370056, - "learning_rate": 0.464, - "model_norm": 87.54857635498047, - "step_logs": { - "grad_norm": { - "324": 0.7839581966400146, - "325": 0.7095580697059631, - "326": 0.862248420715332, - "327": 0.9032272696495056, - "328": 0.7961395978927612, - "329": 0.7738818526268005, - "330": 0.7535048723220825, - "331": 0.7769403457641602, - "332": 0.8828176259994507, - "333": 0.9689627885818481, - "334": 0.8665558695793152, - "335": 0.7570440173149109, - "336": 1.123897910118103, - "337": 1.06027090549469, - "338": 0.8520289063453674, - "339": 0.5979045033454895, - "340": 0.6060411334037781, - "341": 0.7240143418312073, - "342": 1.0907390117645264, - "343": 1.1220327615737915, - "344": 1.0307424068450928, - "345": 1.0251318216323853, - "346": 0.6972977519035339, - "347": 0.7023191452026367, - "348": 0.8285085558891296, - "349": 0.8545849323272705, - "350": 0.9272441267967224, - "351": 1.012359380722046, - "352": 0.9842120409011841, - "353": 0.8615912795066833, - "354": 0.9104143977165222, - "355": 0.9002649188041687, - "356": 0.9262411594390869, - "357": 1.001621961593628, - "358": 0.9617332816123962, - "359": 0.8877976536750793, - "360": 0.9657832980155945, - "361": 0.8201555609703064, - "362": 1.131272554397583, - "363": 0.6465250253677368, - "364": 0.6325067281723022, - "365": 0.8593429327011108, - "366": 0.898367702960968, - "367": 1.1018083095550537, - "368": 0.8325973749160767, - "369": 0.8152550458908081, - "370": 0.7561051845550537, - "371": 0.7377316355705261, - "372": 0.9109453558921814, - "373": 0.9643517732620239, - "374": 0.8522351384162903, - "375": 0.9295061826705933, - "376": 0.881537914276123, - "377": 0.6872848868370056 - }, - "loss": { - "324": 2.505286693572998, - "325": 2.456019163131714, - "326": 2.4898881912231445, - "327": 2.5054454803466797, - "328": 2.479649066925049, - "329": 2.4650940895080566, - "330": 2.4800806045532227, - "331": 2.4616641998291016, - "332": 2.467437267303467, - "333": 2.4682998657226562, - "334": 2.493536949157715, - "335": 2.4131741523742676, - "336": 2.4347915649414062, - "337": 2.5208096504211426, - "338": 2.513828754425049, - "339": 2.4165737628936768, - "340": 2.3917222023010254, - "341": 2.4062037467956543, - "342": 2.431318759918213, - "343": 2.5053200721740723, - "344": 2.531595230102539, - "345": 2.4476327896118164, - "346": 2.4766273498535156, - "347": 2.413266181945801, - "348": 2.41045880317688, - "349": 2.40478515625, - "350": 2.4176554679870605, - "351": 2.454700469970703, - "352": 2.4540939331054688, - "353": 2.399731159210205, - "354": 2.444430112838745, - "355": 2.416689872741699, - "356": 2.416731834411621, - "357": 2.4256677627563477, - "358": 2.442284107208252, - "359": 2.4102227687835693, - "360": 2.402315616607666, - "361": 2.425611734390259, - "362": 2.4030792713165283, - "363": 2.4333086013793945, - "364": 2.3604183197021484, - "365": 2.3863296508789062, - "366": 2.4027938842773438, - "367": 2.4116382598876953, - "368": 2.4453606605529785, - "369": 2.3839893341064453, - "370": 2.3941221237182617, - "371": 2.340608596801758, - "372": 2.380875825881958, - "373": 2.3923251628875732, - "374": 2.3657093048095703, - "375": 2.398134708404541, - "376": 2.4324960708618164, - "377": 2.365286350250244 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 4.07635, - 4.87816, - 3.349, - 3.07108, - 3.91212, - 4.11608, - 4.36811, - 4.07806, - 3.16595, - 2.62896, - 3.32065, - 4.21062, - 1.92756, - 2.24237, - 3.4628, - 6.75984, - 6.51188, - 4.59027, - 2.04362, - 1.99, - 2.38284, - 2.32909, - 5.09359, - 4.89256, - 3.51161, - 3.2928, - 2.81194, - 2.39513, - 2.53346, - 3.23266, - 2.94917, - 2.98181, - 2.81696, - 2.41782, - 2.6405, - 3.05794, - 2.57555, - 3.60603, - 1.87773, - 5.82139, - 5.90009, - 3.23145, - 2.9772, - 1.98655, - 3.52755, - 3.58688, - 4.18776, - 4.30063, - 2.86914, - 2.57246, - 3.25719, - 2.77568, - 3.13019, - 5.00738 - ], - "train_epoch_time": 5.054721355438232, - "train_loss": 2.3425577487972924, - "train_score": 0.31036137009795806, - "val_loss": 2.399173461473905, - "val_score": 0.29665972998988066 - }, - { - "epoch": 7, - "grad_norm": 0.8393017053604126, - "learning_rate": 0.464, - "model_norm": 87.60615539550781, - "step_logs": { - "grad_norm": { - "378": 0.7589331865310669, - "379": 0.8892024159431458, - "380": 0.858923614025116, - "381": 0.8288053274154663, - "382": 0.815409779548645, - "383": 0.8348818421363831, - "384": 0.922610342502594, - "385": 0.8301099538803101, - "386": 0.7437319755554199, - "387": 0.7968907356262207, - "388": 0.8912876844406128, - "389": 1.1475551128387451, - "390": 0.7989431619644165, - "391": 0.8346490859985352, - "392": 0.9870498776435852, - "393": 1.0443447828292847, - "394": 1.223189115524292, - "395": 0.9580093026161194, - "396": 0.9073383808135986, - "397": 1.0248693227767944, - "398": 0.8835127949714661, - "399": 0.8155155181884766, - "400": 0.8422682285308838, - "401": 1.074558973312378, - "402": 0.8556275963783264, - "403": 0.795551598072052, - "404": 0.8677669167518616, - "405": 0.9945087432861328, - "406": 0.9845489263534546, - "407": 0.9531922340393066, - "408": 0.7928701043128967, - "409": 0.7659369707107544, - "410": 0.8509597182273865, - "411": 0.867879331111908, - "412": 0.7661714553833008, - "413": 0.7206516265869141, - "414": 0.8043820858001709, - "415": 0.938575804233551, - "416": 0.875091016292572, - "417": 0.8536773324012756, - "418": 0.9054034352302551, - "419": 0.9733379483222961, - "420": 1.0094554424285889, - "421": 0.8578090071678162, - "422": 0.7676835060119629, - "423": 0.7866225838661194, - "424": 0.8058855533599854, - "425": 0.9008245468139648, - "426": 1.0388469696044922, - "427": 0.9630997776985168, - "428": 0.8510398864746094, - "429": 1.0077499151229858, - "430": 0.9395222663879395, - "431": 0.8393017053604126 - }, - "loss": { - "378": 2.353041172027588, - "379": 2.3699467182159424, - "380": 2.392122268676758, - "381": 2.3604695796966553, - "382": 2.3915934562683105, - "383": 2.339259147644043, - "384": 2.3710484504699707, - "385": 2.356905460357666, - "386": 2.3484816551208496, - "387": 2.345686435699463, - "388": 2.3691048622131348, - "389": 2.395533800125122, - "390": 2.4183292388916016, - "391": 2.3802669048309326, - "392": 2.394064426422119, - "393": 2.3875062465667725, - "394": 2.4428303241729736, - "395": 2.41601824760437, - "396": 2.409115791320801, - "397": 2.3533449172973633, - "398": 2.4201550483703613, - "399": 2.353846549987793, - "400": 2.3462610244750977, - "401": 2.3599376678466797, - "402": 2.4097695350646973, - "403": 2.3267579078674316, - "404": 2.314735174179077, - "405": 2.3632850646972656, - "406": 2.366142511367798, - "407": 2.3565785884857178, - "408": 2.3394815921783447, - "409": 2.307865619659424, - "410": 2.3455278873443604, - "411": 2.3363208770751953, - "412": 2.343714475631714, - "413": 2.285933256149292, - "414": 2.3379223346710205, - "415": 2.3197362422943115, - "416": 2.3655757904052734, - "417": 2.3053879737854004, - "418": 2.3367247581481934, - "419": 2.3189516067504883, - "420": 2.371898889541626, - "421": 2.322610378265381, - "422": 2.3212804794311523, - "423": 2.297940492630005, - "424": 2.3085403442382812, - "425": 2.3058664798736572, - "426": 2.348581314086914, - "427": 2.3665213584899902, - "428": 2.3129308223724365, - "429": 2.3490896224975586, - "430": 2.3440871238708496, - "431": 2.3069348335266113 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 4.08529, - 2.99735, - 3.24246, - 3.43632, - 3.59696, - 3.35605, - 2.7855, - 3.42035, - 4.24575, - 3.69379, - 2.98228, - 1.81909, - 3.78864, - 3.41679, - 2.4573, - 2.18906, - 1.6327, - 2.63245, - 2.9263, - 2.24052, - 3.1004, - 3.53927, - 3.30731, - 2.04381, - 3.29159, - 3.67633, - 3.07394, - 2.38946, - 2.44099, - 2.59371, - 3.72148, - 3.93391, - 3.23909, - 3.1018, - 3.99257, - 4.40162, - 3.61331, - 2.6333, - 3.08909, - 3.16342, - 2.85051, - 2.44773, - 2.32767, - 3.15642, - 3.93879, - 3.71369, - 3.5546, - 2.84154, - 2.17622, - 2.55134, - 3.19347, - 2.3131, - 2.65558, - 3.27491 - ], - "train_epoch_time": 5.060734033584595, - "train_loss": 2.322275898480518, - "train_score": 0.29503116021881487, - "val_loss": 2.3888586859478607, - "val_score": 0.2832053670174219 - }, - { - "epoch": 8, - "grad_norm": 0.8663375973701477, - "learning_rate": 0.464, - "model_norm": 87.67359924316406, - "step_logs": { - "grad_norm": { - "432": 0.9048224687576294, - "433": 0.8958725929260254, - "434": 0.9489478468894958, - "435": 0.9375250935554504, - "436": 1.1159147024154663, - "437": 1.3539495468139648, - "438": 0.9937126636505127, - "439": 0.7304781079292297, - "440": 1.2727736234664917, - "441": 0.839798092842102, - "442": 0.8061545491218567, - "443": 0.8553096055984497, - "444": 0.8286412358283997, - "445": 0.7482856512069702, - "446": 0.6412596106529236, - "447": 0.6080443859100342, - "448": 0.7059473991394043, - "449": 0.8625189661979675, - "450": 1.1521011590957642, - "451": 1.0409513711929321, - "452": 0.8599990010261536, - "453": 0.7692727446556091, - "454": 0.8178277611732483, - "455": 0.9262793660163879, - "456": 0.9845213294029236, - "457": 1.1887562274932861, - "458": 0.849265456199646, - "459": 0.7488734722137451, - "460": 0.7918029427528381, - "461": 0.8631037473678589, - "462": 0.9187257289886475, - "463": 0.9063173532485962, - "464": 0.8658668994903564, - "465": 0.7803764343261719, - "466": 0.7280052304267883, - "467": 0.7875689268112183, - "468": 0.8168123364448547, - "469": 0.8298878073692322, - "470": 0.8827573657035828, - "471": 0.9784128665924072, - "472": 1.0885601043701172, - "473": 2.0626463890075684, - "474": 1.383554220199585, - "475": 1.3349108695983887, - "476": 1.8164100646972656, - "477": 1.3956739902496338, - "478": 1.3808982372283936, - "479": 1.0982495546340942, - "480": 1.036338448524475, - "481": 0.9854855537414551, - "482": 1.0208631753921509, - "483": 0.891883373260498, - "484": 0.8291735649108887, - "485": 0.8663375973701477 - }, - "loss": { - "432": 2.3207361698150635, - "433": 2.32639479637146, - "434": 2.3160781860351562, - "435": 2.327807903289795, - "436": 2.3533363342285156, - "437": 2.3823373317718506, - "438": 2.404601573944092, - "439": 2.274229049682617, - "440": 2.32706356048584, - "441": 2.372982978820801, - "442": 2.3207733631134033, - "443": 2.2941932678222656, - "444": 2.334689140319824, - "445": 2.2608489990234375, - "446": 2.2635459899902344, - "447": 2.2262797355651855, - "448": 2.26047420501709, - "449": 2.294696807861328, - "450": 2.344208240509033, - "451": 2.3642759323120117, - "452": 2.319617748260498, - "453": 2.2785825729370117, - "454": 2.2757506370544434, - "455": 2.292905807495117, - "456": 2.2992310523986816, - "457": 2.312321662902832, - "458": 2.332876205444336, - "459": 2.274783134460449, - "460": 2.267345428466797, - "461": 2.290771961212158, - "462": 2.279402256011963, - "463": 2.311415910720825, - "464": 2.2693541049957275, - "465": 2.283268690109253, - "466": 2.2357559204101562, - "467": 2.2635698318481445, - "468": 2.2616844177246094, - "469": 2.2747926712036133, - "470": 2.255516529083252, - "471": 2.315216541290283, - "472": 2.3080945014953613, - "473": 2.3555655479431152, - "474": 2.46317195892334, - "475": 2.39725399017334, - "476": 2.5200319290161133, - "477": 2.5057151317596436, - "478": 2.527132034301758, - "479": 2.3812496662139893, - "480": 2.363093852996826, - "481": 2.3452019691467285, - "482": 2.360673427581787, - "483": 2.3743820190429688, - "484": 2.3033931255340576, - "485": 2.329716920852661 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 2.83465, - 2.89862, - 2.57199, - 2.64839, - 1.88983, - 1.29957, - 2.43513, - 4.26206, - 1.4365, - 3.36469, - 3.57105, - 3.13605, - 3.40013, - 4.03772, - 5.50455, - 6.02156, - 4.53581, - 3.08452, - 1.7661, - 2.18191, - 3.13632, - 3.85038, - 3.40252, - 2.6724, - 2.3721, - 1.6363, - 3.23448, - 4.05624, - 3.61646, - 3.07508, - 2.70053, - 2.81396, - 3.02691, - 3.74929, - 4.21847, - 3.64936, - 3.3899, - 3.30296, - 2.89443, - 2.41851, - 1.94782, - 0.553663, - 1.28677, - 1.34527, - 0.763798, - 1.28636, - 1.32527, - 1.97425, - 2.20028, - 2.41479, - 2.26517, - 2.98493, - 3.35025, - 3.10405 - ], - "train_epoch_time": 5.054119110107422, - "train_loss": 2.301459996566882, - "train_score": 0.3255570750667512, - "val_loss": 2.3690481073683904, - "val_score": 0.30634238727467755 - }, - { - "epoch": 9, - "grad_norm": 0.8091462850570679, - "learning_rate": 0.464, - "model_norm": 87.738525390625, - "step_logs": { - "grad_norm": { - "486": 0.9061972498893738, - "487": 0.8700083494186401, - "488": 0.8097296357154846, - "489": 0.8147047162055969, - "490": 0.9064461588859558, - "491": 1.026487112045288, - "492": 0.9785983562469482, - "493": 0.8480057120323181, - "494": 0.715952455997467, - "495": 0.7886447906494141, - "496": 0.9205501079559326, - "497": 0.9369004368782043, - "498": 0.7981026768684387, - "499": 0.6879714131355286, - "500": 0.6765692830085754, - "501": 0.7633191347122192, - "502": 0.8823603987693787, - "503": 0.949428379535675, - "504": 0.8830804824829102, - "505": 0.8267177939414978, - "506": 0.8351994752883911, - "507": 0.757067859172821, - "508": 0.7504280209541321, - "509": 0.7553586363792419, - "510": 0.8671025037765503, - "511": 0.8524565100669861, - "512": 0.8685594797134399, - "513": 1.0522680282592773, - "514": 0.9484105706214905, - "515": 0.7447003126144409, - "516": 0.6856025457382202, - "517": 0.7139851450920105, - "518": 0.746656060218811, - "519": 0.7791493535041809, - "520": 0.7601523399353027, - "521": 0.7545337080955505, - "522": 0.8574492931365967, - "523": 0.9257049560546875, - "524": 0.8977177143096924, - "525": 0.7736575603485107, - "526": 0.7302653789520264, - "527": 0.7500297427177429, - "528": 0.8104608654975891, - "529": 0.8192557096481323, - "530": 0.7449462413787842, - "531": 0.7800441980361938, - "532": 0.869362473487854, - "533": 0.9132375717163086, - "534": 0.849949300289154, - "535": 0.6947661638259888, - "536": 0.6462142467498779, - "537": 0.7439555525779724, - "538": 0.7711809277534485, - "539": 0.8091462850570679 - }, - "loss": { - "486": 2.309541702270508, - "487": 2.3110992908477783, - "488": 2.2999320030212402, - "489": 2.2903366088867188, - "490": 2.2813172340393066, - "491": 2.297905683517456, - "492": 2.3322479724884033, - "493": 2.307438373565674, - "494": 2.252199411392212, - "495": 2.2337327003479004, - "496": 2.2935547828674316, - "497": 2.2870070934295654, - "498": 2.266975164413452, - "499": 2.239179849624634, - "500": 2.239522695541382, - "501": 2.229620933532715, - "502": 2.241481304168701, - "503": 2.2858049869537354, - "504": 2.2830092906951904, - "505": 2.248178482055664, - "506": 2.262666940689087, - "507": 2.232067108154297, - "508": 2.225383996963501, - "509": 2.2340638637542725, - "510": 2.219205141067505, - "511": 2.273848295211792, - "512": 2.229863166809082, - "513": 2.2780256271362305, - "514": 2.3253731727600098, - "515": 2.2247302532196045, - "516": 2.1997780799865723, - "517": 2.1794025897979736, - "518": 2.2175052165985107, - "519": 2.2099719047546387, - "520": 2.229396343231201, - "521": 2.1843299865722656, - "522": 2.2053704261779785, - "523": 2.223071575164795, - "524": 2.2601327896118164, - "525": 2.2394065856933594, - "526": 2.1992015838623047, - "527": 2.188534736633301, - "528": 2.2334952354431152, - "529": 2.221254348754883, - "530": 2.22395396232605, - "531": 2.1958765983581543, - "532": 2.200474262237549, - "533": 2.217067241668701, - "534": 2.2371339797973633, - "535": 2.196822166442871, - "536": 2.1863207817077637, - "537": 2.212778091430664, - "538": 2.2034809589385986, - "539": 2.202427625656128 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 2.81242, - 3.05332, - 3.5078, - 3.45063, - 2.77653, - 2.18085, - 2.43537, - 3.20873, - 4.39378, - 3.59144, - 2.70654, - 2.60544, - 3.55901, - 4.73095, - 4.8925, - 3.82665, - 2.87901, - 2.5358, - 2.92757, - 3.2894, - 3.2437, - 3.89437, - 3.95173, - 3.91552, - 2.95159, - 3.12908, - 2.95583, - 2.05734, - 2.58523, - 4.01157, - 4.67987, - 4.27523, - 3.97762, - 3.64037, - 3.85821, - 3.83673, - 2.99961, - 2.59423, - 2.80449, - 3.74141, - 4.12386, - 3.89042, - 3.40033, - 3.30948, - 4.00752, - 3.60886, - 2.91148, - 2.65834, - 3.09675, - 4.55111, - 5.23553, - 3.99801, - 3.70507, - 3.36393 - ], - "train_epoch_time": 5.055047035217285, - "train_loss": 2.2124781825450093, - "train_score": 0.3428454538414434, - "val_loss": 2.2925207798297587, - "val_score": 0.3205412238254613 - }, - { - "epoch": 10, - "grad_norm": 0.839262068271637, - "learning_rate": 0.464, - "model_norm": 87.80653381347656, - "step_logs": { - "grad_norm": { - "540": 0.8846848011016846, - "541": 0.851793646812439, - "542": 0.8648220300674438, - "543": 0.875769317150116, - "544": 0.8684296607971191, - "545": 0.8440709710121155, - "546": 0.8757414817810059, - "547": 0.8696944713592529, - "548": 0.8052344918251038, - "549": 0.7640607357025146, - "550": 0.8160861730575562, - "551": 0.8356430530548096, - "552": 0.8475441336631775, - "553": 0.9411374926567078, - "554": 0.9904058575630188, - "555": 0.7932664155960083, - "556": 0.6735861301422119, - "557": 0.6476566195487976, - "558": 0.7187003493309021, - "559": 0.7167638540267944, - "560": 0.6879050135612488, - "561": 0.750463604927063, - "562": 0.8491277098655701, - "563": 0.9181175827980042, - "564": 0.9281700253486633, - "565": 0.7286429405212402, - "566": 0.7585279941558838, - "567": 0.7738417983055115, - "568": 0.7366043329238892, - "569": 0.7681720852851868, - "570": 0.810899555683136, - "571": 0.8764036893844604, - "572": 1.11243736743927, - "573": 1.004509449005127, - "574": 0.8296661972999573, - "575": 0.74354088306427, - "576": 0.6978038549423218, - "577": 0.7900229692459106, - "578": 0.930242121219635, - "579": 0.9561259150505066, - "580": 0.8252516984939575, - "581": 0.7146198153495789, - "582": 0.7405450344085693, - "583": 0.8092361688613892, - "584": 0.8283589482307434, - "585": 0.7919597625732422, - "586": 0.8006977438926697, - "587": 0.7785877585411072, - "588": 0.6945397853851318, - "589": 0.8264068365097046, - "590": 0.8366949558258057, - "591": 0.8228870630264282, - "592": 0.8261078596115112, - "593": 0.839262068271637 - }, - "loss": { - "540": 2.2191858291625977, - "541": 2.2137722969055176, - "542": 2.2008423805236816, - "543": 2.2153892517089844, - "544": 2.18567156791687, - "545": 2.222735643386841, - "546": 2.229074001312256, - "547": 2.2262442111968994, - "548": 2.1911351680755615, - "549": 2.196539878845215, - "550": 2.183676242828369, - "551": 2.227532386779785, - "552": 2.167851448059082, - "553": 2.2243947982788086, - "554": 2.2245230674743652, - "555": 2.199087619781494, - "556": 2.19425106048584, - "557": 2.153940200805664, - "558": 2.1462035179138184, - "559": 2.154965400695801, - "560": 2.16123628616333, - "561": 2.163560390472412, - "562": 2.1878600120544434, - "563": 2.2012462615966797, - "564": 2.1952459812164307, - "565": 2.189441442489624, - "566": 2.1669788360595703, - "567": 2.1989150047302246, - "568": 2.179996967315674, - "569": 2.1789050102233887, - "570": 2.1882901191711426, - "571": 2.1846516132354736, - "572": 2.2329823970794678, - "573": 2.24784517288208, - "574": 2.1880455017089844, - "575": 2.1801204681396484, - "576": 2.184971809387207, - "577": 2.1718716621398926, - "578": 2.1804022789001465, - "579": 2.230469226837158, - "580": 2.183898448944092, - "581": 2.194782257080078, - "582": 2.148127317428589, - "583": 2.1465179920196533, - "584": 2.1728463172912598, - "585": 2.185518980026245, - "586": 2.1895668506622314, - "587": 2.1469428539276123, - "588": 2.1203830242156982, - "589": 2.1771926879882812, - "590": 2.181893825531006, - "591": 2.1747307777404785, - "592": 2.1478943824768066, - "593": 2.171764850616455 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 2.83541, - 3.05115, - 2.94263, - 2.88849, - 2.89811, - 3.11982, - 2.90652, - 2.94333, - 3.37928, - 3.76256, - 3.27881, - 3.18994, - 3.0179, - 2.51134, - 2.26783, - 3.49466, - 4.83615, - 5.13504, - 4.15504, - 4.19458, - 4.56715, - 3.84158, - 3.0344, - 2.61139, - 2.54817, - 4.12386, - 3.76627, - 3.67201, - 4.01779, - 3.69251, - 3.3279, - 2.84429, - 1.80441, - 2.22771, - 3.1787, - 3.9434, - 4.48724, - 3.4798, - 2.51968, - 2.43987, - 3.20671, - 4.29775, - 3.91703, - 3.27781, - 3.16659, - 3.48456, - 3.41524, - 3.54165, - 4.39562, - 3.18793, - 3.11673, - 3.21163, - 3.14731, - 3.08331 - ], - "train_epoch_time": 5.054749250411987, - "train_loss": 2.1566254325029326, - "train_score": 0.3630301739256214, - "val_loss": 2.253623548796749, - "val_score": 0.3428081953566603 - }, - { - "epoch": 11, - "grad_norm": 0.7908388376235962, - "learning_rate": 0.464, - "model_norm": 87.87235260009766, - "step_logs": { - "grad_norm": { - "594": 0.8385294079780579, - "595": 0.9107412099838257, - "596": 0.9985735416412354, - "597": 1.0071958303451538, - "598": 0.9127932190895081, - "599": 0.8815768361091614, - "600": 0.8051580190658569, - "601": 0.7952074408531189, - "602": 0.7971633076667786, - "603": 0.8336962461471558, - "604": 0.8893314599990845, - "605": 0.9319782257080078, - "606": 0.9262791872024536, - "607": 0.9126294851303101, - "608": 0.7592926621437073, - "609": 0.6793693900108337, - "610": 0.7502461075782776, - "611": 0.8824211359024048, - "612": 0.8688129186630249, - "613": 0.7475083470344543, - "614": 0.672337532043457, - "615": 0.7291651964187622, - "616": 0.7945804595947266, - "617": 0.8353235125541687, - "618": 0.8443430066108704, - "619": 0.8979946970939636, - "620": 0.8763735890388489, - "621": 0.7830246686935425, - "622": 0.7525209188461304, - "623": 0.823521614074707, - "624": 0.9534501433372498, - "625": 0.885374128818512, - "626": 0.7447731494903564, - "627": 0.7996683120727539, - "628": 0.8268854022026062, - "629": 0.7702431082725525, - "630": 0.7293129563331604, - "631": 0.8059701323509216, - "632": 0.8450236320495605, - "633": 0.8778682351112366, - "634": 0.9468300342559814, - "635": 0.9203754663467407, - "636": 0.8117689490318298, - "637": 0.7357116341590881, - "638": 0.7154402732849121, - "639": 0.7058746218681335, - "640": 0.7272049188613892, - "641": 0.8203611969947815, - "642": 0.8053290843963623, - "643": 0.7895781397819519, - "644": 0.8198033571243286, - "645": 0.8081340789794922, - "646": 0.7271595597267151, - "647": 0.7908388376235962 - }, - "loss": { - "594": 2.18215274810791, - "595": 2.179840087890625, - "596": 2.1663570404052734, - "597": 2.2396609783172607, - "598": 2.1682393550872803, - "599": 2.1708712577819824, - "600": 2.176543712615967, - "601": 2.14573335647583, - "602": 2.1722970008850098, - "603": 2.1507678031921387, - "604": 2.1619625091552734, - "605": 2.186767339706421, - "606": 2.202868938446045, - "607": 2.196329355239868, - "608": 2.1574296951293945, - "609": 2.1475911140441895, - "610": 2.1136789321899414, - "611": 2.169053316116333, - "612": 2.1615538597106934, - "613": 2.1541175842285156, - "614": 2.1183393001556396, - "615": 2.0860562324523926, - "616": 2.1464667320251465, - "617": 2.1274566650390625, - "618": 2.136549711227417, - "619": 2.172416925430298, - "620": 2.166909694671631, - "621": 2.1530230045318604, - "622": 2.14167857170105, - "623": 2.1571555137634277, - "624": 2.156428337097168, - "625": 2.1737239360809326, - "626": 2.1162500381469727, - "627": 2.1566338539123535, - "628": 2.1405065059661865, - "629": 2.128350257873535, - "630": 2.086911201477051, - "631": 2.1206464767456055, - "632": 2.1384353637695312, - "633": 2.153104066848755, - "634": 2.1471080780029297, - "635": 2.156794548034668, - "636": 2.120319366455078, - "637": 2.1094422340393066, - "638": 2.1094212532043457, - "639": 2.1174988746643066, - "640": 2.1273560523986816, - "641": 2.1183457374572754, - "642": 2.151970386505127, - "643": 2.1000776290893555, - "644": 2.1109485626220703, - "645": 2.1427557468414307, - "646": 2.1026482582092285, - "647": 2.1007349491119385 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 3.10348, - 2.62806, - 2.17255, - 2.20777, - 2.60233, - 2.79328, - 3.35742, - 3.39324, - 3.41841, - 3.09441, - 2.73351, - 2.51762, - 2.56747, - 2.63699, - 3.74212, - 4.65307, - 3.75519, - 2.7856, - 2.86361, - 3.85511, - 4.6862, - 3.9235, - 3.39976, - 3.04896, - 2.99692, - 2.69399, - 2.82138, - 3.51154, - 3.78196, - 3.18076, - 2.37213, - 2.77301, - 3.81521, - 3.37254, - 3.13059, - 3.58746, - 3.92352, - 3.2646, - 2.99474, - 2.79387, - 2.39502, - 2.54612, - 3.21763, - 3.8972, - 4.12113, - 4.2498, - 4.02278, - 3.14765, - 3.3181, - 3.36857, - 3.14093, - 3.281, - 3.97656, - 3.35889 - ], - "train_epoch_time": 5.055201292037964, - "train_loss": 2.1195851120750393, - "train_score": 0.37360002684114313, - "val_loss": 2.222439574593107, - "val_score": 0.35174637588102425 - }, - { - "epoch": 12, - "grad_norm": 0.5173906087875366, - "learning_rate": 0.464, - "model_norm": 87.92743682861328, - "step_logs": { - "grad_norm": { - "648": 0.8069720268249512, - "649": 0.8512582182884216, - "650": 0.8990569114685059, - "651": 0.8587251305580139, - "652": 0.7868680357933044, - "653": 0.7565704584121704, - "654": 0.8317120671272278, - "655": 0.9230534434318542, - "656": 0.834385871887207, - "657": 0.7295063734054565, - "658": 0.721542477607727, - "659": 0.7463744878768921, - "660": 0.836582601070404, - "661": 0.8649083971977234, - "662": 0.7884148955345154, - "663": 0.698732316493988, - "664": 0.6344377994537354, - "665": 0.6611167192459106, - "666": 0.6664699912071228, - "667": 0.636164665222168, - "668": 0.589198887348175, - "669": 0.5984880924224854, - "670": 0.6751964688301086, - "671": 0.726650595664978, - "672": 0.7066138982772827, - "673": 0.6512463092803955, - "674": 0.6764916181564331, - "675": 0.7289139032363892, - "676": 0.7452622056007385, - "677": 0.720969021320343, - "678": 0.6540064811706543, - "679": 0.6373347640037537, - "680": 0.5958762764930725, - "681": 0.5619276762008667, - "682": 0.603219211101532, - "683": 0.6242123246192932, - "684": 0.6478551030158997, - "685": 0.6306843161582947, - "686": 0.5928675532341003, - "687": 0.5836822390556335, - "688": 0.6253156065940857, - "689": 0.6734954714775085, - "690": 0.7497913837432861, - "691": 0.7331075072288513, - "692": 0.7071674466133118, - "693": 0.6890398859977722, - "694": 0.6262747645378113, - "695": 0.6144706606864929, - "696": 0.6431475877761841, - "697": 0.6287190318107605, - "698": 0.5598568320274353, - "699": 0.5094016790390015, - "700": 0.5272648334503174, - "701": 0.5173906087875366 - }, - "loss": { - "648": 2.1383960247039795, - "649": 2.150754451751709, - "650": 2.1586318016052246, - "651": 2.155897378921509, - "652": 2.100804328918457, - "653": 2.0960984230041504, - "654": 2.1015677452087402, - "655": 2.1342787742614746, - "656": 2.138262987136841, - "657": 2.093071937561035, - "658": 2.1227643489837646, - "659": 2.0809314250946045, - "660": 2.0938491821289062, - "661": 2.1232917308807373, - "662": 2.106125831604004, - "663": 2.070392608642578, - "664": 2.0769429206848145, - "665": 2.086477041244507, - "666": 2.0766024589538574, - "667": 2.093046188354492, - "668": 2.058943748474121, - "669": 2.053786516189575, - "670": 2.085644245147705, - "671": 2.079237699508667, - "672": 2.0442051887512207, - "673": 2.042421579360962, - "674": 2.060612678527832, - "675": 2.0651750564575195, - "676": 2.0722737312316895, - "677": 2.077267646789551, - "678": 2.0851898193359375, - "679": 2.023092269897461, - "680": 2.027134895324707, - "681": 2.049607276916504, - "682": 2.08774995803833, - "683": 2.0501937866210938, - "684": 2.043001651763916, - "685": 2.05068302154541, - "686": 2.0486350059509277, - "687": 2.0478270053863525, - "688": 2.05025577545166, - "689": 2.0634207725524902, - "690": 2.020798683166504, - "691": 2.042860984802246, - "692": 2.0566587448120117, - "693": 2.061901330947876, - "694": 2.0499391555786133, - "695": 2.0715231895446777, - "696": 2.0470504760742188, - "697": 2.0321576595306396, - "698": 2.041411876678467, - "699": 2.0115394592285156, - "700": 2.0329654216766357, - "701": 2.036001205444336 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 3.28376, - 2.96803, - 2.67057, - 2.92361, - 3.39298, - 3.66195, - 3.03807, - 2.50494, - 3.07134, - 3.93302, - 4.07735, - 3.73546, - 2.99177, - 2.83837, - 3.38824, - 4.24064, - 5.15996, - 4.77372, - 4.67511, - 5.17178, - 5.9309, - 5.73382, - 4.57488, - 3.93779, - 4.09412, - 4.81565, - 4.50268, - 3.88691, - 3.73103, - 3.99631, - 4.87508, - 4.98059, - 5.70914, - 6.49097, - 5.73757, - 5.26175, - 4.86758, - 5.15554, - 5.8284, - 6.01091, - 5.24336, - 4.54903, - 3.59453, - 3.80105, - 4.11261, - 4.34289, - 5.2265, - 5.4864, - 4.94888, - 5.14096, - 6.51293, - 7.75189, - 7.31261, - 7.60573 - ], - "train_epoch_time": 5.059199810028076, - "train_loss": 2.0195599937712614, - "train_score": 0.4029725609328518, - "val_loss": 2.13833751689416, - "val_score": 0.37244815588955493 - }, - { - "epoch": 13, - "grad_norm": 0.36634698510169983, - "learning_rate": 0.3093333333333334, - "model_norm": 87.95773315429688, - "step_logs": { - "grad_norm": { - "702": 0.5229237675666809, - "703": 0.5588756799697876, - "704": 0.5312861800193787, - "705": 0.5209408402442932, - "706": 0.5288822650909424, - "707": 0.50210040807724, - "708": 0.5301359295845032, - "709": 0.5049073100090027, - "710": 0.5181417465209961, - "711": 0.5633845925331116, - "712": 0.5114988088607788, - "713": 0.4986511468887329, - "714": 0.486305832862854, - "715": 0.4631892144680023, - "716": 0.46501418948173523, - "717": 0.46146804094314575, - "718": 0.47717010974884033, - "719": 0.4686819016933441, - "720": 0.4504156708717346, - "721": 0.4145474135875702, - "722": 0.4249396324157715, - "723": 0.4134324789047241, - "724": 0.44097408652305603, - "725": 0.45682865381240845, - "726": 0.47396573424339294, - "727": 0.5203550457954407, - "728": 0.48601025342941284, - "729": 0.4258767068386078, - "730": 0.4721459746360779, - "731": 0.4485870897769928, - "732": 0.45657941699028015, - "733": 0.4614412784576416, - "734": 0.4318462014198303, - "735": 0.42194807529449463, - "736": 0.40368345379829407, - "737": 0.3855956494808197, - "738": 0.42440441250801086, - "739": 0.42616212368011475, - "740": 0.4026164412498474, - "741": 0.4223616123199463, - "742": 0.39871248602867126, - "743": 0.3541635274887085, - "744": 0.39917099475860596, - "745": 0.41964536905288696, - "746": 0.4138050675392151, - "747": 0.4086674451828003, - "748": 0.41194820404052734, - "749": 0.4213542640209198, - "750": 0.3916550576686859, - "751": 0.39856600761413574, - "752": 0.42884230613708496, - "753": 0.42071759700775146, - "754": 0.3940809965133667, - "755": 0.36634698510169983 - }, - "loss": { - "702": 2.0297257900238037, - "703": 2.030059337615967, - "704": 2.0213115215301514, - "705": 2.0130062103271484, - "706": 2.0177547931671143, - "707": 1.9900016784667969, - "708": 2.010685920715332, - "709": 1.9931070804595947, - "710": 2.01471209526062, - "711": 2.0091209411621094, - "712": 1.9739407300949097, - "713": 2.0124783515930176, - "714": 2.0259242057800293, - "715": 2.004572868347168, - "716": 2.015043258666992, - "717": 2.003054141998291, - "718": 2.010009765625, - "719": 2.0349745750427246, - "720": 2.0104012489318848, - "721": 1.9826961755752563, - "722": 2.0095744132995605, - "723": 2.0087170600891113, - "724": 2.0249760150909424, - "725": 1.9865930080413818, - "726": 1.9993476867675781, - "727": 2.002136707305908, - "728": 2.0181610584259033, - "729": 1.9923796653747559, - "730": 2.0079102516174316, - "731": 2.0172810554504395, - "732": 1.9989569187164307, - "733": 1.9876576662063599, - "734": 1.997286081314087, - "735": 2.011650800704956, - "736": 1.9871288537979126, - "737": 1.9906885623931885, - "738": 1.983586311340332, - "739": 2.0142931938171387, - "740": 1.9857032299041748, - "741": 1.9814748764038086, - "742": 2.005915641784668, - "743": 1.9845361709594727, - "744": 2.0007758140563965, - "745": 1.971826195716858, - "746": 1.9957859516143799, - "747": 1.9714488983154297, - "748": 1.9844257831573486, - "749": 1.9937916994094849, - "750": 1.99928879737854, - "751": 1.9672081470489502, - "752": 2.0039079189300537, - "753": 1.97898268699646, - "754": 1.9656238555908203, - "755": 1.9715864658355713 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 7.42268, - 6.49948, - 7.16104, - 7.41768, - 7.21357, - 7.89355, - 7.15434, - 7.81821, - 7.5044, - 6.32989, - 7.54475, - 8.09352, - 8.56652, - 9.3434, - 9.31863, - 9.4061, - 8.82778, - 9.26409, - 9.90959, - 11.5374, - 11.1288, - 11.7519, - 10.4134, - 9.51924, - 8.90009, - 7.39425, - 8.54407, - 10.9851, - 9.00724, - 10.0247, - 9.58894, - 9.33488, - 10.7098, - 11.2989, - 12.1939, - 13.3887, - 11.0126, - 11.0911, - 12.2499, - 11.1076, - 12.6181, - 15.8216, - 12.5568, - 11.1971, - 11.6553, - 11.8044, - 11.6936, - 11.2301, - 13.0337, - 12.3837, - 10.8964, - 11.1805, - 12.657, - 14.6903 - ], - "train_epoch_time": 5.056206703186035, - "train_loss": 1.9774697557925494, - "train_score": 0.41260199953014914, - "val_loss": 2.1022859659862845, - "val_score": 0.3795924221607081 - }, - { - "epoch": 14, - "grad_norm": 0.33583253622055054, - "learning_rate": 0.1546666666666667, - "model_norm": 87.96804809570312, - "step_logs": { - "grad_norm": { - "756": 0.3802046775817871, - "757": 0.3616058826446533, - "758": 0.3936336636543274, - "759": 0.3817594349384308, - "760": 0.37455689907073975, - "761": 0.3480111360549927, - "762": 0.3552054762840271, - "763": 0.39839914441108704, - "764": 0.4005875587463379, - "765": 0.35761916637420654, - "766": 0.3719208240509033, - "767": 0.37680840492248535, - "768": 0.3443419635295868, - "769": 0.3533635139465332, - "770": 0.36023086309432983, - "771": 0.38463178277015686, - "772": 0.3412850499153137, - "773": 0.3631596267223358, - "774": 0.3623299300670624, - "775": 0.3818444013595581, - "776": 0.3888358473777771, - "777": 0.3385566473007202, - "778": 0.3568721115589142, - "779": 0.35211560130119324, - "780": 0.36498215794563293, - "781": 0.35718590021133423, - "782": 0.3571947515010834, - "783": 0.36193642020225525, - "784": 0.3788422644138336, - "785": 0.3368608355522156, - "786": 0.36773037910461426, - "787": 0.31993165612220764, - "788": 0.3490808308124542, - "789": 0.3746925890445709, - "790": 0.36006924510002136, - "791": 0.38169974088668823, - "792": 0.3476676046848297, - "793": 0.3273763954639435, - "794": 0.36559224128723145, - "795": 0.35534340143203735, - "796": 0.3426799774169922, - "797": 0.36438578367233276, - "798": 0.3359687030315399, - "799": 0.3274863064289093, - "800": 0.36267516016960144, - "801": 0.35557401180267334, - "802": 0.3628932535648346, - "803": 0.32415181398391724, - "804": 0.34925830364227295, - "805": 0.3335937559604645, - "806": 0.31899911165237427, - "807": 0.35474878549575806, - "808": 0.3257681429386139, - "809": 0.33583253622055054 - }, - "loss": { - "756": 1.9527702331542969, - "757": 1.967621088027954, - "758": 1.9965500831604004, - "759": 1.9632370471954346, - "760": 1.946903944015503, - "761": 1.9913570880889893, - "762": 1.992281436920166, - "763": 1.9814878702163696, - "764": 1.989046573638916, - "765": 1.962976336479187, - "766": 1.9798290729522705, - "767": 1.9996309280395508, - "768": 1.9602906703948975, - "769": 1.9398014545440674, - "770": 1.99808931350708, - "771": 1.967024803161621, - "772": 1.9892666339874268, - "773": 1.9669721126556396, - "774": 1.9795551300048828, - "775": 1.9853723049163818, - "776": 1.9783544540405273, - "777": 1.9808259010314941, - "778": 1.94935941696167, - "779": 1.975574016571045, - "780": 1.9466999769210815, - "781": 1.955148696899414, - "782": 1.9818899631500244, - "783": 1.9680747985839844, - "784": 2.005983352661133, - "785": 1.954211711883545, - "786": 1.972951054573059, - "787": 1.9691839218139648, - "788": 2.0018749237060547, - "789": 1.9747917652130127, - "790": 1.9653880596160889, - "791": 1.9741907119750977, - "792": 1.9759174585342407, - "793": 1.97254478931427, - "794": 1.9574637413024902, - "795": 1.985666036605835, - "796": 1.9525442123413086, - "797": 1.9861587285995483, - "798": 1.964959740638733, - "799": 1.9495489597320557, - "800": 1.978536605834961, - "801": 1.9767224788665771, - "802": 1.9514148235321045, - "803": 1.954641580581665, - "804": 1.9631741046905518, - "805": 1.962278962135315, - "806": 1.9704556465148926, - "807": 1.972823977470398, - "808": 1.9327316284179688, - "809": 1.9612082242965698 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 13.5088, - 15.0477, - 12.8853, - 13.4708, - 13.8774, - 16.4423, - 15.7903, - 12.484, - 12.3951, - 15.3488, - 14.3129, - 14.0834, - 16.5326, - 15.5351, - 15.3976, - 13.296, - 17.0788, - 14.9143, - 15.0785, - 13.6166, - 13.0849, - 17.2816, - 15.3062, - 15.9339, - 14.6136, - 15.3247, - 15.5335, - 15.0237, - 13.9769, - 17.2215, - 14.5901, - 19.2385, - 16.428, - 14.066, - 15.1592, - 13.5502, - 16.3471, - 18.4048, - 14.6454, - 15.7257, - 16.6274, - 14.9586, - 17.4083, - 18.1781, - 15.0421, - 15.6346, - 14.8181, - 18.6025, - 16.094, - 17.6329, - 19.3637, - 15.6764, - 18.2119, - 17.3891 - ], - "train_epoch_time": 5.056036472320557, - "train_loss": 1.9639772283808572, - "train_score": 0.4159892844589403, - "val_loss": 2.092978860973354, - "val_score": 0.38209044923054225 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:30:41.397832", - "final_model_norm": 87.96804809570312, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:28:56.561042", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 12.606510162353516, - "learning_rate": 4.64e-11, - "model_norm": 87.41987609863281, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 6.298553466796875, - "3": 8.560247421264648, - "4": 17.110065460205078, - "5": 5.777316093444824, - "6": 3.025073289871216, - "7": 4.015471458435059, - "8": 7.4302473068237305, - "9": 4.279419422149658, - "10": 5.940033912658691, - "11": 7.862672805786133, - "12": 61.43458938598633, - "13": 6.147928237915039, - "14": 32.13908767700195, - "15": 4.184981822967529, - "16": 13.011384010314941, - "17": 14.165390014648438, - "18": 13.909735679626465, - "19": 8.554447174072266, - "20": 3.96052622795105, - "21": 18.339412689208984, - "22": 8.036703109741211, - "23": 8.141822814941406, - "24": 25.710947036743164, - "25": 3.3410255908966064, - "26": 12.816083908081055, - "27": 16.520401000976562, - "28": 3.8276913166046143, - "29": 6.947815895080566, - "30": 4.014272212982178, - "31": 4.714061737060547, - "32": 5.406624794006348, - "33": 15.096064567565918, - "34": 6.352755546569824, - "35": 12.0445556640625, - "36": 8.452657699584961, - "37": 7.5752058029174805, - "38": 3.7555744647979736, - "39": 17.629322052001953, - "40": 18.58986473083496, - "41": 3.2410812377929688, - "42": 7.705453872680664, - "43": 18.782581329345703, - "44": 8.247995376586914, - "45": 6.117463111877441, - "46": 3.738041639328003, - "47": 8.339092254638672, - "48": 3.7876646518707275, - "49": 7.20920467376709, - "50": 7.390130043029785, - "51": 6.423828601837158, - "52": 3.014044761657715, - "53": 12.606510162353516 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.7870278358459473, - "3": 3.843776226043701, - "4": 4.448698997497559, - "5": 4.182013988494873, - "6": 3.5125656127929688, - "7": 3.5778086185455322, - "8": 3.8151068687438965, - "9": 4.662734031677246, - "10": 3.905299186706543, - "11": 4.246630668640137, - "12": 4.94174337387085, - "13": 4.868429183959961, - "14": 6.734435081481934, - "15": 3.885507822036743, - "16": 4.395578861236572, - "17": 6.381930828094482, - "18": 4.0087666511535645, - "19": 3.788578987121582, - "20": 3.294801950454712, - "21": 3.505646228790283, - "22": 3.587235450744629, - "23": 3.910609006881714, - "24": 4.010260581970215, - "25": 3.143622398376465, - "26": 4.334235191345215, - "27": 6.767123222351074, - "28": 3.5167510509490967, - "29": 4.420467853546143, - "30": 3.5368971824645996, - "31": 4.082586288452148, - "32": 4.310230255126953, - "33": 6.0619401931762695, - "34": 4.213866233825684, - "35": 3.9019479751586914, - "36": 3.730398416519165, - "37": 4.163652420043945, - "38": 3.4271225929260254, - "39": 4.799983024597168, - "40": 5.101315498352051, - "41": 3.415433883666992, - "42": 4.737327575683594, - "43": 4.968088150024414, - "44": 3.970263957977295, - "45": 3.909759044647217, - "46": 3.664367198944092, - "47": 4.095044136047363, - "48": 3.687072277069092, - "49": 3.828706979751587, - "50": 3.560635566711426, - "51": 3.4306516647338867, - "52": 3.3406014442443848, - "53": 5.411184310913086 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 0.00874362, - 0.00824185, - 0.095459, - 0.0524548, - 0.015196, - 0.125295, - 0.383842, - 0.221893, - 0.0691035, - 0.254607, - 0.110682, - 0.0686917, - 0.00130935, - 0.128805, - 0.0065198, - 0.221851, - 0.0259639, - 0.031805, - 0.0207192, - 0.0517717, - 0.21005, - 0.0104231, - 0.0555398, - 0.0589931, - 0.00606648, - 0.281625, - 0.0263877, - 0.0247949, - 0.240031, - 0.0915739, - 0.219487, - 0.183715, - 0.147451, - 0.0266002, - 0.104413, - 0.0268968, - 0.0522118, - 0.072558, - 0.242984, - 0.0154443, - 0.0147615, - 0.325137, - 0.0797879, - 0.0140825, - 0.058361, - 0.104474, - 0.262247, - 0.0588872, - 0.257003, - 0.0736677, - 0.0651964, - 0.0831359, - 0.367727, - 0.0340489 - ], - "train_epoch_time": 5.057607889175415, - "train_loss": 3.488603684960342, - "train_score": 0.15680595404619466, - "val_loss": 3.5175707800106393, - "val_score": 0.15275635051056657 - }, - { - "epoch": 1, - "grad_norm": 2.086177110671997, - "learning_rate": 0.464, - "model_norm": 87.38078308105469, - "step_logs": { - "grad_norm": { - "54": 7.211445331573486, - "55": 5.573832988739014, - "56": 4.934818744659424, - "57": 2.683844566345215, - "58": 3.7610249519348145, - "59": 9.699939727783203, - "60": 6.837521076202393, - "61": 8.089282989501953, - "62": 5.719318389892578, - "63": 6.253697872161865, - "64": 3.220794916152954, - "65": 7.582128047943115, - "66": 11.214761734008789, - "67": 3.2234272956848145, - "68": 6.003808498382568, - "69": 2.8221869468688965, - "70": 5.528847694396973, - "71": 4.349385738372803, - "72": 5.863676071166992, - "73": 4.172426700592041, - "74": 9.085822105407715, - "75": 2.6077518463134766, - "76": 12.66690731048584, - "77": 3.5533547401428223, - "78": 3.201880693435669, - "79": 7.03429651260376, - "80": 4.152105331420898, - "81": 4.5799384117126465, - "82": 3.683461904525757, - "83": 19.63254737854004, - "84": 6.914511203765869, - "85": 2.5504748821258545, - "86": 8.619253158569336, - "87": 3.1781845092773438, - "88": 6.734860897064209, - "89": 2.6362171173095703, - "90": 7.103269100189209, - "91": 3.6764862537384033, - "92": 4.495292663574219, - "93": 2.966806411743164, - "94": 7.526096343994141, - "95": 2.2169010639190674, - "96": 9.367757797241211, - "97": 2.8542661666870117, - "98": 3.140660285949707, - "99": 3.3742315769195557, - "100": 4.749008655548096, - "101": 3.7978298664093018, - "102": 4.022359848022461, - "103": 2.3437440395355225, - "104": 3.0056989192962646, - "105": 5.0609869956970215, - "106": 3.900080442428589, - "107": 2.086177110671997 - }, - "loss": { - "54": 3.4821786880493164, - "55": 3.524817943572998, - "56": 3.4969308376312256, - "57": 2.9422590732574463, - "58": 3.396406888961792, - "59": 4.774542331695557, - "60": 4.1636199951171875, - "61": 5.306468963623047, - "62": 4.275067329406738, - "63": 3.852780342102051, - "64": 3.3348326683044434, - "65": 4.176594257354736, - "66": 4.1349711418151855, - "67": 3.3686180114746094, - "68": 4.030369281768799, - "69": 3.394491195678711, - "70": 4.017953395843506, - "71": 3.5812599658966064, - "72": 3.936899185180664, - "73": 3.600769281387329, - "74": 4.130125522613525, - "75": 3.312443733215332, - "76": 5.475100517272949, - "77": 3.4123334884643555, - "78": 3.7659895420074463, - "79": 4.2888641357421875, - "80": 3.751763343811035, - "81": 3.5230894088745117, - "82": 3.8292384147644043, - "83": 8.19796371459961, - "84": 4.1747541427612305, - "85": 3.5090599060058594, - "86": 4.884117126464844, - "87": 3.2893052101135254, - "88": 4.006354331970215, - "89": 3.4439144134521484, - "90": 4.2537126541137695, - "91": 3.4454574584960938, - "92": 3.654297351837158, - "93": 3.37580943107605, - "94": 5.030043601989746, - "95": 3.084895372390747, - "96": 5.440011978149414, - "97": 3.31675124168396, - "98": 3.6908421516418457, - "99": 3.5664219856262207, - "100": 4.165191650390625, - "101": 3.666332721710205, - "102": 3.603498697280884, - "103": 3.365772247314453, - "104": 3.3738274574279785, - "105": 3.901677370071411, - "106": 3.3689045906066895, - "107": 3.2488253116607666 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.0669586, - 0.113456, - 0.143597, - 0.408476, - 0.240108, - 0.0507451, - 0.0890582, - 0.0810934, - 0.130694, - 0.0985146, - 0.321476, - 0.0726507, - 0.032877, - 0.324202, - 0.111813, - 0.42619, - 0.131442, - 0.189313, - 0.114502, - 0.206832, - 0.0500305, - 0.487098, - 0.0341233, - 0.270255, - 0.367341, - 0.0866764, - 0.21762, - 0.167959, - 0.282228, - 0.0212693, - 0.0873188, - 0.539447, - 0.0657426, - 0.325646, - 0.0883267, - 0.495553, - 0.0843047, - 0.254907, - 0.180837, - 0.38353, - 0.0888039, - 0.627694, - 0.061991, - 0.407121, - 0.374183, - 0.313244, - 0.184684, - 0.254191, - 0.222722, - 0.612723, - 0.37345, - 0.152328, - 0.221484, - 0.74649 - ], - "train_epoch_time": 5.054611682891846, - "train_loss": 3.7866450488994934, - "train_score": 0.16468010223236795, - "val_loss": 3.796820852430762, - "val_score": 0.16142096041127543 - }, - { - "epoch": 2, - "grad_norm": 1.5617152452468872, - "learning_rate": 0.464, - "model_norm": 87.29520416259766, - "step_logs": { - "grad_norm": { - "108": 5.174774646759033, - "109": 4.12007999420166, - "110": 1.7303886413574219, - "111": 4.427254676818848, - "112": 2.8291821479797363, - "113": 4.789940357208252, - "114": 2.6759793758392334, - "115": 5.9315972328186035, - "116": 2.50423002243042, - "117": 2.4670097827911377, - "118": 2.942711353302002, - "119": 3.0502896308898926, - "120": 3.9882960319519043, - "121": 3.0690927505493164, - "122": 8.91759204864502, - "123": 2.70465350151062, - "124": 7.68349027633667, - "125": 4.860230922698975, - "126": 4.693564414978027, - "127": 2.9068589210510254, - "128": 3.3190314769744873, - "129": 3.3391661643981934, - "130": 3.1243960857391357, - "131": 3.5345096588134766, - "132": 2.4333200454711914, - "133": 3.634084463119507, - "134": 2.0293712615966797, - "135": 1.925835371017456, - "136": 2.5134263038635254, - "137": 2.383866786956787, - "138": 2.1264994144439697, - "139": 2.248568296432495, - "140": 3.6036765575408936, - "141": 2.16681170463562, - "142": 3.3844339847564697, - "143": 2.4891295433044434, - "144": 4.069764614105225, - "145": 4.275892734527588, - "146": 1.7504932880401611, - "147": 3.4661357402801514, - "148": 1.5262902975082397, - "149": 3.0400474071502686, - "150": 1.547815203666687, - "151": 1.8305917978286743, - "152": 1.9314154386520386, - "153": 2.2506613731384277, - "154": 3.288557291030884, - "155": 1.6452332735061646, - "156": 1.8329997062683105, - "157": 2.162513256072998, - "158": 1.7185066938400269, - "159": 2.794290542602539, - "160": 1.88178551197052, - "161": 1.5617152452468872 - }, - "loss": { - "108": 3.784679412841797, - "109": 3.881744146347046, - "110": 2.9859018325805664, - "111": 3.584927558898926, - "112": 3.4188284873962402, - "113": 3.917698383331299, - "114": 3.1544692516326904, - "115": 4.037147521972656, - "116": 3.2700865268707275, - "117": 3.2213311195373535, - "118": 3.755624771118164, - "119": 3.42659592628479, - "120": 3.9031686782836914, - "121": 3.338649272918701, - "122": 4.3641157150268555, - "123": 3.478846549987793, - "124": 4.221761703491211, - "125": 3.5050649642944336, - "126": 3.3685100078582764, - "127": 3.173963785171509, - "128": 3.6466803550720215, - "129": 3.533693552017212, - "130": 3.632357597351074, - "131": 3.740203380584717, - "132": 3.5694665908813477, - "133": 3.4836928844451904, - "134": 3.4337148666381836, - "135": 2.996690273284912, - "136": 3.208099842071533, - "137": 3.6351749897003174, - "138": 3.281034469604492, - "139": 3.291280746459961, - "140": 3.620832920074463, - "141": 3.2909412384033203, - "142": 3.167038917541504, - "143": 3.306180477142334, - "144": 3.371760845184326, - "145": 3.1871633529663086, - "146": 3.0126659870147705, - "147": 3.1859991550445557, - "148": 2.9950218200683594, - "149": 3.101545810699463, - "150": 3.1349854469299316, - "151": 2.9979517459869385, - "152": 3.1339712142944336, - "153": 3.026371955871582, - "154": 3.495938301086426, - "155": 3.0939064025878906, - "156": 2.956056594848633, - "157": 3.3256912231445312, - "158": 3.0154240131378174, - "159": 3.2236666679382324, - "160": 3.375075101852417, - "161": 2.961386203765869 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.141334, - 0.228673, - 0.997214, - 0.182899, - 0.427126, - 0.170754, - 0.440515, - 0.114744, - 0.521448, - 0.52929, - 0.433697, - 0.368282, - 0.245382, - 0.354447, - 0.0548784, - 0.475567, - 0.0715116, - 0.148382, - 0.152909, - 0.375625, - 0.331036, - 0.316922, - 0.372097, - 0.29939, - 0.602844, - 0.263785, - 0.83376, - 0.807985, - 0.507827, - 0.639678, - 0.725572, - 0.650958, - 0.278815, - 0.700935, - 0.276491, - 0.533619, - 0.203572, - 0.174321, - 0.983173, - 0.265188, - 1.28566, - 0.335597, - 1.30857, - 0.894626, - 0.840124, - 0.597451, - 0.323261, - 1.14302, - 0.879808, - 0.711155, - 1.02105, - 0.412864, - 0.95311, - 1.2142 - ], - "train_epoch_time": 5.058100938796997, - "train_loss": 2.9174963414070425, - "train_score": 0.2051638719255647, - "val_loss": 2.934233468107186, - "val_score": 0.20034891656085627 - }, - { - "epoch": 3, - "grad_norm": 0.8685116171836853, - "learning_rate": 0.464, - "model_norm": 87.30843353271484, - "step_logs": { - "grad_norm": { - "162": 1.3233016729354858, - "163": 1.3805890083312988, - "164": 1.3771088123321533, - "165": 1.619173288345337, - "166": 2.123507261276245, - "167": 1.580917239189148, - "168": 1.569885492324829, - "169": 1.4523284435272217, - "170": 1.5622698068618774, - "171": 1.8772118091583252, - "172": 1.3246896266937256, - "173": 1.1375446319580078, - "174": 1.2851296663284302, - "175": 1.437145709991455, - "176": 1.4072016477584839, - "177": 1.1651967763900757, - "178": 1.0694130659103394, - "179": 1.033494472503662, - "180": 1.160187840461731, - "181": 1.1136846542358398, - "182": 0.9990133047103882, - "183": 1.0658434629440308, - "184": 1.4890798330307007, - "185": 1.3053770065307617, - "186": 0.9689182639122009, - "187": 0.8400119543075562, - "188": 1.236419439315796, - "189": 1.0966224670410156, - "190": 0.8431130647659302, - "191": 0.8443288803100586, - "192": 1.1127458810806274, - "193": 0.9202256202697754, - "194": 0.6413537263870239, - "195": 0.7446667551994324, - "196": 0.9289188981056213, - "197": 1.1127568483352661, - "198": 1.0037789344787598, - "199": 0.8704903721809387, - "200": 0.8721373081207275, - "201": 0.9219102263450623, - "202": 0.8627682328224182, - "203": 0.8936599493026733, - "204": 0.9451618194580078, - "205": 1.1369582414627075, - "206": 0.9595654606819153, - "207": 0.6201716065406799, - "208": 0.6199973821640015, - "209": 0.748771071434021, - "210": 0.7995222210884094, - "211": 0.7926169037818909, - "212": 0.8660521507263184, - "213": 0.861849844455719, - "214": 0.9272751212120056, - "215": 0.8685116171836853 - }, - "loss": { - "162": 2.907710552215576, - "163": 2.7727720737457275, - "164": 2.9045190811157227, - "165": 2.775491237640381, - "166": 3.0231313705444336, - "167": 3.0611536502838135, - "168": 2.847963333129883, - "169": 2.873194694519043, - "170": 2.823009490966797, - "171": 2.9426791667938232, - "172": 2.990231513977051, - "173": 2.751009702682495, - "174": 2.726351737976074, - "175": 2.8155994415283203, - "176": 2.7724971771240234, - "177": 2.7551820278167725, - "178": 2.71229887008667, - "179": 2.6968798637390137, - "180": 2.657564878463745, - "181": 2.7476954460144043, - "182": 2.6545183658599854, - "183": 2.6813511848449707, - "184": 2.711733818054199, - "185": 2.845114231109619, - "186": 2.71380877494812, - "187": 2.6293625831604004, - "188": 2.6584177017211914, - "189": 2.7791028022766113, - "190": 2.6139655113220215, - "191": 2.6295032501220703, - "192": 2.6224162578582764, - "193": 2.7156434059143066, - "194": 2.561741352081299, - "195": 2.58410382270813, - "196": 2.5837948322296143, - "197": 2.663771867752075, - "198": 2.6909401416778564, - "199": 2.612114906311035, - "200": 2.613621711730957, - "201": 2.598762035369873, - "202": 2.6329920291900635, - "203": 2.597872257232666, - "204": 2.628574848175049, - "205": 2.6163954734802246, - "206": 2.721754312515259, - "207": 2.550924777984619, - "208": 2.541594982147217, - "209": 2.575082540512085, - "210": 2.5971484184265137, - "211": 2.5586647987365723, - "212": 2.586764097213745, - "213": 2.5999419689178467, - "214": 2.6009154319763184, - "215": 2.617593765258789 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 1.66048, - 1.45474, - 1.53157, - 1.05865, - 0.670424, - 1.2248, - 1.15558, - 1.36218, - 1.15665, - 0.835057, - 1.70403, - 2.12596, - 1.65078, - 1.36323, - 1.4001, - 2.02933, - 2.37163, - 2.52491, - 1.97436, - 2.21536, - 2.65976, - 2.3603, - 1.22296, - 1.66966, - 2.89071, - 3.72631, - 1.73897, - 2.31095, - 3.67729, - 3.68851, - 2.11792, - 3.20689, - 6.22788, - 4.66, - 2.99435, - 2.15128, - 2.67072, - 3.44718, - 3.43616, - 3.05766, - 3.53721, - 3.25292, - 2.94244, - 2.02402, - 2.95597, - 6.63245, - 6.61191, - 4.59296, - 4.0629, - 4.07274, - 3.44881, - 3.50026, - 3.02489, - 3.47017 - ], - "train_epoch_time": 5.055733919143677, - "train_loss": 2.5681115788743005, - "train_score": 0.253008428972797, - "val_loss": 2.6080097133886118, - "val_score": 0.24912098152766135 - }, - { - "epoch": 4, - "grad_norm": 0.6302034258842468, - "learning_rate": 0.464, - "model_norm": 87.34224700927734, - "step_logs": { - "grad_norm": { - "216": 0.8106901049613953, - "217": 0.8104560971260071, - "218": 1.0373423099517822, - "219": 1.0504659414291382, - "220": 0.9251309633255005, - "221": 0.8067808747291565, - "222": 0.7914109826087952, - "223": 0.853254497051239, - "224": 0.9180495142936707, - "225": 0.9719438552856445, - "226": 0.9364591836929321, - "227": 0.9152469038963318, - "228": 0.7902749180793762, - "229": 0.8063154816627502, - "230": 0.8938575387001038, - "231": 0.8603021502494812, - "232": 0.7564916014671326, - "233": 0.7629519701004028, - "234": 0.8521064519882202, - "235": 0.8194237947463989, - "236": 0.765617847442627, - "237": 0.7324422001838684, - "238": 0.7703807950019836, - "239": 0.826975405216217, - "240": 0.8410595059394836, - "241": 0.7533900141716003, - "242": 0.7005122303962708, - "243": 0.7487949728965759, - "244": 0.7345021963119507, - "245": 0.7100911140441895, - "246": 0.7654435634613037, - "247": 0.7817350029945374, - "248": 0.7169834971427917, - "249": 0.6669204235076904, - "250": 0.7327661514282227, - "251": 0.7401351928710938, - "252": 0.6776663064956665, - "253": 0.7443811297416687, - "254": 0.8012900948524475, - "255": 0.8181213736534119, - "256": 0.7857967019081116, - "257": 0.8760724663734436, - "258": 0.9442354440689087, - "259": 0.9672228097915649, - "260": 0.8491316437721252, - "261": 0.7369986176490784, - "262": 0.7399956583976746, - "263": 0.9814790487289429, - "264": 0.8665817379951477, - "265": 0.6729984879493713, - "266": 0.7656182050704956, - "267": 0.9237598180770874, - "268": 0.8459947109222412, - "269": 0.6302034258842468 - }, - "loss": { - "216": 2.5703470706939697, - "217": 2.5824482440948486, - "218": 2.554396867752075, - "219": 2.7196297645568848, - "220": 2.61134934425354, - "221": 2.6048154830932617, - "222": 2.568915367126465, - "223": 2.5954771041870117, - "224": 2.5981552600860596, - "225": 2.6149373054504395, - "226": 2.5968315601348877, - "227": 2.6212663650512695, - "228": 2.5800042152404785, - "229": 2.5684704780578613, - "230": 2.579496383666992, - "231": 2.6058897972106934, - "232": 2.5478274822235107, - "233": 2.5617589950561523, - "234": 2.5712289810180664, - "235": 2.5806498527526855, - "236": 2.5696377754211426, - "237": 2.5710458755493164, - "238": 2.544456720352173, - "239": 2.575880527496338, - "240": 2.568362236022949, - "241": 2.5800046920776367, - "242": 2.527122974395752, - "243": 2.558391809463501, - "244": 2.5342559814453125, - "245": 2.5380027294158936, - "246": 2.529360771179199, - "247": 2.554863929748535, - "248": 2.557806968688965, - "249": 2.543830394744873, - "250": 2.5246176719665527, - "251": 2.567251682281494, - "252": 2.5534110069274902, - "253": 2.5447916984558105, - "254": 2.54054594039917, - "255": 2.5529799461364746, - "256": 2.547912836074829, - "257": 2.5530014038085938, - "258": 2.5859453678131104, - "259": 2.5768985748291016, - "260": 2.582186698913574, - "261": 2.5333921909332275, - "262": 2.5380496978759766, - "263": 2.562659740447998, - "264": 2.633817672729492, - "265": 2.5268964767456055, - "266": 2.5306854248046875, - "267": 2.561985492706299, - "268": 2.577991008758545, - "269": 2.5246875286102295 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 3.91095, - 3.93163, - 2.3738, - 2.4646, - 3.05111, - 4.0019, - 4.10153, - 3.565, - 3.08271, - 2.76808, - 2.96119, - 3.12921, - 4.13108, - 3.95061, - 3.22848, - 3.5209, - 4.45207, - 4.40092, - 3.54122, - 3.84337, - 4.38377, - 4.79251, - 4.2873, - 3.76652, - 3.6308, - 4.54549, - 5.14985, - 4.5629, - 4.69747, - 5.03343, - 4.31702, - 4.1807, - 4.97565, - 5.71926, - 4.70181, - 4.68648, - 5.56018, - 4.59263, - 3.95683, - 3.81427, - 4.12633, - 3.32637, - 2.90041, - 2.75451, - 3.58127, - 4.66411, - 4.63491, - 2.66029, - 3.50725, - 5.57904, - 4.31731, - 3.00233, - 3.60202, - 6.35691 - ], - "train_epoch_time": 5.055838584899902, - "train_loss": 2.519261949407832, - "train_score": 0.2481068418306463, - "val_loss": 2.5630368206449, - "val_score": 0.2398733499916495 - }, - { - "epoch": 5, - "grad_norm": 0.6994946599006653, - "learning_rate": 0.464, - "model_norm": 87.3882827758789, - "step_logs": { - "grad_norm": { - "270": 0.6422821283340454, - "271": 0.7214401960372925, - "272": 0.693167507648468, - "273": 0.7043965458869934, - "274": 0.8024903535842896, - "275": 0.9068609476089478, - "276": 1.0002779960632324, - "277": 1.037976622581482, - "278": 0.9052839279174805, - "279": 0.6759456396102905, - "280": 0.547254204750061, - "281": 0.5854243636131287, - "282": 0.6983444690704346, - "283": 0.8034915328025818, - "284": 0.8466963768005371, - "285": 0.7880759239196777, - "286": 0.7148194909095764, - "287": 0.7774625420570374, - "288": 0.7428817749023438, - "289": 0.6522318124771118, - "290": 0.7207713723182678, - "291": 0.7975194454193115, - "292": 0.827538251876831, - "293": 0.7705461978912354, - "294": 0.7459602952003479, - "295": 0.7716484665870667, - "296": 0.7719955444335938, - "297": 0.8776944875717163, - "298": 0.8854125738143921, - "299": 0.7333412170410156, - "300": 0.6780964732170105, - "301": 0.6807690262794495, - "302": 0.7484579682350159, - "303": 0.8476479649543762, - "304": 0.823849618434906, - "305": 0.8078522086143494, - "306": 0.7772826552391052, - "307": 0.7639259696006775, - "308": 0.7532915472984314, - "309": 0.881880521774292, - "310": 0.8273184299468994, - "311": 0.7544909119606018, - "312": 0.8302048444747925, - "313": 1.113097071647644, - "314": 0.9231336712837219, - "315": 0.7493406534194946, - "316": 0.7301244139671326, - "317": 0.8729099631309509, - "318": 1.2465481758117676, - "319": 1.1864063739776611, - "320": 0.7233800888061523, - "321": 0.608873724937439, - "322": 0.6392090916633606, - "323": 0.6994946599006653 - }, - "loss": { - "270": 2.524271011352539, - "271": 2.520902156829834, - "272": 2.5403218269348145, - "273": 2.517536163330078, - "274": 2.538515090942383, - "275": 2.53365421295166, - "276": 2.586578845977783, - "277": 2.5857181549072266, - "278": 2.5882375240325928, - "279": 2.5453941822052, - "280": 2.4840569496154785, - "281": 2.501779079437256, - "282": 2.5191338062286377, - "283": 2.5484871864318848, - "284": 2.5336203575134277, - "285": 2.53934907913208, - "286": 2.531764030456543, - "287": 2.528679847717285, - "288": 2.536978006362915, - "289": 2.4981017112731934, - "290": 2.5091042518615723, - "291": 2.516425132751465, - "292": 2.519143581390381, - "293": 2.5180392265319824, - "294": 2.520934581756592, - "295": 2.5044312477111816, - "296": 2.5160160064697266, - "297": 2.5210094451904297, - "298": 2.557377338409424, - "299": 2.507169246673584, - "300": 2.495558977127075, - "301": 2.4843595027923584, - "302": 2.5111827850341797, - "303": 2.4824366569519043, - "304": 2.533926486968994, - "305": 2.4826643466949463, - "306": 2.5055675506591797, - "307": 2.4421491622924805, - "308": 2.5059823989868164, - "309": 2.4810914993286133, - "310": 2.5351505279541016, - "311": 2.4602432250976562, - "312": 2.486154317855835, - "313": 2.48165225982666, - "314": 2.5677247047424316, - "315": 2.4849119186401367, - "316": 2.4324758052825928, - "317": 2.478590726852417, - "318": 2.515430450439453, - "319": 2.5803279876708984, - "320": 2.528080940246582, - "321": 2.4399001598358154, - "322": 2.4211602210998535, - "323": 2.42375111579895 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 6.11905, - 4.84346, - 5.28704, - 5.07389, - 3.94185, - 3.08082, - 2.58514, - 2.39997, - 3.15816, - 5.57098, - 8.29437, - 7.29974, - 5.16549, - 3.94748, - 3.53416, - 4.08871, - 4.95485, - 4.18345, - 4.59703, - 5.87227, - 4.82974, - 3.95641, - 3.67855, - 4.24097, - 4.53033, - 4.20601, - 4.22167, - 3.27256, - 3.26215, - 4.66199, - 5.42731, - 5.36062, - 4.48274, - 3.45499, - 3.73334, - 3.80412, - 4.14713, - 4.18475, - 4.41623, - 3.19024, - 3.70389, - 4.32185, - 3.60709, - 2.00297, - 3.01314, - 4.4254, - 4.56305, - 3.25286, - 1.6188, - 1.83319, - 4.83123, - 6.58139, - 5.92567, - 4.95358 - ], - "train_epoch_time": 5.056430101394653, - "train_loss": 2.4404090834826957, - "train_score": 0.28100789106388174, - "val_loss": 2.4834488711045064, - "val_score": 0.27292623479522327 - }, - { - "epoch": 6, - "grad_norm": 0.785673975944519, - "learning_rate": 0.464, - "model_norm": 87.4376449584961, - "step_logs": { - "grad_norm": { - "324": 0.8277546763420105, - "325": 1.213570475578308, - "326": 1.0964643955230713, - "327": 0.9084147810935974, - "328": 0.7132125496864319, - "329": 0.6408405900001526, - "330": 0.6848652958869934, - "331": 0.9890743494033813, - "332": 0.9521641135215759, - "333": 1.1928578615188599, - "334": 1.180679202079773, - "335": 0.6317273378372192, - "336": 0.5312169194221497, - "337": 0.5352923274040222, - "338": 0.5947969555854797, - "339": 0.8144198060035706, - "340": 0.6884922981262207, - "341": 0.6703402996063232, - "342": 0.7199878692626953, - "343": 1.0107344388961792, - "344": 1.02140212059021, - "345": 0.8379251956939697, - "346": 0.817858099937439, - "347": 1.0277626514434814, - "348": 0.7688915133476257, - "349": 0.8017452359199524, - "350": 0.9028322100639343, - "351": 0.9798122048377991, - "352": 0.9224452376365662, - "353": 0.8207762837409973, - "354": 0.9562183618545532, - "355": 0.9558596611022949, - "356": 0.9506771564483643, - "357": 1.0603376626968384, - "358": 0.9694778919219971, - "359": 0.8912068605422974, - "360": 1.179460048675537, - "361": 0.6402373909950256, - "362": 0.626430332660675, - "363": 0.7695870995521545, - "364": 0.8449531197547913, - "365": 1.1801550388336182, - "366": 0.9892183542251587, - "367": 0.7648640275001526, - "368": 0.761760950088501, - "369": 0.8548610210418701, - "370": 0.8236075639724731, - "371": 0.8261203765869141, - "372": 0.7852128148078918, - "373": 0.7419406175613403, - "374": 0.8111518621444702, - "375": 0.8819378018379211, - "376": 0.8994942307472229, - "377": 0.785673975944519 - }, - "loss": { - "324": 2.442108392715454, - "325": 2.5207409858703613, - "326": 2.569349765777588, - "327": 2.5354785919189453, - "328": 2.456895351409912, - "329": 2.42048978805542, - "330": 2.42295503616333, - "331": 2.43660306930542, - "332": 2.505573272705078, - "333": 2.4801454544067383, - "334": 2.560189723968506, - "335": 2.490452766418457, - "336": 2.4108235836029053, - "337": 2.378988027572632, - "338": 2.434278964996338, - "339": 2.403108596801758, - "340": 2.4642446041107178, - "341": 2.401123523712158, - "342": 2.3979384899139404, - "343": 2.399630308151245, - "344": 2.5165722370147705, - "345": 2.410550594329834, - "346": 2.409177780151367, - "347": 2.420248031616211, - "348": 2.4806342124938965, - "349": 2.4206464290618896, - "350": 2.42093825340271, - "351": 2.45137882232666, - "352": 2.461182117462158, - "353": 2.4123477935791016, - "354": 2.421826124191284, - "355": 2.464864730834961, - "356": 2.4547338485717773, - "357": 2.45949125289917, - "358": 2.4851531982421875, - "359": 2.4057514667510986, - "360": 2.4261910915374756, - "361": 2.434016227722168, - "362": 2.4205543994903564, - "363": 2.4001364707946777, - "364": 2.421926975250244, - "365": 2.4282093048095703, - "366": 2.5044355392456055, - "367": 2.409379482269287, - "368": 2.3911569118499756, - "369": 2.3991599082946777, - "370": 2.4428553581237793, - "371": 2.378005266189575, - "372": 2.4198951721191406, - "373": 2.378296136856079, - "374": 2.3974616527557373, - "375": 2.3770086765289307, - "376": 2.4335920810699463, - "377": 2.395308494567871 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 3.5642, - 1.71158, - 2.13715, - 3.0725, - 4.83002, - 5.89391, - 5.16577, - 2.49073, - 2.76365, - 1.74301, - 1.83657, - 6.24049, - 8.54322, - 8.30253, - 6.8807, - 3.62307, - 5.19859, - 5.34348, - 4.62581, - 2.34893, - 2.41221, - 3.43325, - 3.60174, - 2.29126, - 4.19597, - 3.76581, - 2.97009, - 2.55343, - 2.89243, - 3.58088, - 2.64868, - 2.69777, - 2.71605, - 2.18754, - 2.6441, - 3.02896, - 1.74405, - 5.93802, - 6.16835, - 4.05248, - 3.39231, - 1.74344, - 2.55933, - 4.11848, - 4.1207, - 3.28298, - 3.60128, - 3.48439, - 3.92484, - 4.32044, - 3.64374, - 3.05601, - 3.00781, - 3.8804 - ], - "train_epoch_time": 5.060168266296387, - "train_loss": 2.364733493276785, - "train_score": 0.2980855452278936, - "val_loss": 2.402719494943367, - "val_score": 0.2901478194205003 - }, - { - "epoch": 7, - "grad_norm": 0.7446780800819397, - "learning_rate": 0.464, - "model_norm": 87.49325561523438, - "step_logs": { - "grad_norm": { - "378": 0.7412540912628174, - "379": 0.6758706569671631, - "380": 0.6203067898750305, - "381": 0.7059546709060669, - "382": 0.8634834289550781, - "383": 1.4178122282028198, - "384": 0.8562566637992859, - "385": 0.9920222759246826, - "386": 0.8814330697059631, - "387": 0.8397749066352844, - "388": 0.8395906686782837, - "389": 0.9221208095550537, - "390": 1.078867793083191, - "391": 0.937557578086853, - "392": 0.906349778175354, - "393": 0.9906315803527832, - "394": 0.9654544591903687, - "395": 1.0302199125289917, - "396": 0.9993744492530823, - "397": 0.7764483094215393, - "398": 0.7187926173210144, - "399": 0.835242748260498, - "400": 0.7996591925621033, - "401": 0.7218107581138611, - "402": 0.7053585052490234, - "403": 0.6843826174736023, - "404": 0.7178877592086792, - "405": 0.8363547921180725, - "406": 0.8745712041854858, - "407": 1.0696005821228027, - "408": 0.8717305660247803, - "409": 0.7696921229362488, - "410": 0.7357020378112793, - "411": 0.762376606464386, - "412": 0.7743775248527527, - "413": 0.7805383801460266, - "414": 0.7597991824150085, - "415": 0.8127756118774414, - "416": 0.8563180565834045, - "417": 0.8782772421836853, - "418": 1.390513300895691, - "419": 0.7838559150695801, - "420": 0.9620813727378845, - "421": 1.0983083248138428, - "422": 1.0849192142486572, - "423": 0.8763146996498108, - "424": 0.8500553965568542, - "425": 0.8986727595329285, - "426": 0.9238671064376831, - "427": 0.9122262001037598, - "428": 0.8475171327590942, - "429": 0.7676783204078674, - "430": 0.7072240114212036, - "431": 0.7446780800819397 - }, - "loss": { - "378": 2.3820009231567383, - "379": 2.3586697578430176, - "380": 2.3406410217285156, - "381": 2.327718496322632, - "382": 2.3691883087158203, - "383": 2.4289052486419678, - "384": 2.467930793762207, - "385": 2.4489784240722656, - "386": 2.4793336391448975, - "387": 2.3886806964874268, - "388": 2.3632330894470215, - "389": 2.3746049404144287, - "390": 2.461280345916748, - "391": 2.40838623046875, - "392": 2.376873016357422, - "393": 2.3735203742980957, - "394": 2.41024112701416, - "395": 2.407980442047119, - "396": 2.4014384746551514, - "397": 2.386277675628662, - "398": 2.3386921882629395, - "399": 2.3585050106048584, - "400": 2.3627965450286865, - "401": 2.3423075675964355, - "402": 2.3450348377227783, - "403": 2.3341221809387207, - "404": 2.3485474586486816, - "405": 2.317923069000244, - "406": 2.37357497215271, - "407": 2.362143039703369, - "408": 2.440795660018921, - "409": 2.328968048095703, - "410": 2.3197102546691895, - "411": 2.3233275413513184, - "412": 2.3452911376953125, - "413": 2.3078713417053223, - "414": 2.3342909812927246, - "415": 2.317479372024536, - "416": 2.360175609588623, - "417": 2.350813865661621, - "418": 2.350499391555786, - "419": 2.428170919418335, - "420": 2.407823085784912, - "421": 2.434412956237793, - "422": 2.4489293098449707, - "423": 2.3792316913604736, - "424": 2.3394975662231445, - "425": 2.3434176445007324, - "426": 2.340834140777588, - "427": 2.367696762084961, - "428": 2.3365559577941895, - "429": 2.338250160217285, - "430": 2.304725408554077, - "431": 2.3104665279388428 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 4.33519, - 5.16345, - 6.08306, - 4.67064, - 3.17754, - 1.2083, - 3.36608, - 2.48853, - 3.19122, - 3.38713, - 3.35252, - 2.79264, - 2.11458, - 2.73987, - 2.89344, - 2.41863, - 2.58581, - 2.26878, - 2.40445, - 3.95818, - 4.52653, - 3.38074, - 3.69502, - 4.4957, - 4.71335, - 4.9834, - 4.55708, - 3.31374, - 3.10322, - 2.06473, - 3.21194, - 3.93124, - 4.28578, - 3.99734, - 3.91103, - 3.78811, - 4.0435, - 3.50812, - 3.21865, - 3.04758, - 1.21565, - 3.95191, - 2.60136, - 2.01811, - 2.08057, - 3.09825, - 3.23764, - 2.90166, - 2.74253, - 2.84525, - 3.25296, - 3.96764, - 4.60792, - 4.16642 - ], - "train_epoch_time": 5.05716609954834, - "train_loss": 2.3124927835450797, - "train_score": 0.3259079089636782, - "val_loss": 2.3690079454713246, - "val_score": 0.31208291471894106 - }, - { - "epoch": 8, - "grad_norm": 0.6871085166931152, - "learning_rate": 0.464, - "model_norm": 87.5499496459961, - "step_logs": { - "grad_norm": { - "432": 0.8406437039375305, - "433": 0.9106219410896301, - "434": 0.8679929375648499, - "435": 0.828254759311676, - "436": 0.8065989017486572, - "437": 0.7938262820243835, - "438": 0.7247353792190552, - "439": 0.70723956823349, - "440": 0.6874766945838928, - "441": 0.7215283513069153, - "442": 0.7163143157958984, - "443": 0.6770814061164856, - "444": 0.6427270174026489, - "445": 0.7105298638343811, - "446": 1.0835726261138916, - "447": 0.8117125034332275, - "448": 0.7321748733520508, - "449": 0.7523022890090942, - "450": 0.858869731426239, - "451": 0.8754204511642456, - "452": 0.8608382940292358, - "453": 0.8122220039367676, - "454": 0.6952657103538513, - "455": 0.6551951766014099, - "456": 0.7092158198356628, - "457": 0.7777563333511353, - "458": 0.8984978795051575, - "459": 0.9228598475456238, - "460": 0.8857734799385071, - "461": 0.8402599692344666, - "462": 0.8428342342376709, - "463": 0.9065468311309814, - "464": 0.8301165699958801, - "465": 0.7544244527816772, - "466": 0.7236720323562622, - "467": 0.7239236235618591, - "468": 0.8174001574516296, - "469": 1.2561126947402954, - "470": 1.3807685375213623, - "471": 1.4568482637405396, - "472": 1.2447764873504639, - "473": 1.0912258625030518, - "474": 1.0666472911834717, - "475": 1.0848174095153809, - "476": 1.3703484535217285, - "477": 1.0738643407821655, - "478": 0.8555451035499573, - "479": 0.817487895488739, - "480": 0.7386631965637207, - "481": 0.6398859620094299, - "482": 0.6407454013824463, - "483": 0.6218218803405762, - "484": 0.6573324203491211, - "485": 0.6871085166931152 - }, - "loss": { - "432": 2.3365721702575684, - "433": 2.3326334953308105, - "434": 2.3443045616149902, - "435": 2.3600823879241943, - "436": 2.309640407562256, - "437": 2.3429317474365234, - "438": 2.257517099380493, - "439": 2.299229621887207, - "440": 2.279670000076294, - "441": 2.2828431129455566, - "442": 2.309788703918457, - "443": 2.2910618782043457, - "444": 2.296117067337036, - "445": 2.292524814605713, - "446": 2.2985317707061768, - "447": 2.385573387145996, - "448": 2.2994182109832764, - "449": 2.3080403804779053, - "450": 2.2937612533569336, - "451": 2.316946029663086, - "452": 2.3000221252441406, - "453": 2.3059005737304688, - "454": 2.2904603481292725, - "455": 2.2879676818847656, - "456": 2.2558422088623047, - "457": 2.3028252124786377, - "458": 2.27492094039917, - "459": 2.3308212757110596, - "460": 2.2834296226501465, - "461": 2.319279670715332, - "462": 2.2933809757232666, - "463": 2.2964305877685547, - "464": 2.2942891120910645, - "465": 2.2665939331054688, - "466": 2.2507266998291016, - "467": 2.286299228668213, - "468": 2.2815849781036377, - "469": 2.3181087970733643, - "470": 2.3836045265197754, - "471": 2.4310758113861084, - "472": 2.3726487159729004, - "473": 2.4063267707824707, - "474": 2.370751142501831, - "475": 2.3533759117126465, - "476": 2.3754220008850098, - "477": 2.390937089920044, - "478": 2.3499724864959717, - "479": 2.28918194770813, - "480": 2.3082470893859863, - "481": 2.231879711151123, - "482": 2.269561529159546, - "483": 2.2287118434906006, - "484": 2.2464981079101562, - "485": 2.24623441696167 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 3.3064, - 2.813, - 3.11158, - 3.44032, - 3.55001, - 3.71799, - 4.29806, - 4.59673, - 4.82343, - 4.385, - 4.50158, - 4.99753, - 5.55829, - 4.54098, - 1.95765, - 3.62066, - 4.28932, - 4.07811, - 3.10952, - 3.02331, - 3.10377, - 3.49535, - 4.73828, - 5.32977, - 4.48489, - 3.80692, - 2.81794, - 2.73676, - 2.91033, - 3.28493, - 3.22843, - 2.7943, - 3.32943, - 3.98238, - 4.29773, - 4.36262, - 3.41482, - 1.46919, - 1.25024, - 1.14543, - 1.53127, - 2.02081, - 2.08374, - 1.99976, - 1.26497, - 2.07333, - 3.21053, - 3.42545, - 4.23048, - 5.45087, - 5.52804, - 5.76397, - 5.19919, - 4.75778 - ], - "train_epoch_time": 5.055660724639893, - "train_loss": 2.25958987075937, - "train_score": 0.3259067880286377, - "val_loss": 2.3159942429867733, - "val_score": 0.31041905906663014 - }, - { - "epoch": 9, - "grad_norm": 0.907282829284668, - "learning_rate": 0.464, - "model_norm": 87.61580657958984, - "step_logs": { - "grad_norm": { - "486": 0.712562620639801, - "487": 0.7331148386001587, - "488": 0.7125338315963745, - "489": 0.7049823999404907, - "490": 0.7442678809165955, - "491": 0.7896958589553833, - "492": 0.8199194073677063, - "493": 0.9763243198394775, - "494": 0.8745664954185486, - "495": 0.7932464480400085, - "496": 0.8348462581634521, - "497": 0.8294863700866699, - "498": 0.8065939545631409, - "499": 0.7944414019584656, - "500": 0.9245107173919678, - "501": 1.0332345962524414, - "502": 1.1609129905700684, - "503": 1.1859999895095825, - "504": 1.1004586219787598, - "505": 1.0198930501937866, - "506": 0.8427152037620544, - "507": 0.6482487320899963, - "508": 0.5940559506416321, - "509": 0.6011734008789062, - "510": 0.654750406742096, - "511": 0.6831381916999817, - "512": 0.686622142791748, - "513": 0.71522456407547, - "514": 0.7784596681594849, - "515": 0.7902299761772156, - "516": 0.8374746441841125, - "517": 0.8683791756629944, - "518": 0.8480587601661682, - "519": 0.8314867615699768, - "520": 0.7016273736953735, - "521": 0.6524192690849304, - "522": 0.6798352003097534, - "523": 0.72560054063797, - "524": 0.7582122087478638, - "525": 0.7725090980529785, - "526": 0.9910992383956909, - "527": 0.9848704934120178, - "528": 0.7651165723800659, - "529": 0.7244158983230591, - "530": 0.8009435534477234, - "531": 0.8574590086936951, - "532": 0.8259586095809937, - "533": 0.7582616209983826, - "534": 0.7343605756759644, - "535": 0.7997363209724426, - "536": 0.8006985783576965, - "537": 0.7473467588424683, - "538": 0.8718458414077759, - "539": 0.907282829284668 - }, - "loss": { - "486": 2.2771244049072266, - "487": 2.2598960399627686, - "488": 2.2551674842834473, - "489": 2.2272088527679443, - "490": 2.2353591918945312, - "491": 2.2536027431488037, - "492": 2.285149097442627, - "493": 2.2854652404785156, - "494": 2.2932679653167725, - "495": 2.249964714050293, - "496": 2.278869152069092, - "497": 2.2636592388153076, - "498": 2.2679247856140137, - "499": 2.2530012130737305, - "500": 2.2624945640563965, - "501": 2.3036861419677734, - "502": 2.3021528720855713, - "503": 2.3263773918151855, - "504": 2.309734344482422, - "505": 2.3139700889587402, - "506": 2.2929272651672363, - "507": 2.2095446586608887, - "508": 2.235565185546875, - "509": 2.2293193340301514, - "510": 2.205256938934326, - "511": 2.229058027267456, - "512": 2.2393813133239746, - "513": 2.2362594604492188, - "514": 2.2351841926574707, - "515": 2.2357192039489746, - "516": 2.2410929203033447, - "517": 2.2527952194213867, - "518": 2.262061834335327, - "519": 2.2516250610351562, - "520": 2.218244791030884, - "521": 2.2390406131744385, - "522": 2.2223055362701416, - "523": 2.2392568588256836, - "524": 2.246436357498169, - "525": 2.2348380088806152, - "526": 2.2441329956054688, - "527": 2.296755313873291, - "528": 2.2151851654052734, - "529": 2.2095046043395996, - "530": 2.2269046306610107, - "531": 2.2316040992736816, - "532": 2.2452497482299805, - "533": 2.225214958190918, - "534": 2.2368764877319336, - "535": 2.2347657680511475, - "536": 2.2565245628356934, - "537": 2.2124009132385254, - "538": 2.201300621032715, - "539": 2.284648895263672 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 4.48478, - 4.20479, - 4.44189, - 4.4813, - 4.03542, - 3.61375, - 3.39916, - 2.39765, - 2.99826, - 3.57569, - 3.26969, - 3.28997, - 3.48593, - 3.56975, - 2.64706, - 2.15787, - 1.70818, - 1.65391, - 1.90728, - 2.22458, - 3.22871, - 5.25799, - 6.3348, - 6.1684, - 5.14408, - 4.77644, - 4.74999, - 4.37157, - 3.68843, - 3.58022, - 3.19534, - 2.98747, - 3.14523, - 3.25676, - 4.50605, - 5.26027, - 4.80835, - 4.25313, - 3.90762, - 3.74489, - 2.28462, - 2.36786, - 3.78403, - 4.21036, - 3.47135, - 3.03522, - 3.29115, - 3.8702, - 4.14785, - 3.49412, - 3.51967, - 3.96113, - 2.89601, - 2.77545 - ], - "train_epoch_time": 5.056230306625366, - "train_loss": 2.2159057038415284, - "train_score": 0.35210164095271096, - "val_loss": 2.2818633776444655, - "val_score": 0.33475351634720846 - }, - { - "epoch": 10, - "grad_norm": 0.6942535042762756, - "learning_rate": 0.464, - "model_norm": 87.67610931396484, - "step_logs": { - "grad_norm": { - "540": 0.7585775852203369, - "541": 0.5961969494819641, - "542": 0.5493056178092957, - "543": 0.657780110836029, - "544": 0.7750731110572815, - "545": 0.7865927815437317, - "546": 0.7444025874137878, - "547": 0.6932305693626404, - "548": 0.647156834602356, - "549": 0.708666205406189, - "550": 0.7742327451705933, - "551": 0.8068590760231018, - "552": 0.8598763346672058, - "553": 0.8082388043403625, - "554": 0.7034367322921753, - "555": 0.7649155259132385, - "556": 0.8748824000358582, - "557": 0.8523411750793457, - "558": 0.7931309938430786, - "559": 0.8179984092712402, - "560": 0.7930600047111511, - "561": 0.7689656019210815, - "562": 0.7945387959480286, - "563": 0.8083291053771973, - "564": 0.9241283535957336, - "565": 0.9414832592010498, - "566": 0.7653954029083252, - "567": 0.6973149180412292, - "568": 0.6581932306289673, - "569": 0.6959071755409241, - "570": 0.7117392420768738, - "571": 0.7333704233169556, - "572": 0.7796891331672668, - "573": 0.8865215182304382, - "574": 0.9076765775680542, - "575": 0.8328969478607178, - "576": 0.829623818397522, - "577": 0.8538678884506226, - "578": 0.8449520468711853, - "579": 0.7578330039978027, - "580": 0.7410953044891357, - "581": 0.7811304926872253, - "582": 0.8032299280166626, - "583": 0.7698058485984802, - "584": 0.812824010848999, - "585": 0.7754165530204773, - "586": 0.7512738108634949, - "587": 0.7116155624389648, - "588": 0.7505306601524353, - "589": 0.8330626487731934, - "590": 0.8475196957588196, - "591": 0.8164493441581726, - "592": 0.7601915001869202, - "593": 0.6942535042762756 - }, - "loss": { - "540": 2.2231364250183105, - "541": 2.2061266899108887, - "542": 2.1627893447875977, - "543": 2.194805860519409, - "544": 2.194080114364624, - "545": 2.2225894927978516, - "546": 2.2229509353637695, - "547": 2.2039008140563965, - "548": 2.1704890727996826, - "549": 2.1939315795898438, - "550": 2.202841281890869, - "551": 2.201186418533325, - "552": 2.221682548522949, - "553": 2.238283157348633, - "554": 2.2009692192077637, - "555": 2.2087900638580322, - "556": 2.2394473552703857, - "557": 2.219580888748169, - "558": 2.183645009994507, - "559": 2.2109286785125732, - "560": 2.205620765686035, - "561": 2.192681074142456, - "562": 2.22249698638916, - "563": 2.2442140579223633, - "564": 2.2055916786193848, - "565": 2.2416820526123047, - "566": 2.2224881649017334, - "567": 2.184573173522949, - "568": 2.1638331413269043, - "569": 2.1898579597473145, - "570": 2.212275743484497, - "571": 2.184046745300293, - "572": 2.1922593116760254, - "573": 2.1798362731933594, - "574": 2.224644184112549, - "575": 2.2149720191955566, - "576": 2.1777524948120117, - "577": 2.199397087097168, - "578": 2.1964449882507324, - "579": 2.196183681488037, - "580": 2.1861789226531982, - "581": 2.192002296447754, - "582": 2.170109272003174, - "583": 2.167527675628662, - "584": 2.200174331665039, - "585": 2.1844873428344727, - "586": 2.1879754066467285, - "587": 2.2079055309295654, - "588": 2.1804027557373047, - "589": 2.193552017211914, - "590": 2.2093706130981445, - "591": 2.196868896484375, - "592": 2.1808855533599854, - "593": 2.157546281814575 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 3.86337, - 6.20656, - 7.1678, - 5.07265, - 3.65231, - 3.59219, - 4.01157, - 4.58603, - 5.18249, - 4.36857, - 3.67485, - 3.38113, - 3.00476, - 3.42638, - 4.44799, - 3.77509, - 2.92578, - 3.05523, - 3.4713, - 3.30423, - 3.50686, - 3.70819, - 3.52055, - 3.43469, - 2.58262, - 2.529, - 3.79374, - 4.49271, - 4.99479, - 4.52182, - 4.36714, - 4.06083, - 3.60619, - 2.77361, - 2.70021, - 3.1929, - 3.16407, - 3.01663, - 3.07649, - 3.82403, - 3.9805, - 3.59248, - 3.36358, - 3.65765, - 3.33015, - 3.63312, - 3.87655, - 4.36003, - 3.87079, - 3.16077, - 3.07588, - 3.29568, - 3.77387, - 4.47635 - ], - "train_epoch_time": 5.055509328842163, - "train_loss": 2.1632014810953457, - "train_score": 0.3650062770036235, - "val_loss": 2.2497894342402778, - "val_score": 0.3419964485272474 - }, - { - "epoch": 11, - "grad_norm": 0.7919039130210876, - "learning_rate": 0.464, - "model_norm": 87.73299407958984, - "step_logs": { - "grad_norm": { - "594": 0.6798537373542786, - "595": 0.6490585207939148, - "596": 0.5988166928291321, - "597": 0.6148913502693176, - "598": 0.6750938892364502, - "599": 0.8076092004776001, - "600": 0.9198513627052307, - "601": 0.9107973575592041, - "602": 0.8270330429077148, - "603": 0.7594625949859619, - "604": 0.6789271831512451, - "605": 0.7532307505607605, - "606": 0.8689937591552734, - "607": 0.8897367119789124, - "608": 0.85323566198349, - "609": 0.7796239852905273, - "610": 0.7001063227653503, - "611": 0.6852161884307861, - "612": 0.6914094090461731, - "613": 0.712799608707428, - "614": 0.7351668477058411, - "615": 0.738058865070343, - "616": 0.7064772248268127, - "617": 0.6869327425956726, - "618": 0.7838695645332336, - "619": 0.7908899784088135, - "620": 0.7245451807975769, - "621": 0.7042706608772278, - "622": 0.7856767177581787, - "623": 0.924270510673523, - "624": 2.20139217376709, - "625": 1.6957099437713623, - "626": 1.303147792816162, - "627": 1.4234451055526733, - "628": 1.6826438903808594, - "629": 1.663301706314087, - "630": 1.6491668224334717, - "631": 1.7331079244613647, - "632": 1.1122770309448242, - "633": 0.7747372388839722, - "634": 0.7028419971466064, - "635": 0.7543326616287231, - "636": 0.8162450194358826, - "637": 0.8256059288978577, - "638": 0.7799479365348816, - "639": 0.7498819231987, - "640": 0.7176874279975891, - "641": 0.6859791874885559, - "642": 0.7241498231887817, - "643": 0.9374217391014099, - "644": 0.866894543170929, - "645": 0.7311823964118958, - "646": 0.6802024245262146, - "647": 0.7919039130210876 - }, - "loss": { - "594": 2.1565752029418945, - "595": 2.145916700363159, - "596": 2.1633694171905518, - "597": 2.1479344367980957, - "598": 2.16104793548584, - "599": 2.125016212463379, - "600": 2.199812889099121, - "601": 2.2146387100219727, - "602": 2.1745922565460205, - "603": 2.1782429218292236, - "604": 2.15200138092041, - "605": 2.1815381050109863, - "606": 2.196560859680176, - "607": 2.2157418727874756, - "608": 2.1975088119506836, - "609": 2.156369686126709, - "610": 2.1521029472351074, - "611": 2.161513566970825, - "612": 2.141385078430176, - "613": 2.1641273498535156, - "614": 2.153470516204834, - "615": 2.1648004055023193, - "616": 2.195648431777954, - "617": 2.145448684692383, - "618": 2.1772561073303223, - "619": 2.175151824951172, - "620": 2.1576876640319824, - "621": 2.142158031463623, - "622": 2.154916763305664, - "623": 2.163973331451416, - "624": 2.264434337615967, - "625": 2.461543083190918, - "626": 2.3529388904571533, - "627": 2.3282318115234375, - "628": 2.4070634841918945, - "629": 2.4230785369873047, - "630": 2.4986953735351562, - "631": 2.4733142852783203, - "632": 2.3817098140716553, - "633": 2.305549144744873, - "634": 2.2497317790985107, - "635": 2.208911180496216, - "636": 2.233792304992676, - "637": 2.246324062347412, - "638": 2.230048179626465, - "639": 2.192605495452881, - "640": 2.1972944736480713, - "641": 2.171077013015747, - "642": 2.1710762977600098, - "643": 2.2218165397644043, - "644": 2.2336792945861816, - "645": 2.215351104736328, - "646": 2.16933012008667, - "647": 2.202516555786133 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 4.66588, - 5.09384, - 6.03313, - 5.68099, - 4.74172, - 3.25806, - 2.59986, - 2.66968, - 3.1793, - 3.77654, - 4.6687, - 3.84509, - 2.90877, - 2.79896, - 3.01851, - 3.54775, - 4.39071, - 4.60365, - 4.47944, - 4.2594, - 3.98444, - 3.97407, - 4.39913, - 4.54663, - 3.54341, - 3.47742, - 4.11015, - 4.31889, - 3.49094, - 2.53311, - 0.467267, - 0.85606, - 1.38555, - 1.14907, - 0.850165, - 0.875841, - 0.918722, - 0.823433, - 1.92514, - 3.84119, - 4.55423, - 3.88197, - 3.35275, - 3.29554, - 3.66592, - 3.89919, - 4.26597, - 4.61374, - 4.14017, - 2.52836, - 2.97227, - 4.14373, - 4.68867, - 3.51216 - ], - "train_epoch_time": 5.060882806777954, - "train_loss": 2.188546709761216, - "train_score": 0.3502678890737947, - "val_loss": 2.2784656854228778, - "val_score": 0.32748816019495103 - }, - { - "epoch": 12, - "grad_norm": 0.5168001651763916, - "learning_rate": 0.464, - "model_norm": 87.79352569580078, - "step_logs": { - "grad_norm": { - "648": 0.8322305679321289, - "649": 0.86310875415802, - "650": 0.8546245694160461, - "651": 0.8540962338447571, - "652": 0.7400807738304138, - "653": 0.7047733664512634, - "654": 0.713548481464386, - "655": 0.7104220986366272, - "656": 0.6467399597167969, - "657": 0.5776260495185852, - "658": 0.59811931848526, - "659": 0.6109806895256042, - "660": 0.6301087737083435, - "661": 0.6579259634017944, - "662": 0.7061654925346375, - "663": 0.6898855566978455, - "664": 0.6281511187553406, - "665": 0.6035448908805847, - "666": 0.6117790937423706, - "667": 0.621382474899292, - "668": 0.6453878879547119, - "669": 0.7339179515838623, - "670": 0.7700932025909424, - "671": 0.7408218383789062, - "672": 0.6703903675079346, - "673": 0.6067618727684021, - "674": 0.5832111239433289, - "675": 0.5420410633087158, - "676": 0.5233203172683716, - "677": 0.49592217803001404, - "678": 0.5068687200546265, - "679": 0.5144093036651611, - "680": 0.5519264340400696, - "681": 0.514634370803833, - "682": 0.505577564239502, - "683": 0.4756849706172943, - "684": 0.47411251068115234, - "685": 0.5083258152008057, - "686": 0.5305260419845581, - "687": 0.5018282532691956, - "688": 0.503979504108429, - "689": 0.5388111472129822, - "690": 0.5257241725921631, - "691": 0.4740140438079834, - "692": 0.4806220233440399, - "693": 0.47497254610061646, - "694": 0.48448771238327026, - "695": 0.4545193314552307, - "696": 0.4641974866390228, - "697": 0.4876572787761688, - "698": 0.4619799256324768, - "699": 0.4600135087966919, - "700": 0.4902450740337372, - "701": 0.5168001651763916 - }, - "loss": { - "648": 2.1716971397399902, - "649": 2.1991939544677734, - "650": 2.199986696243286, - "651": 2.1747162342071533, - "652": 2.200076103210449, - "653": 2.1516759395599365, - "654": 2.1451101303100586, - "655": 2.1412830352783203, - "656": 2.153409481048584, - "657": 2.1272125244140625, - "658": 2.1281309127807617, - "659": 2.1119394302368164, - "660": 2.110682725906372, - "661": 2.142961025238037, - "662": 2.160482406616211, - "663": 2.146573305130005, - "664": 2.1289889812469482, - "665": 2.1267123222351074, - "666": 2.1252241134643555, - "667": 2.0985217094421387, - "668": 2.1150336265563965, - "669": 2.1408772468566895, - "670": 2.1396474838256836, - "671": 2.1032190322875977, - "672": 2.1189215183258057, - "673": 2.1333508491516113, - "674": 2.106031894683838, - "675": 2.0892810821533203, - "676": 2.1191277503967285, - "677": 2.0875582695007324, - "678": 2.0943353176116943, - "679": 2.116013526916504, - "680": 2.0936737060546875, - "681": 2.073613405227661, - "682": 2.091026782989502, - "683": 2.0991411209106445, - "684": 2.08347749710083, - "685": 2.076913833618164, - "686": 2.072753429412842, - "687": 2.0714285373687744, - "688": 2.0934219360351562, - "689": 2.067140579223633, - "690": 2.0618412494659424, - "691": 2.0936648845672607, - "692": 2.051088333129883, - "693": 2.042275905609131, - "694": 2.0578064918518066, - "695": 2.054750442504883, - "696": 2.0769400596618652, - "697": 2.0494439601898193, - "698": 2.0533528327941895, - "699": 2.0495636463165283, - "700": 2.073385238647461, - "701": 2.06715726852417 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 3.13554, - 2.95211, - 3.0121, - 2.98119, - 4.01679, - 4.33189, - 4.21311, - 4.24269, - 5.14834, - 6.37555, - 5.94871, - 5.65753, - 5.31608, - 4.95063, - 4.33249, - 4.51016, - 5.39567, - 5.83834, - 5.67826, - 5.43495, - 5.0778, - 3.97463, - 3.60791, - 3.83228, - 4.71476, - 5.79463, - 6.19175, - 7.11103, - 7.73788, - 8.48812, - 8.15183, - 7.99652, - 6.873, - 7.82943, - 8.18058, - 9.2769, - 9.26885, - 8.03774, - 7.36435, - 8.22545, - 8.24197, - 7.12028, - 7.46001, - 9.31804, - 8.87926, - 9.05268, - 8.76676, - 9.94614, - 9.6387, - 8.618, - 9.62094, - 9.68546, - 8.62687, - 7.73977 - ], - "train_epoch_time": 5.0557169914245605, - "train_loss": 2.0586307798601804, - "train_score": 0.3924015871250475, - "val_loss": 2.167074854420466, - "val_score": 0.3644383256799455 - }, - { - "epoch": 13, - "grad_norm": 0.34596049785614014, - "learning_rate": 0.3093333333333334, - "model_norm": 87.8283462524414, - "step_logs": { - "grad_norm": { - "702": 0.494188517332077, - "703": 0.429004967212677, - "704": 0.41396981477737427, - "705": 0.4276958107948303, - "706": 0.4263225197792053, - "707": 0.47544771432876587, - "708": 0.4772648811340332, - "709": 0.5201854705810547, - "710": 0.5692038536071777, - "711": 0.5221180319786072, - "712": 0.4851730763912201, - "713": 0.4506009519100189, - "714": 0.40763771533966064, - "715": 0.39443984627723694, - "716": 0.41046595573425293, - "717": 0.43167757987976074, - "718": 0.4171099066734314, - "719": 0.4390905797481537, - "720": 0.42252224683761597, - "721": 0.41707083582878113, - "722": 0.44261491298675537, - "723": 0.4352918267250061, - "724": 0.3870866596698761, - "725": 0.4072112739086151, - "726": 0.4034639000892639, - "727": 0.40515875816345215, - "728": 0.411587655544281, - "729": 0.4055381417274475, - "730": 0.3796173334121704, - "731": 0.3822411894798279, - "732": 0.387616902589798, - "733": 0.3640688359737396, - "734": 0.36904484033584595, - "735": 0.4091506004333496, - "736": 0.40216633677482605, - "737": 0.4079515337944031, - "738": 0.3719809949398041, - "739": 0.35472241044044495, - "740": 0.34964507818222046, - "741": 0.34832754731178284, - "742": 0.3783370852470398, - "743": 0.39985957741737366, - "744": 0.39254435896873474, - "745": 0.34199583530426025, - "746": 0.35096681118011475, - "747": 0.3403915464878082, - "748": 0.3526018559932709, - "749": 0.36242637038230896, - "750": 0.3520948886871338, - "751": 0.34769758582115173, - "752": 0.3255786895751953, - "753": 0.3424532115459442, - "754": 0.3264871835708618, - "755": 0.34596049785614014 - }, - "loss": { - "702": 2.055676221847534, - "703": 2.057755947113037, - "704": 2.055201530456543, - "705": 2.047668933868408, - "706": 2.040125846862793, - "707": 2.0449862480163574, - "708": 2.03873348236084, - "709": 2.0728325843811035, - "710": 2.0671229362487793, - "711": 2.0537514686584473, - "712": 2.031139850616455, - "713": 2.0507335662841797, - "714": 2.0310213565826416, - "715": 2.0710744857788086, - "716": 2.0247068405151367, - "717": 2.010361433029175, - "718": 2.0242011547088623, - "719": 2.0620484352111816, - "720": 2.0039732456207275, - "721": 2.0215840339660645, - "722": 2.049868106842041, - "723": 2.0240185260772705, - "724": 2.042682647705078, - "725": 2.0658316612243652, - "726": 2.031619071960449, - "727": 2.030827760696411, - "728": 2.0327835083007812, - "729": 2.035027265548706, - "730": 2.0169854164123535, - "731": 2.0529890060424805, - "732": 2.016752243041992, - "733": 2.0340709686279297, - "734": 2.0238027572631836, - "735": 2.042764663696289, - "736": 2.017361640930176, - "737": 2.009218215942383, - "738": 2.0296149253845215, - "739": 2.0462749004364014, - "740": 2.014038562774658, - "741": 2.0566771030426025, - "742": 2.0085175037384033, - "743": 2.0115199089050293, - "744": 2.02402925491333, - "745": 2.0086283683776855, - "746": 2.045577049255371, - "747": 2.0326685905456543, - "748": 2.0015974044799805, - "749": 1.9962797164916992, - "750": 2.036181926727295, - "751": 2.010166645050049, - "752": 2.039108991622925, - "753": 2.0363669395446777, - "754": 2.0303382873535156, - "755": 2.0272130966186523 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 8.41723, - 11.1807, - 11.9927, - 11.1941, - 11.2248, - 9.04659, - 8.95038, - 7.66034, - 6.38015, - 7.53374, - 8.62872, - 10.1001, - 12.2227, - 13.3117, - 12.0173, - 10.7884, - 11.6346, - 10.6952, - 11.2252, - 11.6218, - 10.4634, - 10.682, - 13.6328, - 12.4582, - 12.4805, - 12.3715, - 11.9996, - 12.3739, - 13.9962, - 14.0511, - 13.4229, - 15.3461, - 14.8597, - 12.2026, - 12.473, - 12.0729, - 14.668, - 16.2625, - 16.4745, - 16.9508, - 14.0319, - 12.5808, - 13.1353, - 17.1735, - 16.6067, - 17.5432, - 16.0993, - 15.1978, - 16.4247, - 16.6276, - 19.2366, - 17.3642, - 19.0474, - 16.9374 - ], - "train_epoch_time": 5.055843114852905, - "train_loss": 2.0121118987478863, - "train_score": 0.4051818058514698, - "val_loss": 2.13039591446536, - "val_score": 0.3762198620220276 - }, - { - "epoch": 14, - "grad_norm": 0.3083382546901703, - "learning_rate": 0.1546666666666667, - "model_norm": 87.84034729003906, - "step_logs": { - "grad_norm": { - "756": 0.31816309690475464, - "757": 0.34076759219169617, - "758": 0.3757580518722534, - "759": 0.3717805743217468, - "760": 0.3389896750450134, - "761": 0.33788925409317017, - "762": 0.3298880457878113, - "763": 0.3302595317363739, - "764": 0.3691927492618561, - "765": 0.334699809551239, - "766": 0.3390510380268097, - "767": 0.351433664560318, - "768": 0.34450021386146545, - "769": 0.33897507190704346, - "770": 0.3508661091327667, - "771": 0.3051116168498993, - "772": 0.3122202455997467, - "773": 0.3256988227367401, - "774": 0.32951807975769043, - "775": 0.3455788493156433, - "776": 0.3282933533191681, - "777": 0.29794615507125854, - "778": 0.31040993332862854, - "779": 0.3576716184616089, - "780": 0.33595651388168335, - "781": 0.3316764831542969, - "782": 0.3166547417640686, - "783": 0.3212023079395294, - "784": 0.3121882975101471, - "785": 0.32798659801483154, - "786": 0.32423341274261475, - "787": 0.3123420774936676, - "788": 0.32254958152770996, - "789": 0.3154264986515045, - "790": 0.32257264852523804, - "791": 0.32353290915489197, - "792": 0.3024417459964752, - "793": 0.31519627571105957, - "794": 0.31639960408210754, - "795": 0.301001638174057, - "796": 0.3022189736366272, - "797": 0.3100537359714508, - "798": 0.31416457891464233, - "799": 0.3117527961730957, - "800": 0.32066595554351807, - "801": 0.3516268730163574, - "802": 0.2929776608943939, - "803": 0.324480265378952, - "804": 0.3103832006454468, - "805": 0.3073827922344208, - "806": 0.2938455641269684, - "807": 0.32488566637039185, - "808": 0.29607847332954407, - "809": 0.3083382546901703 - }, - "loss": { - "756": 2.0284624099731445, - "757": 2.030078411102295, - "758": 2.0251684188842773, - "759": 2.037804126739502, - "760": 2.0077157020568848, - "761": 2.0122575759887695, - "762": 2.019298553466797, - "763": 2.032672882080078, - "764": 1.9646873474121094, - "765": 1.9999198913574219, - "766": 2.015259027481079, - "767": 2.016988515853882, - "768": 2.0376081466674805, - "769": 1.979867696762085, - "770": 2.0372753143310547, - "771": 1.9841111898422241, - "772": 2.01918625831604, - "773": 1.9982551336288452, - "774": 2.0085694789886475, - "775": 1.9884827136993408, - "776": 2.016232967376709, - "777": 1.9949437379837036, - "778": 2.009265184402466, - "779": 2.021888256072998, - "780": 2.008274555206299, - "781": 1.966428279876709, - "782": 2.0240259170532227, - "783": 1.971564769744873, - "784": 2.019937515258789, - "785": 2.011720895767212, - "786": 2.0009024143218994, - "787": 2.016679286956787, - "788": 2.009650230407715, - "789": 1.95314359664917, - "790": 1.9766675233840942, - "791": 2.023458957672119, - "792": 2.0272693634033203, - "793": 1.9830944538116455, - "794": 2.0164055824279785, - "795": 1.9970431327819824, - "796": 2.0086779594421387, - "797": 1.9559695720672607, - "798": 1.9930493831634521, - "799": 2.00905442237854, - "800": 1.9889888763427734, - "801": 1.9652135372161865, - "802": 2.0151801109313965, - "803": 1.999463438987732, - "804": 2.0275464057922363, - "805": 2.0180020332336426, - "806": 2.0078988075256348, - "807": 2.0101065635681152, - "808": 1.993089199066162, - "809": 1.9798625707626343 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 20.0386, - 17.4822, - 14.3432, - 14.7431, - 17.4715, - 17.6252, - 18.5553, - 18.6362, - 14.4141, - 17.8526, - 17.5308, - 16.3311, - 17.1689, - 17.2306, - 16.5488, - 21.3132, - 20.7135, - 18.8373, - 18.4982, - 16.6505, - 18.7075, - 22.4727, - 20.8529, - 15.8048, - 17.7933, - 17.8751, - 20.1857, - 19.1097, - 20.7255, - 18.7006, - 19.0331, - 20.6717, - 19.3165, - 19.6308, - 18.9967, - 19.3311, - 22.163, - 19.961, - 20.1422, - 22.0419, - 21.9921, - 20.3464, - 20.1931, - 20.6715, - 19.3431, - 15.8945, - 23.4771, - 18.9905, - 21.0462, - 21.3581, - 23.2543, - 19.044, - 22.7359, - 20.8248 - ], - "train_epoch_time": 5.054449558258057, - "train_loss": 1.998216214994105, - "train_score": 0.4091250448777652, - "val_loss": 2.121286135727174, - "val_score": 0.3789555823556591 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:32:26.513204", - "final_model_norm": 87.84034729003906, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:30:41.548803", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 5.765828609466553, - "learning_rate": 1e-10, - "model_norm": 87.36087036132812, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.300286769866943, - "3": 7.301339626312256, - "4": 14.016050338745117, - "5": 5.431191444396973, - "6": 8.543880462646484, - "7": 4.7641143798828125, - "8": 7.423945903778076, - "9": 2.887894868850708, - "10": 5.924127101898193, - "11": 13.67439079284668, - "12": 6.22044563293457, - "13": 11.159448623657227, - "14": 65.04527282714844, - "15": 4.448390007019043, - "16": 16.901765823364258, - "17": 15.437251091003418, - "18": 6.504454135894775, - "19": 7.35742712020874, - "20": 4.80839729309082, - "21": 12.031474113464355, - "22": 10.740063667297363, - "23": 5.095099925994873, - "24": 15.341471672058105, - "25": 8.56396484375, - "26": 3.7578632831573486, - "27": 12.759321212768555, - "28": 14.070226669311523, - "29": 8.846729278564453, - "30": 3.9992878437042236, - "31": 17.102956771850586, - "32": 13.401613235473633, - "33": 3.3323185443878174, - "34": 15.259978294372559, - "35": 4.542700290679932, - "36": 7.9205002784729, - "37": 5.112588405609131, - "38": 11.688754081726074, - "39": 5.768594264984131, - "40": 7.878995418548584, - "41": 6.576426982879639, - "42": 3.915175437927246, - "43": 5.074714660644531, - "44": 9.314199447631836, - "45": 4.686611175537109, - "46": 8.48810863494873, - "47": 13.346796035766602, - "48": 4.095573902130127, - "49": 9.482538223266602, - "50": 4.848084449768066, - "51": 2.400895118713379, - "52": 18.901315689086914, - "53": 5.765828609466553 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.8053386211395264, - "3": 4.116413593292236, - "4": 4.468705654144287, - "5": 4.331547260284424, - "6": 4.026200771331787, - "7": 4.461686134338379, - "8": 4.410714149475098, - "9": 4.235510349273682, - "10": 4.083352088928223, - "11": 5.169145584106445, - "12": 4.551384925842285, - "13": 4.856448173522949, - "14": 5.497134685516357, - "15": 4.817488193511963, - "16": 7.578564643859863, - "17": 5.290328502655029, - "18": 6.949967384338379, - "19": 6.369999885559082, - "20": 4.6053876876831055, - "21": 4.436546325683594, - "22": 6.863824844360352, - "23": 4.218764305114746, - "24": 7.216196060180664, - "25": 6.5491485595703125, - "26": 4.235875129699707, - "27": 6.50718355178833, - "28": 4.777423858642578, - "29": 3.743546485900879, - "30": 4.372169494628906, - "31": 9.838640213012695, - "32": 4.680777549743652, - "33": 3.572072744369507, - "34": 5.9389214515686035, - "35": 4.366957664489746, - "36": 4.583085060119629, - "37": 4.933143615722656, - "38": 5.612424850463867, - "39": 4.482638359069824, - "40": 4.526904106140137, - "41": 4.726700782775879, - "42": 3.943324327468872, - "43": 4.045255661010742, - "44": 4.196793556213379, - "45": 4.060457229614258, - "46": 4.319828033447266, - "47": 4.913107395172119, - "48": 3.64884090423584, - "49": 4.979071140289307, - "50": 3.9165995121002197, - "51": 3.559251070022583, - "52": 9.414752006530762, - "53": 4.585429668426514 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 0.00839976, - 0.00857642, - 0.0958678, - 0.0772172, - 0.0227473, - 0.146843, - 0.055155, - 0.196578, - 0.0800275, - 0.507859, - 0.11635, - 0.0276441, - 0.117625, - 0.0389972, - 0.00129929, - 0.243453, - 0.0265291, - 0.0221995, - 0.164271, - 0.117676, - 0.199189, - 0.0306484, - 0.0595049, - 0.16251, - 0.0306602, - 0.0892966, - 0.299959, - 0.0399703, - 0.0241319, - 0.0478319, - 0.273358, - 0.0336351, - 0.0260618, - 0.321682, - 0.0255035, - 0.211617, - 0.0730555, - 0.188731, - 0.0410785, - 0.134708, - 0.0729222, - 0.109289, - 0.257253, - 0.157081, - 0.0483756, - 0.184866, - 0.0599576, - 0.0275805, - 0.217533, - 0.0553731, - 0.166636, - 0.617465, - 0.0263527, - 0.137929 - ], - "train_epoch_time": 5.057088375091553, - "train_loss": 4.52632402559606, - "train_score": 0.10379192075736211, - "val_loss": 4.493531253937328, - "val_score": 0.10520863247314632 - }, - { - "epoch": 1, - "grad_norm": 1.433429479598999, - "learning_rate": 1.0, - "model_norm": 87.36151123046875, - "step_logs": { - "grad_norm": { - "54": 4.487692832946777, - "55": 12.362515449523926, - "56": 9.290770530700684, - "57": 3.39971661567688, - "58": 3.6275835037231445, - "59": 3.4811999797821045, - "60": 6.620113849639893, - "61": 13.550787925720215, - "62": 8.233078956604004, - "63": 1.8066459894180298, - "64": 19.582988739013672, - "65": 7.002932548522949, - "66": 7.648651123046875, - "67": 3.182955265045166, - "68": 12.178287506103516, - "69": 4.017116069793701, - "70": 6.878478050231934, - "71": 4.456635475158691, - "72": 2.8061683177948, - "73": 8.096805572509766, - "74": 3.9354281425476074, - "75": 6.565037250518799, - "76": 3.2019972801208496, - "77": 5.413841247558594, - "78": 3.711090564727783, - "79": 4.413052558898926, - "80": 3.263498306274414, - "81": 2.8167335987091064, - "82": 12.143589973449707, - "83": 4.7537102699279785, - "84": 4.162781238555908, - "85": 3.7110908031463623, - "86": 3.107283115386963, - "87": 3.189760208129883, - "88": 4.122842788696289, - "89": 6.179720878601074, - "90": 2.3473868370056152, - "91": 9.49435806274414, - "92": 2.004326820373535, - "93": 3.83107328414917, - "94": 4.368336200714111, - "95": 2.172074794769287, - "96": 12.80830192565918, - "97": 7.9598517417907715, - "98": 4.475029945373535, - "99": 5.5807342529296875, - "100": 4.715944290161133, - "101": 1.104247808456421, - "102": 6.83452033996582, - "103": 3.088413715362549, - "104": 3.1336474418640137, - "105": 1.9625157117843628, - "106": 2.579343318939209, - "107": 1.433429479598999 - }, - "loss": { - "54": 4.529766082763672, - "55": 5.664586067199707, - "56": 5.814496040344238, - "57": 3.8436694145202637, - "58": 3.773864269256592, - "59": 3.608013153076172, - "60": 4.405400276184082, - "61": 6.577364921569824, - "62": 4.018462181091309, - "63": 3.2224414348602295, - "64": 5.774176120758057, - "65": 4.372259140014648, - "66": 4.477726936340332, - "67": 3.5280508995056152, - "68": 5.907240390777588, - "69": 3.836770534515381, - "70": 4.493083953857422, - "71": 4.355321884155273, - "72": 3.2985966205596924, - "73": 4.426855564117432, - "74": 3.898265838623047, - "75": 4.32016134262085, - "76": 3.7217884063720703, - "77": 4.30015754699707, - "78": 3.8478171825408936, - "79": 3.876591682434082, - "80": 3.4705288410186768, - "81": 3.785193681716919, - "82": 6.087458610534668, - "83": 4.790887832641602, - "84": 3.5112109184265137, - "85": 3.6132373809814453, - "86": 3.556417465209961, - "87": 3.5093908309936523, - "88": 3.624424457550049, - "89": 4.082795143127441, - "90": 3.3027796745300293, - "91": 5.520716667175293, - "92": 3.3483028411865234, - "93": 3.8985466957092285, - "94": 4.639792442321777, - "95": 3.4257729053497314, - "96": 5.431247711181641, - "97": 4.581187725067139, - "98": 4.119652271270752, - "99": 4.083475112915039, - "100": 3.6307711601257324, - "101": 3.0953259468078613, - "102": 4.768978595733643, - "103": 3.212509870529175, - "104": 3.23966121673584, - "105": 3.5053300857543945, - "106": 3.384728193283081, - "107": 3.250051259994507 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.224921, - 0.0370642, - 0.067361, - 0.332553, - 0.286782, - 0.297721, - 0.10052, - 0.0358198, - 0.0592837, - 0.987277, - 0.0150568, - 0.0891551, - 0.0765399, - 0.348236, - 0.0398302, - 0.237759, - 0.0949642, - 0.219284, - 0.418892, - 0.0675255, - 0.251702, - 0.100236, - 0.363003, - 0.146715, - 0.27939, - 0.199054, - 0.325858, - 0.477086, - 0.0412802, - 0.212007, - 0.202623, - 0.262357, - 0.368342, - 0.344918, - 0.213229, - 0.10691, - 0.599391, - 0.0612441, - 0.833466, - 0.265621, - 0.243146, - 0.726121, - 0.0331067, - 0.072305, - 0.205716, - 0.131113, - 0.163253, - 2.53848, - 0.102096, - 0.336801, - 0.329913, - 0.910128, - 0.508751, - 1.58175 - ], - "train_epoch_time": 5.06270956993103, - "train_loss": 3.823686411596268, - "train_score": 0.12427479381455239, - "val_loss": 3.796891169761544, - "val_score": 0.12547538736589192 - }, - { - "epoch": 2, - "grad_norm": 5.456674575805664, - "learning_rate": 1.0, - "model_norm": 87.40853881835938, - "step_logs": { - "grad_norm": { - "108": 5.709460258483887, - "109": 2.780217409133911, - "110": 1.8220134973526, - "111": 8.052960395812988, - "112": 1.5795549154281616, - "113": 9.591748237609863, - "114": 2.4433400630950928, - "115": 1.5015907287597656, - "116": 5.175107002258301, - "117": 1.8479372262954712, - "118": 9.004857063293457, - "119": 5.777551174163818, - "120": 2.4527392387390137, - "121": 4.525572776794434, - "122": 1.8827574253082275, - "123": 9.842826843261719, - "124": 5.56711483001709, - "125": 2.6885156631469727, - "126": 2.365950107574463, - "127": 15.701992988586426, - "128": 3.8434715270996094, - "129": 9.37134075164795, - "130": 1.5705770254135132, - "131": 7.028792858123779, - "132": 2.9641029834747314, - "133": 6.267132759094238, - "134": 1.6342846155166626, - "135": 3.1726131439208984, - "136": 1.0219895839691162, - "137": 11.144745826721191, - "138": 1.959206223487854, - "139": 1.9480067491531372, - "140": 16.675106048583984, - "141": 6.225305080413818, - "142": 2.398484945297241, - "143": 3.144951820373535, - "144": 2.075913429260254, - "145": 12.180719375610352, - "146": 5.990593433380127, - "147": 3.4510912895202637, - "148": 2.7853033542633057, - "149": 3.629406690597534, - "150": 1.5068943500518799, - "151": 7.937716484069824, - "152": 3.4166417121887207, - "153": 6.068278789520264, - "154": 2.0362634658813477, - "155": 3.8298466205596924, - "156": 3.7409844398498535, - "157": 1.6658438444137573, - "158": 4.7560577392578125, - "159": 1.2474490404129028, - "160": 1.7983179092407227, - "161": 5.456674575805664 - }, - "loss": { - "108": 3.8252687454223633, - "109": 3.269702911376953, - "110": 3.153294563293457, - "111": 4.942331314086914, - "112": 3.3671398162841797, - "113": 5.608525276184082, - "114": 3.4998526573181152, - "115": 3.3788342475891113, - "116": 4.009459495544434, - "117": 3.241333484649658, - "118": 5.1927056312561035, - "119": 4.227705001831055, - "120": 3.502326726913452, - "121": 4.063412666320801, - "122": 3.116813898086548, - "123": 5.995430946350098, - "124": 3.944603204727173, - "125": 3.3716001510620117, - "126": 3.5189130306243896, - "127": 6.205752372741699, - "128": 3.3606443405151367, - "129": 4.448931694030762, - "130": 2.9477643966674805, - "131": 4.288956165313721, - "132": 3.2721753120422363, - "133": 4.976713180541992, - "134": 2.9573683738708496, - "135": 3.64422345161438, - "136": 3.0619139671325684, - "137": 5.0049004554748535, - "138": 3.1333329677581787, - "139": 3.0090208053588867, - "140": 9.09743881225586, - "141": 5.671896934509277, - "142": 3.4275870323181152, - "143": 3.2397115230560303, - "144": 3.2290961742401123, - "145": 5.568178176879883, - "146": 3.4855480194091797, - "147": 3.347588539123535, - "148": 3.7354249954223633, - "149": 3.313076972961426, - "150": 2.966855764389038, - "151": 4.197963237762451, - "152": 3.1319057941436768, - "153": 4.345335960388184, - "154": 3.05686092376709, - "155": 3.5193305015563965, - "156": 3.561901569366455, - "157": 2.991948366165161, - "158": 4.493997573852539, - "159": 3.201479434967041, - "160": 3.1088132858276367, - "161": 4.162567138671875 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 0.117347, - 0.42301, - 0.949864, - 0.0762115, - 1.34956, - 0.0609611, - 0.586249, - 1.49852, - 0.149709, - 0.949182, - 0.0640383, - 0.126653, - 0.582175, - 0.198401, - 0.87927, - 0.0618843, - 0.127275, - 0.466456, - 0.628633, - 0.0251701, - 0.227497, - 0.0506585, - 1.19502, - 0.0868141, - 0.372435, - 0.126708, - 1.10726, - 0.362052, - 2.93157, - 0.0402954, - 0.816293, - 0.792947, - 0.0327176, - 0.146355, - 0.595819, - 0.327551, - 0.749312, - 0.037529, - 0.0971251, - 0.281073, - 0.481499, - 0.251513, - 1.30656, - 0.0666266, - 0.268293, - 0.118003, - 0.737238, - 0.239937, - 0.254513, - 1.07817, - 0.198673, - 2.05734, - 0.961306, - 0.139799 - ], - "train_epoch_time": 5.056260108947754, - "train_loss": 3.2343624127304538, - "train_score": 0.1522114866860799, - "val_loss": 3.25644829845319, - "val_score": 0.15120909858311085 - }, - { - "epoch": 3, - "grad_norm": 15.653616905212402, - "learning_rate": 1.0, - "model_norm": 87.44014739990234, - "step_logs": { - "grad_norm": { - "162": 1.7654263973236084, - "163": 3.2836763858795166, - "164": 4.768516540527344, - "165": 1.2640773057937622, - "166": 3.1585166454315186, - "167": 4.194464206695557, - "168": 1.0103245973587036, - "169": 3.5152182579040527, - "170": 1.4377050399780273, - "171": 10.770119667053223, - "172": 2.7500264644622803, - "173": 3.5346522331237793, - "174": 2.2370402812957764, - "175": 10.087536811828613, - "176": 2.886577606201172, - "177": 2.4982433319091797, - "178": 1.0791091918945312, - "179": 2.3742637634277344, - "180": 4.836894989013672, - "181": 1.3748149871826172, - "182": 1.0512306690216064, - "183": 1.7874820232391357, - "184": 12.56811237335205, - "185": 2.213726282119751, - "186": 6.64543342590332, - "187": 2.425590991973877, - "188": 2.9407246112823486, - "189": 12.566993713378906, - "190": 2.8077874183654785, - "191": 1.7313799858093262, - "192": 1.6948881149291992, - "193": 8.366637229919434, - "194": 3.905769109725952, - "195": 2.00282621383667, - "196": 1.3125073909759521, - "197": 11.505144119262695, - "198": 1.131795048713684, - "199": 1.2343827486038208, - "200": 1.2906420230865479, - "201": 1.2155762910842896, - "202": 0.8945650458335876, - "203": 0.6896288990974426, - "204": 2.5183982849121094, - "205": 5.40286922454834, - "206": 1.7006113529205322, - "207": 3.429755926132202, - "208": 16.784442901611328, - "209": 3.5395991802215576, - "210": 3.0890109539031982, - "211": 1.4761098623275757, - "212": 10.205368041992188, - "213": 0.6079724431037903, - "214": 2.004495620727539, - "215": 15.653616905212402 - }, - "loss": { - "162": 3.2351136207580566, - "163": 3.216046094894409, - "164": 4.396238327026367, - "165": 2.87646746635437, - "166": 3.5071041584014893, - "167": 3.5331854820251465, - "168": 2.9195051193237305, - "169": 3.314512252807617, - "170": 2.895765781402588, - "171": 5.235464096069336, - "172": 2.8600525856018066, - "173": 3.5523056983947754, - "174": 3.1644835472106934, - "175": 5.578177452087402, - "176": 2.9697630405426025, - "177": 3.391751766204834, - "178": 2.770040988922119, - "179": 3.3368654251098633, - "180": 3.568667411804199, - "181": 3.020956039428711, - "182": 2.926006317138672, - "183": 2.8224878311157227, - "184": 6.070935249328613, - "185": 2.8124871253967285, - "186": 4.371795654296875, - "187": 3.254304885864258, - "188": 3.483398199081421, - "189": 7.132011413574219, - "190": 3.204470634460449, - "191": 3.081040143966675, - "192": 3.048574686050415, - "193": 5.798521041870117, - "194": 4.38168478012085, - "195": 3.0108323097229004, - "196": 2.982576370239258, - "197": 5.273983955383301, - "198": 2.873634099960327, - "199": 3.1454124450683594, - "200": 2.952078342437744, - "201": 2.826287269592285, - "202": 2.982473373413086, - "203": 2.719486713409424, - "204": 3.003004789352417, - "205": 3.5425233840942383, - "206": 2.876796245574951, - "207": 3.1663661003112793, - "208": 8.677878379821777, - "209": 3.081639289855957, - "210": 3.453032970428467, - "211": 2.9425806999206543, - "212": 5.308302879333496, - "213": 2.687962532043457, - "214": 2.797548294067383, - "215": 9.18140983581543 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 1.03798, - 0.298265, - 0.193337, - 1.80016, - 0.351546, - 0.200823, - 2.86014, - 0.268235, - 1.40095, - 0.0451351, - 0.378181, - 0.284326, - 0.632347, - 0.0548179, - 0.356414, - 0.543444, - 2.37879, - 0.591944, - 0.152536, - 1.59829, - 2.64776, - 0.883382, - 0.038434, - 0.573909, - 0.098995, - 0.553125, - 0.402805, - 0.0451595, - 0.406469, - 1.02781, - 1.06124, - 0.0828353, - 0.287229, - 0.750585, - 1.73136, - 0.0398432, - 2.24334, - 2.06432, - 1.77221, - 1.91272, - 3.72694, - 5.71816, - 0.473486, - 0.121357, - 0.994716, - 0.269175, - 0.0308035, - 0.245965, - 0.361878, - 1.35049, - 0.0509681, - 7.27203, - 0.696253, - 0.0374697 - ], - "train_epoch_time": 5.055885076522827, - "train_loss": 4.309854684498594, - "train_score": 0.19230182922553468, - "val_loss": 4.315080170790172, - "val_score": 0.18784084389185113 - }, - { - "epoch": 4, - "grad_norm": 0.5307763814926147, - "learning_rate": 1.0, - "model_norm": 87.4842529296875, - "step_logs": { - "grad_norm": { - "216": 7.264382839202881, - "217": 3.016519546508789, - "218": 2.009855031967163, - "219": 1.2515026330947876, - "220": 5.147071361541748, - "221": 1.1269272565841675, - "222": 2.9056191444396973, - "223": 1.3414016962051392, - "224": 6.588144302368164, - "225": 1.778350591659546, - "226": 4.2911272048950195, - "227": 7.035225868225098, - "228": 0.7696641087532043, - "229": 1.9932821989059448, - "230": 4.497763633728027, - "231": 1.2307993173599243, - "232": 1.7367925643920898, - "233": 1.1126596927642822, - "234": 1.053977370262146, - "235": 1.603644847869873, - "236": 1.641119122505188, - "237": 14.096592903137207, - "238": 1.7305207252502441, - "239": 8.768121719360352, - "240": 2.761319160461426, - "241": 2.863421678543091, - "242": 1.2003724575042725, - "243": 1.185773491859436, - "244": 2.565387725830078, - "245": 0.809199869632721, - "246": 0.7568778991699219, - "247": 1.031437635421753, - "248": 0.7673467397689819, - "249": 0.718783438205719, - "250": 0.8749423027038574, - "251": 0.8532248139381409, - "252": 0.6159232258796692, - "253": 0.6433970928192139, - "254": 0.7505868077278137, - "255": 0.8013109564781189, - "256": 0.6608864068984985, - "257": 0.6655183434486389, - "258": 0.7530298829078674, - "259": 0.8087952733039856, - "260": 0.7129753828048706, - "261": 0.5630432367324829, - "262": 0.3851890563964844, - "263": 0.43649014830589294, - "264": 0.5901908874511719, - "265": 0.6514227986335754, - "266": 0.8378380537033081, - "267": 0.8493033051490784, - "268": 0.6025286316871643, - "269": 0.5307763814926147 - }, - "loss": { - "216": 4.313517093658447, - "217": 2.972172498703003, - "218": 3.3032941818237305, - "219": 2.791508197784424, - "220": 4.311266899108887, - "221": 2.7992138862609863, - "222": 3.243692398071289, - "223": 2.982128620147705, - "224": 3.80674409866333, - "225": 2.820188283920288, - "226": 3.479891777038574, - "227": 3.8139331340789795, - "228": 2.721371650695801, - "229": 2.8763344287872314, - "230": 3.9594669342041016, - "231": 3.052639961242676, - "232": 3.0569255352020264, - "233": 3.190089225769043, - "234": 2.9966259002685547, - "235": 2.92966890335083, - "236": 3.07914400100708, - "237": 6.789307594299316, - "238": 2.9938905239105225, - "239": 5.213659286499023, - "240": 3.7490806579589844, - "241": 3.0940699577331543, - "242": 3.018930673599243, - "243": 2.842966318130493, - "244": 3.148573160171509, - "245": 2.912014961242676, - "246": 2.6648151874542236, - "247": 2.790278911590576, - "248": 2.7896454334259033, - "249": 2.690268039703369, - "250": 2.6885101795196533, - "251": 2.815004587173462, - "252": 2.627066135406494, - "253": 2.6681008338928223, - "254": 2.6348934173583984, - "255": 2.7513134479522705, - "256": 2.6638846397399902, - "257": 2.670868396759033, - "258": 2.6168901920318604, - "259": 2.721342086791992, - "260": 2.657644271850586, - "261": 2.6717302799224854, - "262": 2.572261333465576, - "263": 2.5654335021972656, - "264": 2.5559775829315186, - "265": 2.64631724357605, - "266": 2.626314640045166, - "267": 2.7487263679504395, - "268": 2.627315044403076, - "269": 2.599977970123291 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 0.0817399, - 0.326634, - 0.817745, - 1.78228, - 0.162736, - 2.20417, - 0.384204, - 1.65733, - 0.0877056, - 0.89175, - 0.188983, - 0.0770579, - 4.59394, - 0.723939, - 0.195724, - 2.01512, - 1.01342, - 2.57678, - 2.69755, - 1.13921, - 1.14327, - 0.0341662, - 0.999729, - 0.0678156, - 0.49169, - 0.377363, - 2.09518, - 2.02194, - 0.478418, - 4.44715, - 4.65174, - 2.62278, - 4.73768, - 5.20714, - 3.51199, - 3.8668, - 6.92497, - 6.44531, - 4.67693, - 4.28487, - 6.09904, - 6.03021, - 4.61489, - 4.16012, - 5.22815, - 8.4277, - 17.3367, - 13.4652, - 7.3379, - 6.23614, - 3.74133, - 3.81071, - 7.23697, - 9.22883 - ], - "train_epoch_time": 5.055017471313477, - "train_loss": 2.57431765382567, - "train_score": 0.2514156653130243, - "val_loss": 2.6006610806856036, - "val_score": 0.2447393443328505 - }, - { - "epoch": 5, - "grad_norm": 0.6712852716445923, - "learning_rate": 1.0, - "model_norm": 87.5792007446289, - "step_logs": { - "grad_norm": { - "270": 0.46761178970336914, - "271": 0.5040448307991028, - "272": 0.5838162899017334, - "273": 0.6276520490646362, - "274": 0.7859411835670471, - "275": 0.7103044986724854, - "276": 0.45779117941856384, - "277": 0.4440959692001343, - "278": 0.5940555334091187, - "279": 0.6490227580070496, - "280": 0.7053412199020386, - "281": 0.7040328979492188, - "282": 0.5998298525810242, - "283": 0.594127893447876, - "284": 0.7145086526870728, - "285": 0.6666063070297241, - "286": 0.5562670230865479, - "287": 0.554636538028717, - "288": 0.6019771099090576, - "289": 0.6950008869171143, - "290": 0.8517137169837952, - "291": 0.6693003177642822, - "292": 0.46996134519577026, - "293": 0.49752071499824524, - "294": 0.5653920769691467, - "295": 0.5723724365234375, - "296": 0.6215378046035767, - "297": 0.603495180606842, - "298": 0.5566955804824829, - "299": 0.5698720812797546, - "300": 0.6220775246620178, - "301": 0.6228402256965637, - "302": 0.608216404914856, - "303": 0.5949286222457886, - "304": 0.5897703170776367, - "305": 0.6326521635055542, - "306": 0.6502781510353088, - "307": 0.7034972310066223, - "308": 0.6854209899902344, - "309": 0.659753143787384, - "310": 0.5547028183937073, - "311": 0.5182896256446838, - "312": 0.5836197733879089, - "313": 0.8791462182998657, - "314": 0.6663444638252258, - "315": 0.6161668300628662, - "316": 0.5707175731658936, - "317": 0.6068826913833618, - "318": 0.6142266392707825, - "319": 0.6523970365524292, - "320": 0.6213333010673523, - "321": 0.6482954025268555, - "322": 0.6766254901885986, - "323": 0.6712852716445923 - }, - "loss": { - "270": 2.5571160316467285, - "271": 2.5527584552764893, - "272": 2.607752799987793, - "273": 2.605752944946289, - "274": 2.6232571601867676, - "275": 2.7005960941314697, - "276": 2.5935139656066895, - "277": 2.5643858909606934, - "278": 2.5594587326049805, - "279": 2.659778118133545, - "280": 2.5996789932250977, - "281": 2.6754727363586426, - "282": 2.5792224407196045, - "283": 2.593381404876709, - "284": 2.594367504119873, - "285": 2.665796995162964, - "286": 2.581049919128418, - "287": 2.571040153503418, - "288": 2.5634853839874268, - "289": 2.611403226852417, - "290": 2.6381478309631348, - "291": 2.7140612602233887, - "292": 2.5541610717773438, - "293": 2.572448492050171, - "294": 2.5328023433685303, - "295": 2.6092112064361572, - "296": 2.552314519882202, - "297": 2.6138644218444824, - "298": 2.54337739944458, - "299": 2.566132068634033, - "300": 2.5600030422210693, - "301": 2.6317367553710938, - "302": 2.566258192062378, - "303": 2.591238260269165, - "304": 2.559795379638672, - "305": 2.5886669158935547, - "306": 2.5549769401550293, - "307": 2.6214754581451416, - "308": 2.5994701385498047, - "309": 2.586261749267578, - "310": 2.5598111152648926, - "311": 2.5569286346435547, - "312": 2.5062479972839355, - "313": 2.570241928100586, - "314": 2.596400022506714, - "315": 2.593870162963867, - "316": 2.5686187744140625, - "317": 2.5699620246887207, - "318": 2.5624499320983887, - "319": 2.6041147708892822, - "320": 2.5424764156341553, - "321": 2.5638246536254883, - "322": 2.563735008239746, - "323": 2.561342239379883 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 11.6944, - 10.0478, - 7.65093, - 6.61447, - 4.24679, - 5.35267, - 12.3752, - 13.0026, - 7.25261, - 6.3143, - 5.22542, - 5.39777, - 7.16857, - 7.34694, - 5.08179, - 5.99913, - 8.34122, - 8.3578, - 7.07409, - 5.40634, - 3.63674, - 6.05867, - 11.5644, - 10.3926, - 7.92322, - 7.96438, - 6.60692, - 7.17688, - 8.20683, - 7.90177, - 6.61533, - 6.78405, - 6.9372, - 7.32112, - 7.35935, - 6.46765, - 6.04211, - 5.29689, - 5.53312, - 5.94168, - 8.31931, - 9.51861, - 7.35807, - 3.32546, - 5.84755, - 6.83206, - 7.88601, - 6.97778, - 6.79201, - 6.11838, - 6.58579, - 6.10018, - 5.59985, - 5.68399 - ], - "train_epoch_time": 5.055832147598267, - "train_loss": 2.584099040585577, - "train_score": 0.2575681492536619, - "val_loss": 2.619497704861769, - "val_score": 0.24536721476799586 - }, - { - "epoch": 6, - "grad_norm": 0.7342073917388916, - "learning_rate": 1.0, - "model_norm": 87.63153076171875, - "step_logs": { - "grad_norm": { - "324": 1.411607027053833, - "325": 5.936882019042969, - "326": 1.0874830484390259, - "327": 1.1495829820632935, - "328": 2.0652434825897217, - "329": 1.788154125213623, - "330": 2.5353336334228516, - "331": 9.002483367919922, - "332": 1.5860260725021362, - "333": 2.067275047302246, - "334": 2.326392412185669, - "335": 1.2262860536575317, - "336": 1.1714569330215454, - "337": 1.292888879776001, - "338": 1.0985143184661865, - "339": 2.9780819416046143, - "340": 1.0537818670272827, - "341": 0.7638192772865295, - "342": 1.1163246631622314, - "343": 1.7499282360076904, - "344": 0.8857640027999878, - "345": 0.9231863617897034, - "346": 0.7891698479652405, - "347": 0.8837443590164185, - "348": 0.7318992018699646, - "349": 0.5817071795463562, - "350": 0.6186779737472534, - "351": 0.5380474328994751, - "352": 0.4428447186946869, - "353": 0.47682973742485046, - "354": 0.5803439617156982, - "355": 0.5815485715866089, - "356": 0.5424709916114807, - "357": 0.5546571612358093, - "358": 0.5741904973983765, - "359": 0.5957809686660767, - "360": 0.5432384610176086, - "361": 0.5247166156768799, - "362": 0.5479821562767029, - "363": 0.5592751502990723, - "364": 0.624038577079773, - "365": 0.6352159976959229, - "366": 0.5675089955329895, - "367": 0.5796844959259033, - "368": 0.590277910232544, - "369": 0.5761415362358093, - "370": 0.5166272521018982, - "371": 0.5033401250839233, - "372": 0.5725637674331665, - "373": 0.6078308820724487, - "374": 0.4935343265533447, - "375": 0.48055365681648254, - "376": 0.5760100483894348, - "377": 0.7342073917388916 - }, - "loss": { - "324": 2.5620944499969482, - "325": 3.287554979324341, - "326": 2.700340509414673, - "327": 2.6189565658569336, - "328": 2.732553005218506, - "329": 2.6074466705322266, - "330": 3.005770444869995, - "331": 4.07434606552124, - "332": 2.9722516536712646, - "333": 2.9607715606689453, - "334": 3.0647597312927246, - "335": 2.727865219116211, - "336": 2.898146629333496, - "337": 2.8877696990966797, - "338": 2.7290725708007812, - "339": 2.994896650314331, - "340": 2.703648567199707, - "341": 2.6480765342712402, - "342": 2.7425317764282227, - "343": 2.867856502532959, - "344": 2.7015531063079834, - "345": 2.731201171875, - "346": 2.6815595626831055, - "347": 2.6596436500549316, - "348": 2.635072708129883, - "349": 2.5978190898895264, - "350": 2.6004061698913574, - "351": 2.606260299682617, - "352": 2.5429019927978516, - "353": 2.546093463897705, - "354": 2.5586318969726562, - "355": 2.5845251083374023, - "356": 2.5404248237609863, - "357": 2.563230276107788, - "358": 2.566281318664551, - "359": 2.5606184005737305, - "360": 2.521711826324463, - "361": 2.543833017349243, - "362": 2.5183985233306885, - "363": 2.567601442337036, - "364": 2.508497953414917, - "365": 2.5992743968963623, - "366": 2.5552685260772705, - "367": 2.550887107849121, - "368": 2.5539798736572266, - "369": 2.544661283493042, - "370": 2.515361785888672, - "371": 2.52848744392395, - "372": 2.5067756175994873, - "373": 2.5384414196014404, - "374": 2.516066551208496, - "375": 2.4882755279541016, - "376": 2.4791855812072754, - "377": 2.522308349609375 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 1.28578, - 0.0932731, - 2.28336, - 1.98174, - 0.640658, - 0.815465, - 0.467612, - 0.0502728, - 1.18158, - 0.692801, - 0.566279, - 1.81401, - 2.11187, - 1.72759, - 2.26154, - 0.337683, - 2.43472, - 4.53889, - 2.20075, - 0.93652, - 3.44332, - 3.20461, - 4.30573, - 3.40542, - 4.91915, - 7.67715, - 6.79379, - 9.00278, - 12.9666, - 11.1982, - 7.59691, - 7.64203, - 8.63283, - 8.33179, - 7.78381, - 7.21392, - 8.54505, - 9.2393, - 8.38671, - 8.20874, - 6.44156, - 6.44183, - 7.93398, - 7.59116, - 7.33001, - 7.66605, - 9.42423, - 9.98016, - 7.64659, - 6.87071, - 10.3297, - 10.7749, - 7.47221, - 4.67908 - ], - "train_epoch_time": 5.055371284484863, - "train_loss": 2.526646657106353, - "train_score": 0.2635155127927597, - "val_loss": 2.5588604218650493, - "val_score": 0.25524271691023415 - }, - { - "epoch": 7, - "grad_norm": 0.5210717916488647, - "learning_rate": 1.0, - "model_norm": 87.73303985595703, - "step_logs": { - "grad_norm": { - "378": 0.602575421333313, - "379": 0.5277130603790283, - "380": 0.5227085947990417, - "381": 0.71000075340271, - "382": 0.6231046915054321, - "383": 0.6421539783477783, - "384": 0.7804208397865295, - "385": 0.7286295294761658, - "386": 0.6196457147598267, - "387": 0.6605471968650818, - "388": 0.7016088366508484, - "389": 0.6097334623336792, - "390": 0.5113112926483154, - "391": 0.6405589580535889, - "392": 0.6165372729301453, - "393": 0.5911381840705872, - "394": 0.7189100384712219, - "395": 0.7505009770393372, - "396": 0.8339499831199646, - "397": 0.666320264339447, - "398": 0.7846480011940002, - "399": 0.6380552053451538, - "400": 0.5929601192474365, - "401": 0.6609094738960266, - "402": 0.9639849662780762, - "403": 0.6394780278205872, - "404": 0.605449914932251, - "405": 0.5324991345405579, - "406": 0.4752742648124695, - "407": 0.5447858572006226, - "408": 0.8335340619087219, - "409": 0.5592840909957886, - "410": 0.6268500089645386, - "411": 1.0854082107543945, - "412": 0.6443994641304016, - "413": 0.5713462829589844, - "414": 0.5971332788467407, - "415": 0.6650311946868896, - "416": 0.7565819025039673, - "417": 1.3656957149505615, - "418": 0.7001889944076538, - "419": 0.47380998730659485, - "420": 0.38597381114959717, - "421": 0.42630496621131897, - "422": 0.5677585005760193, - "423": 0.5772583484649658, - "424": 0.5121479630470276, - "425": 0.5330544710159302, - "426": 0.5912601351737976, - "427": 0.6170485615730286, - "428": 0.6126735806465149, - "429": 0.7320794463157654, - "430": 0.5360994935035706, - "431": 0.5210717916488647 - }, - "loss": { - "378": 2.5299153327941895, - "379": 2.493353843688965, - "380": 2.4669981002807617, - "381": 2.4794561862945557, - "382": 2.5080678462982178, - "383": 2.521314859390259, - "384": 2.4862966537475586, - "385": 2.526296377182007, - "386": 2.498164176940918, - "387": 2.475986957550049, - "388": 2.5053858757019043, - "389": 2.512781858444214, - "390": 2.433912992477417, - "391": 2.4429068565368652, - "392": 2.4752416610717773, - "393": 2.4649362564086914, - "394": 2.4418139457702637, - "395": 2.5195460319519043, - "396": 2.4962844848632812, - "397": 2.5044150352478027, - "398": 2.4830188751220703, - "399": 2.500295877456665, - "400": 2.445892095565796, - "401": 2.462839126586914, - "402": 2.490461826324463, - "403": 2.5121071338653564, - "404": 2.436614513397217, - "405": 2.4701952934265137, - "406": 2.406588315963745, - "407": 2.4216835498809814, - "408": 2.426663637161255, - "409": 2.5226311683654785, - "410": 2.4596550464630127, - "411": 2.5497500896453857, - "412": 2.552874803543091, - "413": 2.4740447998046875, - "414": 2.4519267082214355, - "415": 2.4554038047790527, - "416": 2.4858202934265137, - "417": 2.555680274963379, - "418": 2.535151958465576, - "419": 2.515368938446045, - "420": 2.437239408493042, - "421": 2.4427995681762695, - "422": 2.4465582370758057, - "423": 2.50795316696167, - "424": 2.450932502746582, - "425": 2.4523844718933105, - "426": 2.4315786361694336, - "427": 2.4662482738494873, - "428": 2.4301352500915527, - "429": 2.450061082839966, - "430": 2.446341037750244, - "431": 2.4056081771850586 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 6.9676, - 8.95341, - 9.02921, - 4.91857, - 6.45977, - 6.11433, - 4.08221, - 4.75851, - 6.5063, - 5.67467, - 5.08961, - 6.75888, - 9.30967, - 5.95372, - 6.51177, - 7.05387, - 4.72458, - 4.47322, - 3.58934, - 5.64079, - 4.03302, - 6.14151, - 6.95643, - 5.63835, - 2.68003, - 6.14309, - 6.64707, - 8.71151, - 10.654, - 8.15954, - 3.49271, - 8.06471, - 6.2596, - 2.16427, - 6.14779, - 7.57894, - 6.87646, - 5.55186, - 4.34268, - 1.37025, - 5.17099, - 11.2045, - 16.36, - 13.4415, - 7.58976, - 7.52626, - 9.34416, - 8.63069, - 6.95554, - 6.47736, - 6.47399, - 4.57152, - 8.51189, - 8.85992 - ], - "train_epoch_time": 5.055718898773193, - "train_loss": 2.400428195663299, - "train_score": 0.30167682931960227, - "val_loss": 2.4404160708154796, - "val_score": 0.2931660803464743 - }, - { - "epoch": 8, - "grad_norm": 0.7102885246276855, - "learning_rate": 1.0, - "model_norm": 87.84078979492188, - "step_logs": { - "grad_norm": { - "432": 0.5291587114334106, - "433": 0.5080680847167969, - "434": 0.5914666056632996, - "435": 0.6211421489715576, - "436": 0.6727566719055176, - "437": 0.5964329838752747, - "438": 0.5353344678878784, - "439": 0.5753464698791504, - "440": 0.6464072465896606, - "441": 0.6724745035171509, - "442": 0.6912034749984741, - "443": 0.647833526134491, - "444": 0.584959864616394, - "445": 0.5919318199157715, - "446": 0.6382439732551575, - "447": 0.6012136340141296, - "448": 0.5454508066177368, - "449": 0.6023106575012207, - "450": 0.6230897903442383, - "451": 0.5920654535293579, - "452": 0.7590808868408203, - "453": 0.6764283180236816, - "454": 0.5318419337272644, - "455": 0.5110647678375244, - "456": 0.6268112063407898, - "457": 0.6288836598396301, - "458": 0.6159000396728516, - "459": 0.6438373923301697, - "460": 0.6103591322898865, - "461": 0.5652045607566833, - "462": 0.5193692445755005, - "463": 0.547308087348938, - "464": 0.5841281414031982, - "465": 0.5704706907272339, - "466": 0.4866780936717987, - "467": 0.5057941675186157, - "468": 0.5549024939537048, - "469": 0.5555694699287415, - "470": 0.5647054314613342, - "471": 0.5819074511528015, - "472": 0.6872416734695435, - "473": 0.5997867584228516, - "474": 0.6576035022735596, - "475": 0.5740867853164673, - "476": 0.5741524696350098, - "477": 0.57319176197052, - "478": 0.6367378234863281, - "479": 0.5653982758522034, - "480": 0.4593934118747711, - "481": 0.48867759108543396, - "482": 0.6057224273681641, - "483": 0.7308072447776794, - "484": 0.7546696066856384, - "485": 0.7102885246276855 - }, - "loss": { - "432": 2.4098615646362305, - "433": 2.399606704711914, - "434": 2.382483959197998, - "435": 2.4312515258789062, - "436": 2.409421920776367, - "437": 2.437469005584717, - "438": 2.389492988586426, - "439": 2.3906571865081787, - "440": 2.413485050201416, - "441": 2.4353957176208496, - "442": 2.39896821975708, - "443": 2.414506673812866, - "444": 2.385709762573242, - "445": 2.4022057056427, - "446": 2.370846748352051, - "447": 2.3770575523376465, - "448": 2.3413352966308594, - "449": 2.3526244163513184, - "450": 2.388529062271118, - "451": 2.365694046020508, - "452": 2.359882354736328, - "453": 2.4510598182678223, - "454": 2.3693623542785645, - "455": 2.33272123336792, - "456": 2.341805934906006, - "457": 2.4156875610351562, - "458": 2.353476047515869, - "459": 2.398212432861328, - "460": 2.3470890522003174, - "461": 2.366288185119629, - "462": 2.3093433380126953, - "463": 2.330211639404297, - "464": 2.325465679168701, - "465": 2.370218276977539, - "466": 2.323535680770874, - "467": 2.317265272140503, - "468": 2.3108184337615967, - "469": 2.328054904937744, - "470": 2.3119654655456543, - "471": 2.324441909790039, - "472": 2.332365036010742, - "473": 2.38411021232605, - "474": 2.2995452880859375, - "475": 2.3521382808685303, - "476": 2.3074820041656494, - "477": 2.3330323696136475, - "478": 2.3407046794891357, - "479": 2.3514766693115234, - "480": 2.3115811347961426, - "481": 2.2945120334625244, - "482": 2.3046698570251465, - "483": 2.334651470184326, - "484": 2.3936595916748047, - "485": 2.395461082458496 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 8.60637, - 9.296, - 6.81035, - 6.30156, - 5.32349, - 6.85198, - 8.33787, - 7.22201, - 5.77607, - 5.3854, - 5.02126, - 5.7531, - 6.97213, - 6.85594, - 5.82009, - 6.57631, - 7.8696, - 6.48502, - 6.15218, - 6.74868, - 4.09557, - 5.35686, - 8.37657, - 8.93122, - 5.96043, - 6.10802, - 6.20425, - 5.78543, - 6.30026, - 7.40724, - 8.56123, - 7.77914, - 6.81544, - 7.28319, - 9.80993, - 9.05791, - 7.50468, - 7.54252, - 7.24999, - 6.86453, - 4.9383, - 6.62724, - 5.31758, - 7.13687, - 6.99977, - 7.10102, - 5.77332, - 7.35583, - 10.9532, - 9.60828, - 6.28147, - 4.37136, - 4.2029, - 4.7481 - ], - "train_epoch_time": 5.055600881576538, - "train_loss": 2.364268625278555, - "train_score": 0.2852156564443662, - "val_loss": 2.4210136120135965, - "val_score": 0.2710022609488425 - }, - { - "epoch": 9, - "grad_norm": 0.6309716701507568, - "learning_rate": 1.0, - "model_norm": 87.9674072265625, - "step_logs": { - "grad_norm": { - "486": 0.6555119156837463, - "487": 0.6065738201141357, - "488": 0.6766089797019958, - "489": 0.6968892216682434, - "490": 0.8175331354141235, - "491": 0.6847348809242249, - "492": 0.6094216108322144, - "493": 0.570482611656189, - "494": 0.6077772378921509, - "495": 0.6636857986450195, - "496": 0.6628976464271545, - "497": 0.6685439348220825, - "498": 0.6621301770210266, - "499": 0.6826894879341125, - "500": 0.7284846305847168, - "501": 0.6596152186393738, - "502": 0.555325984954834, - "503": 0.49520543217658997, - "504": 0.5145333409309387, - "505": 0.6164749264717102, - "506": 0.6721234917640686, - "507": 0.7313554883003235, - "508": 0.7766849398612976, - "509": 0.6086581349372864, - "510": 0.5633295774459839, - "511": 0.6057594418525696, - "512": 0.6070159077644348, - "513": 0.5995933413505554, - "514": 0.5777207612991333, - "515": 0.5417271256446838, - "516": 0.6076132655143738, - "517": 0.6441335678100586, - "518": 0.6251640319824219, - "519": 0.6875194311141968, - "520": 0.6550372838973999, - "521": 0.623849093914032, - "522": 0.5926386713981628, - "523": 0.5614331364631653, - "524": 0.5475859642028809, - "525": 0.5731501579284668, - "526": 0.5876085162162781, - "527": 0.6310720443725586, - "528": 0.5888630151748657, - "529": 0.5407215356826782, - "530": 0.5888542532920837, - "531": 0.6244917511940002, - "532": 0.6489545106887817, - "533": 0.5564259886741638, - "534": 0.48338624835014343, - "535": 0.5624719858169556, - "536": 0.6948518753051758, - "537": 0.8001134991645813, - "538": 0.6677476167678833, - "539": 0.6309716701507568 - }, - "loss": { - "486": 2.3659470081329346, - "487": 2.3216490745544434, - "488": 2.331204891204834, - "489": 2.3603601455688477, - "490": 2.3416693210601807, - "491": 2.3846657276153564, - "492": 2.3112521171569824, - "493": 2.2980527877807617, - "494": 2.3205103874206543, - "495": 2.332919120788574, - "496": 2.3330044746398926, - "497": 2.319004535675049, - "498": 2.35805344581604, - "499": 2.2802436351776123, - "500": 2.3656234741210938, - "501": 2.336944103240967, - "502": 2.3160767555236816, - "503": 2.265007972717285, - "504": 2.291518449783325, - "505": 2.2510130405426025, - "506": 2.3210577964782715, - "507": 2.3007216453552246, - "508": 2.342376232147217, - "509": 2.3094828128814697, - "510": 2.2949156761169434, - "511": 2.2486014366149902, - "512": 2.295055866241455, - "513": 2.3104748725891113, - "514": 2.2957658767700195, - "515": 2.2595434188842773, - "516": 2.256690740585327, - "517": 2.2956085205078125, - "518": 2.2952845096588135, - "519": 2.2891552448272705, - "520": 2.328775405883789, - "521": 2.2614190578460693, - "522": 2.2824063301086426, - "523": 2.276197671890259, - "524": 2.25631046295166, - "525": 2.263183355331421, - "526": 2.276244640350342, - "527": 2.2238001823425293, - "528": 2.2641139030456543, - "529": 2.2153940200805664, - "530": 2.27239727973938, - "531": 2.252530097961426, - "532": 2.274031639099121, - "533": 2.2768046855926514, - "534": 2.203139305114746, - "535": 2.199826955795288, - "536": 2.263258934020996, - "537": 2.2897324562072754, - "538": 2.2780838012695312, - "539": 2.2499399185180664 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 5.5061, - 6.31, - 5.09219, - 4.86016, - 3.5036, - 5.08607, - 6.22317, - 7.06115, - 6.28195, - 5.29633, - 5.30912, - 5.1885, - 5.37857, - 4.89254, - 4.45764, - 5.37115, - 7.51029, - 9.23632, - 8.65558, - 5.92307, - 5.13793, - 4.30137, - 3.883, - 6.23402, - 7.23172, - 6.12791, - 6.22864, - 6.42669, - 6.87847, - 7.69945, - 6.11248, - 5.53281, - 5.87285, - 4.8429, - 5.42745, - 5.81061, - 6.4985, - 7.22128, - 7.52479, - 6.88942, - 6.59239, - 5.5839, - 6.52935, - 7.57711, - 6.55343, - 5.77587, - 5.39968, - 7.35379, - 9.42873, - 6.95323, - 4.68759, - 3.57669, - 5.10911, - 5.65134 - ], - "train_epoch_time": 5.059556007385254, - "train_loss": 2.248896524040737, - "train_score": 0.3355664903583964, - "val_loss": 2.320340652279684, - "val_score": 0.31700721140159666 - }, - { - "epoch": 10, - "grad_norm": 0.6014808416366577, - "learning_rate": 1.0, - "model_norm": 88.1033706665039, - "step_logs": { - "grad_norm": { - "540": 0.556962788105011, - "541": 0.4922509789466858, - "542": 0.46269673109054565, - "543": 0.4437045753002167, - "544": 0.46495282649993896, - "545": 0.5269874930381775, - "546": 0.6012157797813416, - "547": 0.6110914945602417, - "548": 0.5938258767127991, - "549": 0.5913670659065247, - "550": 0.5411767363548279, - "551": 0.48985549807548523, - "552": 0.49795278906822205, - "553": 0.5598718523979187, - "554": 0.7001277804374695, - "555": 0.6461341381072998, - "556": 0.5456030368804932, - "557": 0.5796665549278259, - "558": 0.6592769622802734, - "559": 0.70893394947052, - "560": 0.631971538066864, - "561": 0.5096566677093506, - "562": 0.5030943155288696, - "563": 0.5146854519844055, - "564": 0.5239552855491638, - "565": 0.5368452072143555, - "566": 0.5880934000015259, - "567": 0.6512694954872131, - "568": 0.6467437148094177, - "569": 0.601622998714447, - "570": 0.6149700880050659, - "571": 0.594676673412323, - "572": 0.5724245309829712, - "573": 0.5899494290351868, - "574": 0.599740207195282, - "575": 0.604749858379364, - "576": 0.6063494086265564, - "577": 0.6455590128898621, - "578": 0.7162782549858093, - "579": 0.7747810482978821, - "580": 0.6035782098770142, - "581": 0.6915206909179688, - "582": 0.6963188648223877, - "583": 0.7954487800598145, - "584": 0.6717667579650879, - "585": 0.5934470295906067, - "586": 0.5472848415374756, - "587": 0.6223129034042358, - "588": 0.6549533009529114, - "589": 0.7186363935470581, - "590": 0.715755045413971, - "591": 0.6313241720199585, - "592": 0.5664232969284058, - "593": 0.6014808416366577 - }, - "loss": { - "540": 2.2582836151123047, - "541": 2.225924253463745, - "542": 2.2293286323547363, - "543": 2.2244949340820312, - "544": 2.1676442623138428, - "545": 2.203770160675049, - "546": 2.2265186309814453, - "547": 2.2523059844970703, - "548": 2.2320499420166016, - "549": 2.230039119720459, - "550": 2.237853527069092, - "551": 2.2208521366119385, - "552": 2.1823201179504395, - "553": 2.215790271759033, - "554": 2.227344512939453, - "555": 2.301483631134033, - "556": 2.2014963626861572, - "557": 2.2087697982788086, - "558": 2.1855506896972656, - "559": 2.275979995727539, - "560": 2.204580545425415, - "561": 2.2193448543548584, - "562": 2.1965672969818115, - "563": 2.1859724521636963, - "564": 2.2182860374450684, - "565": 2.1638054847717285, - "566": 2.1911654472351074, - "567": 2.2026207447052, - "568": 2.2449052333831787, - "569": 2.236891269683838, - "570": 2.218660593032837, - "571": 2.238295555114746, - "572": 2.221579074859619, - "573": 2.230358600616455, - "574": 2.1804862022399902, - "575": 2.1922364234924316, - "576": 2.183056354522705, - "577": 2.214650869369507, - "578": 2.2242043018341064, - "579": 2.274226665496826, - "580": 2.2473220825195312, - "581": 2.1946420669555664, - "582": 2.2512881755828857, - "583": 2.2347519397735596, - "584": 2.2603650093078613, - "585": 2.196855068206787, - "586": 2.164666175842285, - "587": 2.149979829788208, - "588": 2.2092437744140625, - "589": 2.21726655960083, - "590": 2.24027681350708, - "591": 2.221879720687866, - "592": 2.1838583946228027, - "593": 2.1815268993377686 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 7.27991, - 9.18623, - 10.4131, - 11.2991, - 10.027, - 7.93534, - 6.15979, - 6.03136, - 6.32974, - 6.37673, - 7.64106, - 9.25515, - 8.8012, - 7.06889, - 4.54394, - 5.51268, - 7.39545, - 6.57346, - 5.02835, - 4.52853, - 5.5199, - 8.54416, - 8.67852, - 8.25203, - 8.08033, - 7.50793, - 6.33552, - 5.193, - 5.36702, - 6.18011, - 5.86655, - 6.3293, - 6.77993, - 6.40833, - 6.06215, - 5.99426, - 5.93771, - 5.31414, - 4.33522, - 3.78857, - 6.16876, - 4.58937, - 4.64317, - 3.53187, - 5.00889, - 6.23789, - 7.2271, - 5.55158, - 5.15019, - 4.29338, - 4.37293, - 5.57463, - 6.80679, - 6.02999 - ], - "train_epoch_time": 5.066505193710327, - "train_loss": 2.1937105183621903, - "train_score": 0.3679463324451036, - "val_loss": 2.2935922800057518, - "val_score": 0.339991748606707 - }, - { - "epoch": 11, - "grad_norm": 0.4858298897743225, - "learning_rate": 1.0, - "model_norm": 88.24640655517578, - "step_logs": { - "grad_norm": { - "594": 0.5973585844039917, - "595": 0.5813920497894287, - "596": 0.5318682789802551, - "597": 0.5529276728630066, - "598": 0.6715949773788452, - "599": 0.7248722910881042, - "600": 0.7247588038444519, - "601": 0.7221391201019287, - "602": 0.6781218647956848, - "603": 0.7130929231643677, - "604": 0.6518683433532715, - "605": 0.6411283612251282, - "606": 0.6091272234916687, - "607": 0.563484251499176, - "608": 0.5655494928359985, - "609": 0.5641517639160156, - "610": 0.5741521716117859, - "611": 0.5244921445846558, - "612": 0.48222866654396057, - "613": 0.551892876625061, - "614": 0.6759764552116394, - "615": 0.7844778299331665, - "616": 0.79837566614151, - "617": 0.7085427045822144, - "618": 0.6418455839157104, - "619": 0.6136366128921509, - "620": 0.5651835203170776, - "621": 0.5280206799507141, - "622": 0.5549091696739197, - "623": 0.5597411394119263, - "624": 0.5573664307594299, - "625": 0.5344337224960327, - "626": 0.5359423160552979, - "627": 0.5972744822502136, - "628": 0.620598554611206, - "629": 0.6067497730255127, - "630": 0.5799751281738281, - "631": 0.575186014175415, - "632": 0.6336397528648376, - "633": 0.6197266578674316, - "634": 0.6179752945899963, - "635": 0.5942938327789307, - "636": 0.5791890025138855, - "637": 0.5822694301605225, - "638": 0.5426585674285889, - "639": 0.5100070238113403, - "640": 0.5412781834602356, - "641": 0.6127952337265015, - "642": 0.6500569581985474, - "643": 0.6201475858688354, - "644": 0.6241342425346375, - "645": 0.6081094741821289, - "646": 0.5275270342826843, - "647": 0.4858298897743225 - }, - "loss": { - "594": 2.1896016597747803, - "595": 2.2009925842285156, - "596": 2.1809935569763184, - "597": 2.1452231407165527, - "598": 2.1617276668548584, - "599": 2.1946449279785156, - "600": 2.18570613861084, - "601": 2.242854118347168, - "602": 2.205009937286377, - "603": 2.2188169956207275, - "604": 2.1857519149780273, - "605": 2.2027411460876465, - "606": 2.1736578941345215, - "607": 2.1626667976379395, - "608": 2.135918140411377, - "609": 2.185819625854492, - "610": 2.157137870788574, - "611": 2.133901596069336, - "612": 2.141611099243164, - "613": 2.1238460540771484, - "614": 2.1684212684631348, - "615": 2.210869312286377, - "616": 2.243278980255127, - "617": 2.2225825786590576, - "618": 2.2220232486724854, - "619": 2.195187568664551, - "620": 2.154479503631592, - "621": 2.15898060798645, - "622": 2.128307819366455, - "623": 2.1356043815612793, - "624": 2.141587972640991, - "625": 2.1088027954101562, - "626": 2.1308999061584473, - "627": 2.163480520248413, - "628": 2.1570582389831543, - "629": 2.133984088897705, - "630": 2.1156363487243652, - "631": 2.1106784343719482, - "632": 2.157172441482544, - "633": 2.1684060096740723, - "634": 2.120621919631958, - "635": 2.1279208660125732, - "636": 2.1433229446411133, - "637": 2.151803493499756, - "638": 2.140707492828369, - "639": 2.0865628719329834, - "640": 2.107567071914673, - "641": 2.122465133666992, - "642": 2.1497349739074707, - "643": 2.1593289375305176, - "644": 2.1215906143188477, - "645": 2.1376967430114746, - "646": 2.1326732635498047, - "647": 2.087991237640381 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 6.13614, - 6.51149, - 7.70986, - 7.01675, - 4.79276, - 4.17678, - 4.16107, - 4.3009, - 4.79507, - 4.36344, - 5.14376, - 5.35887, - 5.85835, - 6.81124, - 6.67795, - 6.86788, - 6.54371, - 7.75705, - 9.20947, - 6.9729, - 4.74548, - 3.59254, - 3.5194, - 4.42717, - 5.39371, - 5.82974, - 6.74471, - 7.74367, - 6.91179, - 6.81626, - 6.89373, - 7.38326, - 7.41869, - 6.06464, - 5.60067, - 5.79658, - 6.28959, - 6.37978, - 5.37279, - 5.64599, - 5.55292, - 6.02494, - 6.38921, - 6.3468, - 7.26949, - 8.02194, - 7.1935, - 5.6521, - 5.08724, - 5.61473, - 5.44635, - 5.78073, - 7.66364, - 8.84627 - ], - "train_epoch_time": 5.060525178909302, - "train_loss": 2.10171169666854, - "train_score": 0.3767216642925695, - "val_loss": 2.2307200163568615, - "val_score": 0.3445931402399674 - }, - { - "epoch": 12, - "grad_norm": 0.5706313848495483, - "learning_rate": 1.0, - "model_norm": 88.36199951171875, - "step_logs": { - "grad_norm": { - "648": 0.5740098357200623, - "649": 0.6466655731201172, - "650": 0.6953812837600708, - "651": 0.6530663371086121, - "652": 0.6339909434318542, - "653": 0.6604085564613342, - "654": 0.6708871126174927, - "655": 0.6575053930282593, - "656": 0.5776991844177246, - "657": 0.5261021256446838, - "658": 0.5378570556640625, - "659": 0.4962868094444275, - "660": 0.498409241437912, - "661": 0.5092989206314087, - "662": 0.5121369361877441, - "663": 0.5202082395553589, - "664": 0.5542728900909424, - "665": 0.506466269493103, - "666": 0.4806251525878906, - "667": 0.4512490928173065, - "668": 0.47063738107681274, - "669": 0.4600653052330017, - "670": 0.4330039322376251, - "671": 0.4742850363254547, - "672": 0.5568130612373352, - "673": 0.6384620666503906, - "674": 0.6680463552474976, - "675": 0.6430903673171997, - "676": 0.6576550006866455, - "677": 0.6591098308563232, - "678": 0.5544489622116089, - "679": 0.4980860948562622, - "680": 0.4885252118110657, - "681": 0.5175431370735168, - "682": 0.579421877861023, - "683": 0.5397631525993347, - "684": 0.4855535924434662, - "685": 0.4905700385570526, - "686": 0.45717519521713257, - "687": 0.4416581392288208, - "688": 0.45960694551467896, - "689": 0.5193267464637756, - "690": 0.5908728241920471, - "691": 0.5973421931266785, - "692": 0.5113508105278015, - "693": 0.4968108534812927, - "694": 0.481393039226532, - "695": 0.448138028383255, - "696": 0.42587414383888245, - "697": 0.4288692772388458, - "698": 0.460195928812027, - "699": 0.5171209573745728, - "700": 0.5499134659767151, - "701": 0.5706313848495483 - }, - "loss": { - "648": 2.0968985557556152, - "649": 2.1383843421936035, - "650": 2.1135270595550537, - "651": 2.143108606338501, - "652": 2.138307571411133, - "653": 2.1188862323760986, - "654": 2.140519618988037, - "655": 2.1357192993164062, - "656": 2.1380577087402344, - "657": 2.0793752670288086, - "658": 2.0889854431152344, - "659": 2.083698034286499, - "660": 2.0854196548461914, - "661": 2.058002471923828, - "662": 2.1096043586730957, - "663": 2.110630750656128, - "664": 2.0826213359832764, - "665": 2.102409839630127, - "666": 2.0383758544921875, - "667": 2.021230697631836, - "668": 2.061807155609131, - "669": 2.068986415863037, - "670": 2.0378987789154053, - "671": 2.0598032474517822, - "672": 2.074517250061035, - "673": 2.0568490028381348, - "674": 2.091447353363037, - "675": 2.0944764614105225, - "676": 2.0467898845672607, - "677": 2.083927631378174, - "678": 2.04795241355896, - "679": 2.0639595985412598, - "680": 2.0506954193115234, - "681": 2.056553602218628, - "682": 2.0537021160125732, - "683": 2.0639634132385254, - "684": 2.0526318550109863, - "685": 2.036045789718628, - "686": 2.060379981994629, - "687": 2.0722928047180176, - "688": 2.0301663875579834, - "689": 2.0304300785064697, - "690": 2.042562961578369, - "691": 2.057929277420044, - "692": 2.016390323638916, - "693": 1.987950086593628, - "694": 2.023526191711426, - "695": 2.035890579223633, - "696": 1.9924757480621338, - "697": 2.03464937210083, - "698": 2.0130343437194824, - "699": 2.040757179260254, - "700": 2.0372366905212402, - "701": 2.0226988792419434 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 6.36413, - 5.11359, - 4.37081, - 5.02492, - 5.31991, - 4.85828, - 4.75576, - 4.94021, - 6.40643, - 7.51264, - 7.22107, - 8.45998, - 8.39501, - 7.93415, - 8.0432, - 7.79934, - 6.77896, - 8.19627, - 8.82412, - 9.9262, - 9.3084, - 9.77504, - 10.8692, - 9.15687, - 6.69111, - 5.04583, - 4.68634, - 5.06444, - 4.73235, - 4.79697, - 6.66188, - 8.31941, - 8.59265, - 7.67798, - 6.11713, - 7.08428, - 8.70636, - 8.46029, - 9.85785, - 10.6238, - 9.61078, - 7.52847, - 5.85043, - 5.76745, - 7.71146, - 8.05422, - 8.73191, - 10.1375, - 10.9858, - 11.0622, - 9.5053, - 7.63145, - 6.73679, - 6.21184 - ], - "train_epoch_time": 5.060104608535767, - "train_loss": 2.0253352661214907, - "train_score": 0.3991896073223016, - "val_loss": 2.165483419164314, - "val_score": 0.36478365415409975 - }, - { - "epoch": 13, - "grad_norm": 0.317649245262146, - "learning_rate": 0.6666666666666667, - "model_norm": 88.42914581298828, - "step_logs": { - "grad_norm": { - "702": 0.5659703612327576, - "703": 0.5189346671104431, - "704": 0.47177356481552124, - "705": 0.417879581451416, - "706": 0.40076515078544617, - "707": 0.4317120611667633, - "708": 0.4694533050060272, - "709": 0.4872726798057556, - "710": 0.48583105206489563, - "711": 0.45588475465774536, - "712": 0.4326063096523285, - "713": 0.4227367639541626, - "714": 0.3997606933116913, - "715": 0.39034223556518555, - "716": 0.3973705470561981, - "717": 0.38979142904281616, - "718": 0.3650878071784973, - "719": 0.3651696443557739, - "720": 0.3692726492881775, - "721": 0.3553318977355957, - "722": 0.40644368529319763, - "723": 0.45729878544807434, - "724": 0.4858042895793915, - "725": 0.4603227376937866, - "726": 0.40636059641838074, - "727": 0.35133248567581177, - "728": 0.3609521985054016, - "729": 0.3591592311859131, - "730": 0.34919023513793945, - "731": 0.3697125017642975, - "732": 0.3958294093608856, - "733": 0.4055362045764923, - "734": 0.39539051055908203, - "735": 0.388135701417923, - "736": 0.3824511468410492, - "737": 0.4091372489929199, - "738": 0.43867823481559753, - "739": 0.4322429895401001, - "740": 0.4089679718017578, - "741": 0.39429357647895813, - "742": 0.384443461894989, - "743": 0.3956994116306305, - "744": 0.38095951080322266, - "745": 0.31946438550949097, - "746": 0.34223631024360657, - "747": 0.3230850100517273, - "748": 0.31484317779541016, - "749": 0.30294936895370483, - "750": 0.3151356279850006, - "751": 0.3215409815311432, - "752": 0.32952404022216797, - "753": 0.29319998621940613, - "754": 0.31509700417518616, - "755": 0.317649245262146 - }, - "loss": { - "702": 2.017223358154297, - "703": 2.0105414390563965, - "704": 1.9751551151275635, - "705": 1.9659652709960938, - "706": 1.9936549663543701, - "707": 1.993722677230835, - "708": 2.018618583679199, - "709": 1.9937139749526978, - "710": 2.004532814025879, - "711": 2.0115342140197754, - "712": 2.001229763031006, - "713": 1.9961116313934326, - "714": 1.973388910293579, - "715": 1.9947682619094849, - "716": 1.9720706939697266, - "717": 1.9973225593566895, - "718": 1.9759191274642944, - "719": 1.9842751026153564, - "720": 1.9865155220031738, - "721": 1.975325584411621, - "722": 1.9642441272735596, - "723": 1.950222373008728, - "724": 1.9741594791412354, - "725": 1.9610042572021484, - "726": 1.9816988706588745, - "727": 1.9468576908111572, - "728": 1.9696624279022217, - "729": 1.975189447402954, - "730": 1.9782061576843262, - "731": 1.9392387866973877, - "732": 1.9939148426055908, - "733": 1.957559585571289, - "734": 1.9628493785858154, - "735": 1.952294111251831, - "736": 1.9606735706329346, - "737": 1.9367647171020508, - "738": 1.9504387378692627, - "739": 1.9885499477386475, - "740": 1.9830780029296875, - "741": 1.9423038959503174, - "742": 1.9734399318695068, - "743": 1.9925496578216553, - "744": 1.9633898735046387, - "745": 1.9748756885528564, - "746": 1.9599859714508057, - "747": 1.9530096054077148, - "748": 1.9434850215911865, - "749": 1.9006482362747192, - "750": 1.944066047668457, - "751": 1.9230057001113892, - "752": 1.912054419517517, - "753": 1.9313993453979492, - "754": 1.915595293045044, - "755": 1.9286563396453857 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 6.29748, - 7.46599, - 8.8743, - 11.2583, - 12.4128, - 10.6974, - 9.15945, - 8.39689, - 8.49264, - 9.67871, - 10.6933, - 11.1698, - 12.3485, - 13.0919, - 12.4891, - 13.1457, - 14.8243, - 14.8803, - 14.5679, - 15.6448, - 11.8904, - 9.32576, - 8.36488, - 9.25452, - 12.0009, - 15.7724, - 15.1179, - 15.3121, - 16.2236, - 14.1874, - 12.726, - 11.903, - 12.5555, - 12.9592, - 13.4046, - 11.5701, - 10.1354, - 10.6434, - 11.8566, - 12.4933, - 13.3524, - 12.7256, - 13.5285, - 19.3506, - 16.734, - 18.7099, - 19.6062, - 20.7091, - 19.5756, - 18.5998, - 17.6086, - 22.467, - 19.2937, - 19.1143 - ], - "train_epoch_time": 5.057008504867554, - "train_loss": 1.932453947806119, - "train_score": 0.4267115764330586, - "val_loss": 2.091862884646305, - "val_score": 0.3893692590954931 - }, - { - "epoch": 14, - "grad_norm": 0.2712746560573578, - "learning_rate": 0.33333333333333337, - "model_norm": 88.4502944946289, - "step_logs": { - "grad_norm": { - "756": 0.3041907250881195, - "757": 0.28176945447921753, - "758": 0.2795979380607605, - "759": 0.30784180760383606, - "760": 0.31386154890060425, - "761": 0.30351075530052185, - "762": 0.29732781648635864, - "763": 0.3302968442440033, - "764": 0.3865208923816681, - "765": 0.3399239480495453, - "766": 0.3118269145488739, - "767": 0.2835691273212433, - "768": 0.29692161083221436, - "769": 0.2873125970363617, - "770": 0.3080272078514099, - "771": 0.304837167263031, - "772": 0.2905215620994568, - "773": 0.2977312207221985, - "774": 0.28075090050697327, - "775": 0.28373321890830994, - "776": 0.29116812348365784, - "777": 0.28700709342956543, - "778": 0.27636873722076416, - "779": 0.294653058052063, - "780": 0.282988578081131, - "781": 0.28469496965408325, - "782": 0.2952168583869934, - "783": 0.3174051344394684, - "784": 0.2970033586025238, - "785": 0.28664711117744446, - "786": 0.2819790542125702, - "787": 0.2900996208190918, - "788": 0.2898542582988739, - "789": 0.28095611929893494, - "790": 0.2739872634410858, - "791": 0.295239120721817, - "792": 0.2741515338420868, - "793": 0.26222220063209534, - "794": 0.28728288412094116, - "795": 0.2734343707561493, - "796": 0.27362772822380066, - "797": 0.2574091851711273, - "798": 0.2741304636001587, - "799": 0.2779829204082489, - "800": 0.26481491327285767, - "801": 0.2693365812301636, - "802": 0.28728002309799194, - "803": 0.2559102475643158, - "804": 0.26661020517349243, - "805": 0.28670406341552734, - "806": 0.2667236030101776, - "807": 0.27433377504348755, - "808": 0.2881845235824585, - "809": 0.2712746560573578 - }, - "loss": { - "756": 1.9308396577835083, - "757": 1.9228020906448364, - "758": 1.9220657348632812, - "759": 1.915724515914917, - "760": 1.9114307165145874, - "761": 1.9362471103668213, - "762": 1.9335390329360962, - "763": 1.9228084087371826, - "764": 1.9295916557312012, - "765": 1.9304677248001099, - "766": 1.9512678384780884, - "767": 1.9297728538513184, - "768": 1.9571216106414795, - "769": 1.8947694301605225, - "770": 1.9244377613067627, - "771": 1.8902053833007812, - "772": 1.9376835823059082, - "773": 1.9160189628601074, - "774": 1.947719931602478, - "775": 1.9249310493469238, - "776": 1.922447681427002, - "777": 1.9170377254486084, - "778": 1.9406498670578003, - "779": 1.930999994277954, - "780": 1.9417288303375244, - "781": 1.908583402633667, - "782": 1.8922275304794312, - "783": 1.9377039670944214, - "784": 1.924414873123169, - "785": 1.9473352432250977, - "786": 1.886911153793335, - "787": 1.934464454650879, - "788": 1.9261902570724487, - "789": 1.9330294132232666, - "790": 1.911017894744873, - "791": 1.9252042770385742, - "792": 1.925364375114441, - "793": 1.9318610429763794, - "794": 1.9344477653503418, - "795": 1.921375036239624, - "796": 1.9071916341781616, - "797": 1.9250377416610718, - "798": 1.924267053604126, - "799": 1.9045524597167969, - "800": 1.9065269231796265, - "801": 1.9118849039077759, - "802": 1.9054911136627197, - "803": 1.8885326385498047, - "804": 1.8974063396453857, - "805": 1.9161100387573242, - "806": 1.9150605201721191, - "807": 1.8886849880218506, - "808": 1.9351091384887695, - "809": 1.9303133487701416 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 20.8667, - 24.2185, - 24.5867, - 20.2152, - 19.4036, - 21.019, - 21.8717, - 17.6249, - 12.9157, - 16.707, - 20.0673, - 23.9987, - 22.199, - 22.9534, - 20.2827, - 20.341, - 22.9576, - 21.6148, - 24.7107, - 23.9108, - 22.676, - 23.2726, - 25.4079, - 22.2413, - 24.2466, - 23.5479, - 21.7116, - 19.2336, - 21.816, - 23.6998, - 23.7311, - 22.9862, - 22.9266, - 24.4885, - 25.4568, - 22.0866, - 25.6172, - 28.0955, - 23.4389, - 25.6984, - 25.4726, - 29.053, - 25.6065, - 24.6466, - 27.1868, - 26.3555, - 23.0885, - 28.8369, - 26.6936, - 23.3106, - 26.919, - 25.0958, - 23.3004, - 26.2307 - ], - "train_epoch_time": 5.059141159057617, - "train_loss": 1.9118006241372874, - "train_score": 0.4325513360832138, - "val_loss": 2.0735781036753607, - "val_score": 0.39506494040877893 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:34:11.713210", - "final_model_norm": 88.4502944946289, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:32:26.669694", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 9.14046573638916, - "learning_rate": 1e-10, - "model_norm": 87.33110809326172, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.540498733520508, - "3": 7.735904216766357, - "4": 10.549470901489258, - "5": 5.151391506195068, - "6": 3.74910306930542, - "7": 5.637049674987793, - "8": 14.351365089416504, - "9": 5.641451835632324, - "10": 14.016672134399414, - "11": 6.264270782470703, - "12": 11.634200096130371, - "13": 8.58130931854248, - "14": 13.615200996398926, - "15": 5.19003438949585, - "16": 12.74057388305664, - "17": 14.252779960632324, - "18": 4.026570796966553, - "19": 21.443078994750977, - "20": 19.168149948120117, - "21": 6.144132614135742, - "22": 18.513757705688477, - "23": 14.245088577270508, - "24": 4.473949909210205, - "25": 8.687564849853516, - "26": 6.300623893737793, - "27": 12.811702728271484, - "28": 10.243854522705078, - "29": 5.129880428314209, - "30": 5.583020210266113, - "31": 7.059929370880127, - "32": 6.936434268951416, - "33": 5.920306205749512, - "34": 6.535542011260986, - "35": 3.8587324619293213, - "36": 17.113021850585938, - "37": 12.387015342712402, - "38": 22.945186614990234, - "39": 3.712006092071533, - "40": 15.06403636932373, - "41": 9.611918449401855, - "42": 3.478496789932251, - "43": 14.003998756408691, - "44": 5.800577163696289, - "45": 11.2997465133667, - "46": 14.906519889831543, - "47": 4.250407695770264, - "48": 16.47998046875, - "49": 8.455068588256836, - "50": 6.812468528747559, - "51": 3.6709096431732178, - "52": 15.428959846496582, - "53": 9.14046573638916 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.8002490997314453, - "3": 4.169719219207764, - "4": 4.191576957702637, - "5": 4.636710166931152, - "6": 3.619516611099243, - "7": 4.080648899078369, - "8": 5.285762786865234, - "9": 4.630393981933594, - "10": 5.848069667816162, - "11": 5.102724552154541, - "12": 4.933624267578125, - "13": 6.367702960968018, - "14": 3.974186420440674, - "15": 5.206032752990723, - "16": 4.3080339431762695, - "17": 4.3890910148620605, - "18": 3.9542670249938965, - "19": 8.378786087036133, - "20": 6.230710029602051, - "21": 4.233414173126221, - "22": 7.307021617889404, - "23": 4.967606544494629, - "24": 3.9721381664276123, - "25": 4.917096138000488, - "26": 5.059874534606934, - "27": 7.570440292358398, - "28": 4.816390514373779, - "29": 4.824719429016113, - "30": 7.568645477294922, - "31": 4.005067348480225, - "32": 5.854215621948242, - "33": 4.211982250213623, - "34": 4.359143257141113, - "35": 5.44965934753418, - "36": 8.726181030273438, - "37": 5.648348808288574, - "38": 5.257631301879883, - "39": 3.895961284637451, - "40": 6.710821151733398, - "41": 5.615940570831299, - "42": 4.509154796600342, - "43": 6.425422191619873, - "44": 4.653565406799316, - "45": 4.685014724731445, - "46": 5.431200981140137, - "47": 4.134403228759766, - "48": 9.482399940490723, - "49": 4.99036979675293, - "50": 4.327738285064697, - "51": 4.721538066864014, - "52": 8.824649810791016, - "53": 5.54055118560791 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 0.00869338, - 0.00874607, - 0.0888363, - 0.0696762, - 0.0376631, - 0.174727, - 0.257511, - 0.128418, - 0.0256638, - 0.145491, - 0.0297662, - 0.130035, - 0.0364496, - 0.086472, - 0.0214388, - 0.193271, - 0.02654, - 0.0216061, - 0.243891, - 0.0182225, - 0.0169581, - 0.112142, - 0.0213182, - 0.0244803, - 0.198446, - 0.0651497, - 0.12746, - 0.0461219, - 0.0458981, - 0.18334, - 0.242817, - 0.0803543, - 0.121674, - 0.120171, - 0.102056, - 0.365999, - 0.0297969, - 0.0368119, - 0.00998635, - 0.282747, - 0.0295728, - 0.0607858, - 0.372659, - 0.032764, - 0.138307, - 0.0366922, - 0.0244424, - 0.22885, - 0.0349144, - 0.0698069, - 0.0932507, - 0.350377, - 0.0370701, - 0.0663157 - ], - "train_epoch_time": 5.061728000640869, - "train_loss": 4.287693012188291, - "train_score": 0.15268673783053285, - "val_loss": 4.306716903068989, - "val_score": 0.15118667517545986 - }, - { - "epoch": 1, - "grad_norm": 7.175811767578125, - "learning_rate": 1.0, - "model_norm": 87.40782165527344, - "step_logs": { - "grad_norm": { - "54": 3.4528262615203857, - "55": 6.201093673706055, - "56": 7.180618762969971, - "57": 1.6493779420852661, - "58": 10.742693901062012, - "59": 8.11362075805664, - "60": 9.391319274902344, - "61": 6.1381988525390625, - "62": 7.680230617523193, - "63": 2.422365188598633, - "64": 9.0484619140625, - "65": 5.7457170486450195, - "66": 11.505653381347656, - "67": 2.1069905757904053, - "68": 20.572898864746094, - "69": 25.69393539428711, - "70": 4.730662822723389, - "71": 8.376721382141113, - "72": 8.69029426574707, - "73": 3.0753259658813477, - "74": 11.962082862854004, - "75": 1.7809722423553467, - "76": 8.898144721984863, - "77": 2.424302816390991, - "78": 6.994433879852295, - "79": 1.733713150024414, - "80": 6.690541744232178, - "81": 7.088050842285156, - "82": 11.619805335998535, - "83": 2.3821053504943848, - "84": 17.625072479248047, - "85": 16.93046760559082, - "86": 4.84429407119751, - "87": 2.1030845642089844, - "88": 8.235093116760254, - "89": 4.266262054443359, - "90": 2.3467159271240234, - "91": 8.875101089477539, - "92": 4.1571855545043945, - "93": 1.907282829284668, - "94": 2.7073915004730225, - "95": 12.969512939453125, - "96": 2.8833096027374268, - "97": 1.6625070571899414, - "98": 10.158586502075195, - "99": 3.3437583446502686, - "100": 3.261718273162842, - "101": 9.503498077392578, - "102": 2.379850149154663, - "103": 6.041413307189941, - "104": 3.3136770725250244, - "105": 1.3911731243133545, - "106": 5.885049343109131, - "107": 7.175811767578125 - }, - "loss": { - "54": 4.309749603271484, - "55": 4.78863525390625, - "56": 4.782060623168945, - "57": 3.8678321838378906, - "58": 4.864871978759766, - "59": 4.850564002990723, - "60": 4.199524402618408, - "61": 4.783591270446777, - "62": 4.48616886138916, - "63": 3.7532734870910645, - "64": 4.647047996520996, - "65": 5.3974385261535645, - "66": 5.241326332092285, - "67": 3.7134509086608887, - "68": 11.796606063842773, - "69": 9.279533386230469, - "70": 4.44937801361084, - "71": 5.696907043457031, - "72": 4.520971298217773, - "73": 4.343443870544434, - "74": 7.670567512512207, - "75": 3.5260934829711914, - "76": 5.6596479415893555, - "77": 4.112434387207031, - "78": 4.969648361206055, - "79": 3.5763425827026367, - "80": 4.389827728271484, - "81": 6.5713348388671875, - "82": 5.505765914916992, - "83": 4.025215148925781, - "84": 11.941483497619629, - "85": 7.970841884613037, - "86": 4.357478141784668, - "87": 3.6590209007263184, - "88": 5.739189147949219, - "89": 4.797023773193359, - "90": 3.9198288917541504, - "91": 5.6885528564453125, - "92": 4.415287017822266, - "93": 3.770115375518799, - "94": 4.294294834136963, - "95": 4.677727699279785, - "96": 4.070676803588867, - "97": 3.5197064876556396, - "98": 6.075753688812256, - "99": 3.819824695587158, - "100": 4.312519073486328, - "101": 6.454775810241699, - "102": 3.901374340057373, - "103": 4.629861831665039, - "104": 4.020097732543945, - "105": 3.722615957260132, - "106": 3.950429916381836, - "107": 5.197689056396484 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.361495, - 0.12453, - 0.0927452, - 1.42176, - 0.0421546, - 0.0736822, - 0.0476153, - 0.126962, - 0.0760549, - 0.639633, - 0.0567581, - 0.163493, - 0.039593, - 0.836474, - 0.0278719, - 0.0140561, - 0.198818, - 0.0811878, - 0.0598636, - 0.459253, - 0.0536061, - 1.11168, - 0.071481, - 0.699722, - 0.101583, - 1.18983, - 0.0980675, - 0.130798, - 0.0407774, - 0.709361, - 0.0384412, - 0.0278078, - 0.185684, - 0.827278, - 0.0846279, - 0.263558, - 0.71178, - 0.0722196, - 0.255482, - 1.03639, - 0.585854, - 0.0278091, - 0.489648, - 1.27344, - 0.0588754, - 0.341644, - 0.405357, - 0.0714684, - 0.68884, - 0.12685, - 0.366114, - 1.92347, - 0.114063, - 0.100941 - ], - "train_epoch_time": 5.054845571517944, - "train_loss": 3.5755339094350806, - "train_score": 0.085475699445353, - "val_loss": 3.594644247322214, - "val_score": 0.08256493986987357 - }, - { - "epoch": 2, - "grad_norm": 7.0098371505737305, - "learning_rate": 1.0, - "model_norm": 87.48278045654297, - "step_logs": { - "grad_norm": { - "108": 1.2204278707504272, - "109": 5.494037628173828, - "110": 4.409607410430908, - "111": 0.7654702067375183, - "112": 1.916879653930664, - "113": 12.691763877868652, - "114": 2.0235939025878906, - "115": 11.483722686767578, - "116": 3.067296266555786, - "117": 3.7349209785461426, - "118": 17.036848068237305, - "119": 7.859272003173828, - "120": 1.3270570039749146, - "121": 6.711935997009277, - "122": 15.696311950683594, - "123": 1.587878942489624, - "124": 17.753625869750977, - "125": 10.150809288024902, - "126": 5.839665412902832, - "127": 8.229072570800781, - "128": 3.3307056427001953, - "129": 7.483287811279297, - "130": 3.436750888824463, - "131": 6.053197383880615, - "132": 2.3088631629943848, - "133": 6.7248616218566895, - "134": 6.891343116760254, - "135": 3.3974430561065674, - "136": 6.117650032043457, - "137": 2.8520541191101074, - "138": 3.0273396968841553, - "139": 11.214179992675781, - "140": 3.588993549346924, - "141": 15.84964370727539, - "142": 2.3070456981658936, - "143": 6.358824729919434, - "144": 2.4433462619781494, - "145": 3.083045244216919, - "146": 12.42273998260498, - "147": 3.2101223468780518, - "148": 1.143220067024231, - "149": 9.141404151916504, - "150": 0.843994140625, - "151": 7.48250150680542, - "152": 1.2340359687805176, - "153": 3.6514508724212646, - "154": 2.6564178466796875, - "155": 6.637094497680664, - "156": 0.8658096194267273, - "157": 2.854630470275879, - "158": 1.4180574417114258, - "159": 13.111741065979004, - "160": 4.767974853515625, - "161": 7.0098371505737305 - }, - "loss": { - "108": 3.5613646507263184, - "109": 5.051475524902344, - "110": 3.8872427940368652, - "111": 3.353987693786621, - "112": 3.6256444454193115, - "113": 5.47585916519165, - "114": 3.7858524322509766, - "115": 6.3127031326293945, - "116": 4.1142168045043945, - "117": 4.043586730957031, - "118": 8.346199035644531, - "119": 4.9846296310424805, - "120": 3.797414779663086, - "121": 5.008763790130615, - "122": 4.252381801605225, - "123": 3.622262716293335, - "124": 8.223060607910156, - "125": 5.423888206481934, - "126": 5.160830020904541, - "127": 5.00778341293335, - "128": 4.168197154998779, - "129": 4.644274711608887, - "130": 4.079484939575195, - "131": 4.279280662536621, - "132": 3.867823600769043, - "133": 4.053799629211426, - "134": 4.461296558380127, - "135": 4.58814811706543, - "136": 4.663360595703125, - "137": 4.162822723388672, - "138": 3.6984639167785645, - "139": 6.408550262451172, - "140": 4.045624732971191, - "141": 9.524209976196289, - "142": 3.49643874168396, - "143": 3.620353937149048, - "144": 3.507394790649414, - "145": 3.743192672729492, - "146": 8.14825439453125, - "147": 3.5900979042053223, - "148": 3.5978665351867676, - "149": 6.260908603668213, - "150": 3.3536481857299805, - "151": 4.870438575744629, - "152": 3.466395139694214, - "153": 3.8796536922454834, - "154": 3.573780059814453, - "155": 4.511680603027344, - "156": 3.4413130283355713, - "157": 3.4824066162109375, - "158": 3.356820583343506, - "159": 8.899965286254883, - "160": 3.874288558959961, - "161": 4.553544998168945 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 2.39107, - 0.167354, - 0.199913, - 5.72407, - 0.986724, - 0.0339945, - 0.924521, - 0.0478685, - 0.437296, - 0.28987, - 0.0287548, - 0.080699, - 2.1563, - 0.111182, - 0.0172598, - 1.43663, - 0.0260891, - 0.0526392, - 0.151336, - 0.0739509, - 0.37573, - 0.0829341, - 0.34539, - 0.116789, - 0.725555, - 0.0896387, - 0.0939406, - 0.397496, - 0.124603, - 0.511767, - 0.403552, - 0.0509594, - 0.31408, - 0.0379132, - 0.656922, - 0.0895359, - 0.587509, - 0.393806, - 0.0527995, - 0.348388, - 2.75287, - 0.0749224, - 4.70802, - 0.086991, - 2.27626, - 0.290979, - 0.506448, - 0.102419, - 4.5907, - 0.427346, - 1.66932, - 0.0517687, - 0.170421, - 0.0926688 - ], - "train_epoch_time": 5.053528785705566, - "train_loss": 3.23747347109287, - "train_score": 0.17411226684941108, - "val_loss": 3.259279743264655, - "val_score": 0.16584744532811793 - }, - { - "epoch": 3, - "grad_norm": 7.853468418121338, - "learning_rate": 1.0, - "model_norm": 87.54737091064453, - "step_logs": { - "grad_norm": { - "162": 0.6172052621841431, - "163": 1.9305968284606934, - "164": 4.235742568969727, - "165": 2.452990770339966, - "166": 5.52947998046875, - "167": 1.1597410440444946, - "168": 9.82834243774414, - "169": 0.4273718297481537, - "170": 1.0338839292526245, - "171": 2.1340601444244385, - "172": 4.069834232330322, - "173": 11.8688383102417, - "174": 3.339928388595581, - "175": 1.7839720249176025, - "176": 3.062002658843994, - "177": 3.803767204284668, - "178": 16.14332389831543, - "179": 1.9862242937088013, - "180": 14.544238090515137, - "181": 1.266656756401062, - "182": 2.9746577739715576, - "183": 6.9587273597717285, - "184": 1.6131359338760376, - "185": 1.3693383932113647, - "186": 4.1614179611206055, - "187": 3.629279851913452, - "188": 9.446269989013672, - "189": 2.237483024597168, - "190": 2.3593883514404297, - "191": 23.537370681762695, - "192": 12.847769737243652, - "193": 4.78884744644165, - "194": 0.6743913888931274, - "195": 3.7378122806549072, - "196": 1.1711188554763794, - "197": 17.905345916748047, - "198": 2.683520555496216, - "199": 6.063100337982178, - "200": 0.49248263239860535, - "201": 5.227361679077148, - "202": 1.9489352703094482, - "203": 7.9462151527404785, - "204": 1.9341949224472046, - "205": 14.63154411315918, - "206": 0.8991637825965881, - "207": 5.2558159828186035, - "208": 8.559276580810547, - "209": 0.727784276008606, - "210": 2.4869818687438965, - "211": 17.560007095336914, - "212": 12.949395179748535, - "213": 4.641385078430176, - "214": 1.5345288515090942, - "215": 7.853468418121338 - }, - "loss": { - "162": 3.2352874279022217, - "163": 3.313533067703247, - "164": 3.998837947845459, - "165": 3.569653034210205, - "166": 3.628875255584717, - "167": 3.510059118270874, - "168": 5.282551288604736, - "169": 3.336914300918579, - "170": 3.3781416416168213, - "171": 4.039737224578857, - "172": 4.64213752746582, - "173": 6.292857646942139, - "174": 3.917433977127075, - "175": 3.502185821533203, - "176": 3.984590768814087, - "177": 4.618399620056152, - "178": 7.850765705108643, - "179": 3.507638692855835, - "180": 6.019065856933594, - "181": 3.395009756088257, - "182": 4.153111457824707, - "183": 4.425516128540039, - "184": 3.4384937286376953, - "185": 3.6666247844696045, - "186": 3.678715705871582, - "187": 4.557547569274902, - "188": 4.31423282623291, - "189": 3.6561923027038574, - "190": 3.309612274169922, - "191": 12.58102035522461, - "192": 8.059301376342773, - "193": 5.145270347595215, - "194": 3.1643738746643066, - "195": 3.66489577293396, - "196": 3.1843864917755127, - "197": 9.492039680480957, - "198": 3.327655792236328, - "199": 4.9989519119262695, - "200": 3.0658178329467773, - "201": 3.711319923400879, - "202": 3.4502482414245605, - "203": 4.297966957092285, - "204": 3.5015501976013184, - "205": 8.624146461486816, - "206": 3.18898868560791, - "207": 3.6747875213623047, - "208": 5.067270278930664, - "209": 3.1893138885498047, - "210": 3.2587647438049316, - "211": 8.330520629882812, - "212": 5.538828372955322, - "213": 3.901963233947754, - "214": 3.286574125289917, - "215": 3.848623275756836 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 8.49285, - 0.889013, - 0.222882, - 0.593245, - 0.118687, - 2.60971, - 0.0546869, - 18.2698, - 3.16034, - 0.887033, - 0.280262, - 0.0446716, - 0.351178, - 1.10043, - 0.424984, - 0.319201, - 0.0301249, - 0.889116, - 0.0284542, - 2.11604, - 0.469353, - 0.0913912, - 1.32138, - 1.95544, - 0.212429, - 0.346011, - 0.0483485, - 0.730314, - 0.594536, - 0.0227091, - 0.048825, - 0.22436, - 6.95767, - 0.262317, - 2.32179, - 0.029607, - 0.462092, - 0.135985, - 12.6405, - 0.13582, - 0.908355, - 0.0680679, - 0.935966, - 0.0402843, - 3.94435, - 0.133031, - 0.0691672, - 6.02132, - 0.526875, - 0.0270161, - 0.0330308, - 0.181129, - 1.3957, - 0.0623997 - ], - "train_epoch_time": 5.054205894470215, - "train_loss": 3.422062549221635, - "train_score": 0.16294050397968704, - "val_loss": 3.4332452540282676, - "val_score": 0.161725925540404 - }, - { - "epoch": 4, - "grad_norm": 3.5653417110443115, - "learning_rate": 1.0, - "model_norm": 87.57437896728516, - "step_logs": { - "grad_norm": { - "216": 2.9874424934387207, - "217": 12.020308494567871, - "218": 3.519164800643921, - "219": 1.7579094171524048, - "220": 4.699622631072998, - "221": 3.8211545944213867, - "222": 7.332284927368164, - "223": 1.3821289539337158, - "224": 13.866406440734863, - "225": 6.391863822937012, - "226": 0.764019250869751, - "227": 1.0673123598098755, - "228": 3.289612293243408, - "229": 2.5527477264404297, - "230": 15.946304321289062, - "231": 1.965239405632019, - "232": 14.026849746704102, - "233": 4.282284736633301, - "234": 13.360882759094238, - "235": 2.3937058448791504, - "236": 12.24718952178955, - "237": 5.173635005950928, - "238": 3.0230350494384766, - "239": 2.0358736515045166, - "240": 3.061434745788574, - "241": 1.8501865863800049, - "242": 7.4878644943237305, - "243": 7.070363998413086, - "244": 4.0442962646484375, - "245": 8.527976989746094, - "246": 1.5660851001739502, - "247": 4.070375442504883, - "248": 2.791182518005371, - "249": 2.0854952335357666, - "250": 5.372145652770996, - "251": 5.553926467895508, - "252": 2.3045144081115723, - "253": 6.567760944366455, - "254": 1.954593539237976, - "255": 19.65473747253418, - "256": 3.2429943084716797, - "257": 12.28372859954834, - "258": 4.283907413482666, - "259": 9.966032981872559, - "260": 0.7446930408477783, - "261": 1.3271123170852661, - "262": 7.779701232910156, - "263": 3.4554829597473145, - "264": 5.531050205230713, - "265": 0.6556902527809143, - "266": 0.7802390456199646, - "267": 1.6575236320495605, - "268": 2.9647600650787354, - "269": 3.5653417110443115 - }, - "loss": { - "216": 3.4151387214660645, - "217": 5.445035457611084, - "218": 4.039470672607422, - "219": 3.2380259037017822, - "220": 4.49238395690918, - "221": 4.033121109008789, - "222": 4.075538635253906, - "223": 3.3841769695281982, - "224": 7.204192161560059, - "225": 3.931335926055908, - "226": 3.2176685333251953, - "227": 3.145339012145996, - "228": 3.6017677783966064, - "229": 3.7597885131835938, - "230": 5.91054630279541, - "231": 3.1509499549865723, - "232": 10.538995742797852, - "233": 3.3791840076446533, - "234": 6.149799346923828, - "235": 3.2908077239990234, - "236": 5.412066459655762, - "237": 4.008817195892334, - "238": 3.629178524017334, - "239": 3.342593193054199, - "240": 4.230134963989258, - "241": 3.3233747482299805, - "242": 4.715473175048828, - "243": 3.988399028778076, - "244": 3.8643691539764404, - "245": 5.3102498054504395, - "246": 3.37919545173645, - "247": 4.587761878967285, - "248": 3.408756732940674, - "249": 3.3481478691101074, - "250": 3.5039312839508057, - "251": 4.441200256347656, - "252": 3.299842357635498, - "253": 4.784389495849609, - "254": 3.215451717376709, - "255": 9.556995391845703, - "256": 3.1535205841064453, - "257": 7.965487003326416, - "258": 4.3026018142700195, - "259": 4.868965148925781, - "260": 3.0050177574157715, - "261": 3.155625820159912, - "262": 3.924818277359009, - "263": 3.555025577545166, - "264": 4.410586357116699, - "265": 2.9707541465759277, - "266": 3.061960220336914, - "267": 3.229783296585083, - "268": 3.5198092460632324, - "269": 4.313325881958008 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 0.382657, - 0.0376851, - 0.326171, - 1.04782, - 0.2034, - 0.276218, - 0.0758065, - 1.77156, - 0.0374677, - 0.0962244, - 5.5123, - 2.76111, - 0.332833, - 0.576963, - 0.0232438, - 0.81585, - 0.0535647, - 0.184273, - 0.0344501, - 0.574329, - 0.036082, - 0.14977, - 0.39712, - 0.806458, - 0.45134, - 0.970841, - 0.0841026, - 0.0797839, - 0.236261, - 0.0730168, - 1.37779, - 0.276906, - 0.437542, - 0.769815, - 0.121412, - 0.143979, - 0.621347, - 0.110915, - 0.841645, - 0.0247393, - 0.299849, - 0.05279, - 0.23445, - 0.0490221, - 5.41867, - 1.79172, - 0.0648476, - 0.297732, - 0.144172, - 6.90986, - 5.02972, - 1.17558, - 0.400442, - 0.33932 - ], - "train_epoch_time": 5.053279638290405, - "train_loss": 4.454620310292182, - "train_score": 0.15660980094539556, - "val_loss": 4.4871344747006825, - "val_score": 0.15454129624927906 - }, - { - "epoch": 5, - "grad_norm": 1.597386360168457, - "learning_rate": 1.0, - "model_norm": 87.60677337646484, - "step_logs": { - "grad_norm": { - "270": 8.417795181274414, - "271": 1.4617302417755127, - "272": 1.4719135761260986, - "273": 15.30471420288086, - "274": 4.0817766189575195, - "275": 3.172008752822876, - "276": 3.6194591522216797, - "277": 8.161170959472656, - "278": 0.9640328884124756, - "279": 1.392067313194275, - "280": 8.410759925842285, - "281": 2.810370683670044, - "282": 12.157135009765625, - "283": 0.6151496171951294, - "284": 7.482866287231445, - "285": 2.229586601257324, - "286": 12.283149719238281, - "287": 9.091983795166016, - "288": 2.0408763885498047, - "289": 20.326557159423828, - "290": 6.192249298095703, - "291": 6.54166841506958, - "292": 2.60191011428833, - "293": 6.936756610870361, - "294": 2.6483194828033447, - "295": 1.08181631565094, - "296": 4.590422630310059, - "297": 1.1043760776519775, - "298": 4.337271690368652, - "299": 2.220872640609741, - "300": 3.5218417644500732, - "301": 5.153324604034424, - "302": 1.065319538116455, - "303": 2.6600210666656494, - "304": 8.69677734375, - "305": 3.3303956985473633, - "306": 9.6586332321167, - "307": 1.1139414310455322, - "308": 7.699582099914551, - "309": 0.9463465809822083, - "310": 2.01292085647583, - "311": 7.939146995544434, - "312": 2.6454808712005615, - "313": 14.22165298461914, - "314": 2.000232458114624, - "315": 7.018811225891113, - "316": 2.581660509109497, - "317": 9.128364562988281, - "318": 2.0029964447021484, - "319": 20.82325553894043, - "320": 17.0695743560791, - "321": 6.073815822601318, - "322": 5.015772342681885, - "323": 1.597386360168457 - }, - "loss": { - "270": 4.441112995147705, - "271": 3.257725238800049, - "272": 3.1244821548461914, - "273": 9.042449951171875, - "274": 4.215341091156006, - "275": 3.6792783737182617, - "276": 4.04018497467041, - "277": 4.592798709869385, - "278": 3.0867416858673096, - "279": 3.073683261871338, - "280": 4.694775581359863, - "281": 3.5920286178588867, - "282": 5.0841474533081055, - "283": 2.957658290863037, - "284": 3.9417872428894043, - "285": 3.2137694358825684, - "286": 5.38819694519043, - "287": 3.749476909637451, - "288": 3.2391600608825684, - "289": 11.951048851013184, - "290": 3.421565055847168, - "291": 4.073790550231934, - "292": 3.080479383468628, - "293": 4.6739654541015625, - "294": 3.5670626163482666, - "295": 2.984158515930176, - "296": 4.135107517242432, - "297": 3.0487303733825684, - "298": 3.6981968879699707, - "299": 3.355250835418701, - "300": 3.2816882133483887, - "301": 4.190012454986572, - "302": 3.058805465698242, - "303": 3.6626811027526855, - "304": 3.765085220336914, - "305": 3.299260377883911, - "306": 4.9024786949157715, - "307": 3.06119704246521, - "308": 4.217517852783203, - "309": 2.9054903984069824, - "310": 3.2250277996063232, - "311": 4.552387237548828, - "312": 3.392378330230713, - "313": 6.536539554595947, - "314": 3.0595862865448, - "315": 5.461810111999512, - "316": 3.5451297760009766, - "317": 4.10023307800293, - "318": 3.104283332824707, - "319": 11.160408020019531, - "320": 6.498387336730957, - "321": 3.823298692703247, - "322": 3.855466842651367, - "323": 3.2338428497314453 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 0.0626751, - 1.52468, - 1.44216, - 0.0386043, - 0.253008, - 0.365674, - 0.3084, - 0.0689561, - 3.32136, - 1.58613, - 0.0663658, - 0.454792, - 0.0343998, - 7.81603, - 0.0703975, - 0.646496, - 0.0357128, - 0.0453579, - 0.777677, - 0.0289253, - 0.0892335, - 0.0951966, - 0.455023, - 0.0971343, - 0.508593, - 2.54985, - 0.196237, - 2.49968, - 0.196588, - 0.680264, - 0.26458, - 0.157776, - 2.69521, - 0.517641, - 0.0497804, - 0.297457, - 0.0525514, - 2.46699, - 0.0711414, - 3.24429, - 0.79594, - 0.0722257, - 0.484725, - 0.0323182, - 0.764719, - 0.110869, - 0.531905, - 0.0492065, - 0.773751, - 0.0257385, - 0.0223028, - 0.103637, - 0.15325, - 1.26736 - ], - "train_epoch_time": 5.054124355316162, - "train_loss": 3.313465996965274, - "train_score": 0.13426963772886624, - "val_loss": 3.340253053384862, - "val_score": 0.13163748585397697 - }, - { - "epoch": 6, - "grad_norm": 1.318442702293396, - "learning_rate": 1.0, - "model_norm": 87.65409088134766, - "step_logs": { - "grad_norm": { - "324": 3.0638318061828613, - "325": 12.547501564025879, - "326": 1.7084776163101196, - "327": 1.8790518045425415, - "328": 19.083650588989258, - "329": 5.657773494720459, - "330": 5.736194610595703, - "331": 2.032508611679077, - "332": 9.56373119354248, - "333": 3.7583446502685547, - "334": 11.385799407958984, - "335": 1.3768935203552246, - "336": 1.9528535604476929, - "337": 6.754176616668701, - "338": 1.1516849994659424, - "339": 2.5240859985351562, - "340": 2.83101224899292, - "341": 4.5815815925598145, - "342": 2.5633962154388428, - "343": 2.7312276363372803, - "344": 9.310015678405762, - "345": 1.2251770496368408, - "346": 14.864018440246582, - "347": 5.785479545593262, - "348": 1.253085970878601, - "349": 0.7698249220848083, - "350": 1.2421190738677979, - "351": 7.74329948425293, - "352": 1.8452045917510986, - "353": 7.990381240844727, - "354": 1.5456805229187012, - "355": 12.205272674560547, - "356": 5.901059627532959, - "357": 5.625377178192139, - "358": 2.415379285812378, - "359": 3.196598768234253, - "360": 3.5479352474212646, - "361": 1.6618934869766235, - "362": 4.429972171783447, - "363": 1.192750096321106, - "364": 1.450696349143982, - "365": 3.5292797088623047, - "366": 5.045935153961182, - "367": 1.5372174978256226, - "368": 3.057121992111206, - "369": 4.3452229499816895, - "370": 1.2983942031860352, - "371": 2.991034507751465, - "372": 2.1848366260528564, - "373": 7.562220096588135, - "374": 1.6354453563690186, - "375": 1.0294127464294434, - "376": 0.9437535405158997, - "377": 1.318442702293396 - }, - "loss": { - "324": 3.3156137466430664, - "325": 6.474240303039551, - "326": 3.3303921222686768, - "327": 3.134063243865967, - "328": 11.810436248779297, - "329": 3.640613317489624, - "330": 4.041092872619629, - "331": 3.205493688583374, - "332": 4.940890789031982, - "333": 3.2587618827819824, - "334": 5.539989471435547, - "335": 3.1151323318481445, - "336": 3.157496452331543, - "337": 4.483071327209473, - "338": 3.2040624618530273, - "339": 3.286912202835083, - "340": 3.587984800338745, - "341": 3.3555707931518555, - "342": 3.631004810333252, - "343": 3.1962809562683105, - "344": 5.258813858032227, - "345": 2.946056604385376, - "346": 7.268603324890137, - "347": 3.9211862087249756, - "348": 2.9738659858703613, - "349": 2.8974318504333496, - "350": 2.9549481868743896, - "351": 4.75447940826416, - "352": 2.9924001693725586, - "353": 4.7252912521362305, - "354": 2.9017269611358643, - "355": 6.49427604675293, - "356": 4.867671012878418, - "357": 4.351002216339111, - "358": 3.3011906147003174, - "359": 3.2043204307556152, - "360": 4.115439414978027, - "361": 2.9570465087890625, - "362": 3.3966400623321533, - "363": 3.3509864807128906, - "364": 3.225102424621582, - "365": 3.329432487487793, - "366": 4.254251480102539, - "367": 3.0636508464813232, - "368": 3.234981060028076, - "369": 3.8633975982666016, - "370": 2.89851713180542, - "371": 3.677140712738037, - "372": 3.105072021484375, - "373": 4.470493316650391, - "374": 3.2047791481018066, - "375": 3.0857763290405273, - "376": 2.8954758644104004, - "377": 2.9634528160095215 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 0.353211, - 0.041122, - 1.14098, - 0.887626, - 0.0324297, - 0.113732, - 0.122815, - 0.775944, - 0.0540195, - 0.230706, - 0.0427348, - 1.64314, - 0.827949, - 0.0982722, - 2.41565, - 0.515917, - 0.447679, - 0.159858, - 0.55258, - 0.428479, - 0.0606718, - 1.96265, - 0.0328987, - 0.117149, - 1.89391, - 4.8891, - 1.91524, - 0.0792959, - 0.878882, - 0.0740105, - 1.21455, - 0.0435949, - 0.139785, - 0.137495, - 0.565848, - 0.313588, - 0.326938, - 1.07066, - 0.17308, - 2.35545, - 1.53246, - 0.267299, - 0.167086, - 1.29649, - 0.346136, - 0.204619, - 1.71934, - 0.411024, - 0.65048, - 0.078173, - 1.19819, - 2.91196, - 3.25089, - 1.70481 - ], - "train_epoch_time": 5.054388761520386, - "train_loss": 7.305970393910128, - "train_score": 0.09277483861107737, - "val_loss": 7.319182059247513, - "val_score": 0.08958811701267924 - }, - { - "epoch": 7, - "grad_norm": 10.765946388244629, - "learning_rate": 1.0, - "model_norm": 87.68486785888672, - "step_logs": { - "grad_norm": { - "378": 16.040483474731445, - "379": 6.54575252532959, - "380": 3.660334825515747, - "381": 1.4622583389282227, - "382": 3.6394519805908203, - "383": 2.0608949661254883, - "384": 2.3969147205352783, - "385": 17.816192626953125, - "386": 4.788029193878174, - "387": 0.9748720526695251, - "388": 1.0093756914138794, - "389": 1.5624254941940308, - "390": 4.910091400146484, - "391": 2.506113290786743, - "392": 3.3157408237457275, - "393": 6.0191802978515625, - "394": 1.1205449104309082, - "395": 1.7486339807510376, - "396": 3.960408926010132, - "397": 7.913658618927002, - "398": 1.0244340896606445, - "399": 14.30500602722168, - "400": 1.8058902025222778, - "401": 2.6652021408081055, - "402": 17.545543670654297, - "403": 4.092648029327393, - "404": 0.9539400339126587, - "405": 0.704377293586731, - "406": 1.2798709869384766, - "407": 1.3182331323623657, - "408": 11.263158798217773, - "409": 1.4286426305770874, - "410": 4.004367351531982, - "411": 1.3061292171478271, - "412": 2.056056261062622, - "413": 0.5090538263320923, - "414": 0.37913328409194946, - "415": 3.504302740097046, - "416": 7.081801891326904, - "417": 0.7794700860977173, - "418": 1.9352850914001465, - "419": 1.1940546035766602, - "420": 3.6309051513671875, - "421": 0.9127716422080994, - "422": 1.2052929401397705, - "423": 1.1224899291992188, - "424": 0.6754429340362549, - "425": 0.7979111671447754, - "426": 2.139202356338501, - "427": 1.3098818063735962, - "428": 13.790217399597168, - "429": 2.17396879196167, - "430": 1.4117320775985718, - "431": 10.765946388244629 - }, - "loss": { - "378": 7.28767728805542, - "379": 4.843731880187988, - "380": 3.365016222000122, - "381": 2.9327170848846436, - "382": 3.264479160308838, - "383": 3.3833961486816406, - "384": 3.165174961090088, - "385": 10.999656677246094, - "386": 3.9248909950256348, - "387": 2.911363124847412, - "388": 2.8497610092163086, - "389": 3.074125289916992, - "390": 3.433293342590332, - "391": 3.32497501373291, - "392": 3.445816993713379, - "393": 4.257643699645996, - "394": 2.925107002258301, - "395": 2.9123294353485107, - "396": 4.722896099090576, - "397": 3.664672374725342, - "398": 2.894896984100342, - "399": 6.906097412109375, - "400": 2.846384286880493, - "401": 3.2363619804382324, - "402": 10.042871475219727, - "403": 3.3986458778381348, - "404": 3.043595314025879, - "405": 2.8186025619506836, - "406": 2.8277218341827393, - "407": 2.88370943069458, - "408": 5.247016429901123, - "409": 2.9663054943084717, - "410": 4.31719446182251, - "411": 2.9837493896484375, - "412": 3.439638137817383, - "413": 2.7930190563201904, - "414": 2.6830191612243652, - "415": 3.115664482116699, - "416": 4.403307914733887, - "417": 2.753737688064575, - "418": 3.036257266998291, - "419": 2.8469786643981934, - "420": 3.9103312492370605, - "421": 2.8575730323791504, - "422": 2.954301118850708, - "423": 2.9263386726379395, - "424": 2.8519067764282227, - "425": 2.7864508628845215, - "426": 2.9868521690368652, - "427": 2.929961919784546, - "428": 7.111339569091797, - "429": 3.026057243347168, - "430": 3.060291290283203, - "431": 5.268321990966797 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 0.028324, - 0.113047, - 0.251157, - 1.37158, - 0.246457, - 0.796601, - 0.550925, - 0.0346537, - 0.171204, - 3.06338, - 2.79707, - 1.25928, - 0.142407, - 0.529404, - 0.313423, - 0.117515, - 2.32961, - 0.952451, - 0.301112, - 0.0585168, - 2.75845, - 0.0337487, - 0.872792, - 0.455614, - 0.032623, - 0.202907, - 3.3446, - 5.68098, - 1.72625, - 1.65946, - 0.0413611, - 1.45334, - 0.269236, - 1.749, - 0.81366, - 10.7782, - 18.6655, - 0.253716, - 0.0877994, - 4.53235, - 0.810678, - 1.99681, - 0.296609, - 3.42983, - 2.03362, - 2.32252, - 6.25112, - 4.37665, - 0.652695, - 1.70765, - 0.0373946, - 0.640281, - 1.53553, - 0.0454536 - ], - "train_epoch_time": 5.054681062698364, - "train_loss": 2.857751656741631, - "train_score": 0.2232077205754079, - "val_loss": 2.8952823470845974, - "val_score": 0.2193509616753264 - }, - { - "epoch": 8, - "grad_norm": 0.39214810729026794, - "learning_rate": 1.0, - "model_norm": 87.74516296386719, - "step_logs": { - "grad_norm": { - "432": 2.168149471282959, - "433": 1.7605563402175903, - "434": 1.1056766510009766, - "435": 5.546317100524902, - "436": 0.8200904726982117, - "437": 2.1366324424743652, - "438": 1.5437718629837036, - "439": 1.4036411046981812, - "440": 2.254242181777954, - "441": 1.900120735168457, - "442": 4.486093997955322, - "443": 1.224945068359375, - "444": 1.8282771110534668, - "445": 1.2649118900299072, - "446": 1.4886144399642944, - "447": 1.285316824913025, - "448": 0.9994505643844604, - "449": 1.1726855039596558, - "450": 1.128950834274292, - "451": 0.6170597672462463, - "452": 0.6761702299118042, - "453": 0.8361582159996033, - "454": 0.567862868309021, - "455": 0.30281925201416016, - "456": 0.29304444789886475, - "457": 0.38171258568763733, - "458": 0.4615105986595154, - "459": 0.6041643619537354, - "460": 0.5766599178314209, - "461": 0.46891769766807556, - "462": 0.5205958485603333, - "463": 0.5562890768051147, - "464": 0.4942656457424164, - "465": 0.5113962292671204, - "466": 0.48930346965789795, - "467": 0.45395800471305847, - "468": 0.5059782862663269, - "469": 0.6073412299156189, - "470": 0.5460482239723206, - "471": 0.4364083707332611, - "472": 0.43995529413223267, - "473": 0.48216062784194946, - "474": 0.4718942642211914, - "475": 0.46095019578933716, - "476": 0.4751294255256653, - "477": 0.5134651064872742, - "478": 0.5055081844329834, - "479": 0.4690570831298828, - "480": 0.4867781698703766, - "481": 0.48497480154037476, - "482": 0.4860299527645111, - "483": 0.5182611346244812, - "484": 0.471593976020813, - "485": 0.39214810729026794 - }, - "loss": { - "432": 2.8666603565216064, - "433": 3.3808634281158447, - "434": 2.907029867172241, - "435": 4.311890602111816, - "436": 2.824399948120117, - "437": 3.127277374267578, - "438": 3.1276135444641113, - "439": 3.1792473793029785, - "440": 3.2935683727264404, - "441": 3.2622523307800293, - "442": 3.510226011276245, - "443": 3.0254292488098145, - "444": 3.121299982070923, - "445": 3.1505966186523438, - "446": 3.000731945037842, - "447": 3.341097593307495, - "448": 2.9106626510620117, - "449": 2.8891658782958984, - "450": 3.0368666648864746, - "451": 2.789989471435547, - "452": 2.7675275802612305, - "453": 2.7667741775512695, - "454": 2.7946126461029053, - "455": 2.6461939811706543, - "456": 2.6305489540100098, - "457": 2.6349916458129883, - "458": 2.671107769012451, - "459": 2.67372989654541, - "460": 2.7174861431121826, - "461": 2.6675281524658203, - "462": 2.689272403717041, - "463": 2.6624982357025146, - "464": 2.672647476196289, - "465": 2.670224189758301, - "466": 2.680692672729492, - "467": 2.642289638519287, - "468": 2.6854379177093506, - "469": 2.67608642578125, - "470": 2.7051339149475098, - "471": 2.675262928009033, - "472": 2.6756863594055176, - "473": 2.6470866203308105, - "474": 2.6868438720703125, - "475": 2.6295394897460938, - "476": 2.645143508911133, - "477": 2.650545120239258, - "478": 2.67291259765625, - "479": 2.6213717460632324, - "480": 2.6770386695861816, - "481": 2.634006977081299, - "482": 2.666285991668701, - "483": 2.658201217651367, - "484": 2.672632932662964, - "485": 2.6321921348571777 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 0.609815, - 1.09076, - 2.3779, - 0.140171, - 4.19955, - 0.685026, - 1.31234, - 1.61366, - 0.648135, - 0.903556, - 0.174421, - 2.01629, - 0.933795, - 1.96912, - 1.35414, - 2.02241, - 2.91386, - 2.10092, - 2.38273, - 7.32737, - 6.05313, - 3.95728, - 8.66632, - 28.8572, - 30.6323, - 18.0845, - 12.5409, - 7.32499, - 8.17199, - 12.1315, - 9.92278, - 8.60376, - 10.9401, - 10.2102, - 11.1967, - 12.8218, - 10.4894, - 7.25495, - 9.0725, - 14.0469, - 13.8235, - 11.3864, - 12.0657, - 12.3758, - 11.7172, - 10.0534, - 10.4599, - 11.9145, - 11.2978, - 11.199, - 11.2871, - 9.8967, - 12.0172, - 17.1166 - ], - "train_epoch_time": 5.055560350418091, - "train_loss": 2.6317582880236325, - "train_score": 0.2569628766568183, - "val_loss": 2.6588520338559944, - "val_score": 0.2486590485245708 - }, - { - "epoch": 9, - "grad_norm": 0.44535982608795166, - "learning_rate": 1.0, - "model_norm": 87.81460571289062, - "step_logs": { - "grad_norm": { - "486": 0.3982353210449219, - "487": 0.43557921051979065, - "488": 0.4927448034286499, - "489": 0.5576047301292419, - "490": 0.5290592908859253, - "491": 0.5076530575752258, - "492": 0.5319492220878601, - "493": 0.5007034540176392, - "494": 0.5210025310516357, - "495": 0.5394535660743713, - "496": 0.5030139088630676, - "497": 0.4605294466018677, - "498": 0.4453807473182678, - "499": 0.44898661971092224, - "500": 0.46897217631340027, - "501": 0.48422494530677795, - "502": 0.4780116081237793, - "503": 0.4895212948322296, - "504": 0.535971999168396, - "505": 0.5532742142677307, - "506": 0.49585941433906555, - "507": 0.4018837511539459, - "508": 0.402686208486557, - "509": 0.487864226102829, - "510": 0.54140305519104, - "511": 0.5641275644302368, - "512": 0.5425588488578796, - "513": 0.48240816593170166, - "514": 0.4451873004436493, - "515": 0.43826135993003845, - "516": 0.49162229895591736, - "517": 0.5225799679756165, - "518": 0.4846140742301941, - "519": 0.4425986409187317, - "520": 0.44406652450561523, - "521": 0.4758024215698242, - "522": 0.49207353591918945, - "523": 0.48803049325942993, - "524": 0.4920251965522766, - "525": 0.4774007797241211, - "526": 0.48840782046318054, - "527": 0.46008068323135376, - "528": 0.4562215209007263, - "529": 0.46872478723526, - "530": 0.4545108675956726, - "531": 0.453834593296051, - "532": 0.44453826546669006, - "533": 0.46896350383758545, - "534": 0.5051932334899902, - "535": 0.5283986330032349, - "536": 0.5464285016059875, - "537": 0.5510775446891785, - "538": 0.5074120163917542, - "539": 0.44535982608795166 - }, - "loss": { - "486": 2.6298608779907227, - "487": 2.61812686920166, - "488": 2.661372184753418, - "489": 2.642263650894165, - "490": 2.6732637882232666, - "491": 2.6382131576538086, - "492": 2.6739726066589355, - "493": 2.6452417373657227, - "494": 2.6538732051849365, - "495": 2.63507080078125, - "496": 2.659754514694214, - "497": 2.621670722961426, - "498": 2.630413770675659, - "499": 2.6134023666381836, - "500": 2.64987850189209, - "501": 2.6161062717437744, - "502": 2.6288294792175293, - "503": 2.6221654415130615, - "504": 2.6408352851867676, - "505": 2.6397414207458496, - "506": 2.653784990310669, - "507": 2.5975451469421387, - "508": 2.609415292739868, - "509": 2.598310947418213, - "510": 2.637651205062866, - "511": 2.64570951461792, - "512": 2.6593527793884277, - "513": 2.6202707290649414, - "514": 2.6349387168884277, - "515": 2.609262466430664, - "516": 2.617173671722412, - "517": 2.6070337295532227, - "518": 2.639909029006958, - "519": 2.5958285331726074, - "520": 2.6046042442321777, - "521": 2.5836737155914307, - "522": 2.6052651405334473, - "523": 2.5893874168395996, - "524": 2.6298346519470215, - "525": 2.6113293170928955, - "526": 2.60847544670105, - "527": 2.5925660133361816, - "528": 2.609440803527832, - "529": 2.602233409881592, - "530": 2.622786283493042, - "531": 2.577503204345703, - "532": 2.5837182998657227, - "533": 2.5699374675750732, - "534": 2.61978816986084, - "535": 2.6043004989624023, - "536": 2.6448514461517334, - "537": 2.6181392669677734, - "538": 2.64224910736084, - "539": 2.574448585510254 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 16.5826, - 13.7993, - 10.9613, - 8.49813, - 9.55065, - 10.2371, - 9.44967, - 10.5513, - 9.77689, - 9.05491, - 10.5119, - 12.3613, - 13.2605, - 12.964, - 12.0485, - 11.1574, - 11.505, - 10.9425, - 9.193, - 8.62344, - 10.7932, - 16.0828, - 16.092, - 10.9167, - 8.99862, - 8.31357, - 9.03404, - 11.2594, - 13.2949, - 13.5847, - 10.8285, - 9.54643, - 11.2408, - 13.2512, - 13.2083, - 11.4126, - 10.7595, - 10.8718, - 10.8631, - 11.4576, - 10.9351, - 12.2479, - 12.5371, - 11.8443, - 12.6962, - 12.5142, - 13.0746, - 11.6854, - 10.2648, - 9.32755, - 8.85798, - 8.62119, - 10.2625, - 12.9796 - ], - "train_epoch_time": 5.059179782867432, - "train_loss": 2.5937873890957497, - "train_score": 0.2638618634027593, - "val_loss": 2.62206916283521, - "val_score": 0.25666439396368784 - }, - { - "epoch": 10, - "grad_norm": 0.5642505884170532, - "learning_rate": 1.0, - "model_norm": 87.89739227294922, - "step_logs": { - "grad_norm": { - "540": 0.43889129161834717, - "541": 0.4413491189479828, - "542": 0.44447293877601624, - "543": 0.4841398596763611, - "544": 0.5043590664863586, - "545": 0.5050851702690125, - "546": 0.5263308882713318, - "547": 0.5236912965774536, - "548": 0.5113937258720398, - "549": 0.4963991343975067, - "550": 0.49282658100128174, - "551": 0.4909944236278534, - "552": 0.49198561906814575, - "553": 0.4985528588294983, - "554": 0.5279410481452942, - "555": 0.5446391701698303, - "556": 0.5695044994354248, - "557": 0.6038550734519958, - "558": 0.6127456426620483, - "559": 0.5691845417022705, - "560": 0.5303208827972412, - "561": 0.4937804937362671, - "562": 0.4819372892379761, - "563": 0.5437955260276794, - "564": 0.5550265312194824, - "565": 0.5230482220649719, - "566": 0.5679828524589539, - "567": 0.653569221496582, - "568": 0.6410107016563416, - "569": 0.5594778656959534, - "570": 0.5172213912010193, - "571": 0.5734394192695618, - "572": 0.5833762884140015, - "573": 0.5947811007499695, - "574": 0.5898205637931824, - "575": 0.5590900182723999, - "576": 0.5380058288574219, - "577": 0.5170160531997681, - "578": 0.5525601506233215, - "579": 0.6289882659912109, - "580": 0.6177880167961121, - "581": 0.5601961016654968, - "582": 0.5203932523727417, - "583": 0.6104792952537537, - "584": 0.7420664429664612, - "585": 0.9171600937843323, - "586": 0.6264526844024658, - "587": 0.41215798258781433, - "588": 0.3642028272151947, - "589": 0.38623979687690735, - "590": 0.4369104504585266, - "591": 0.5044211149215698, - "592": 0.5352712869644165, - "593": 0.5642505884170532 - }, - "loss": { - "540": 2.5873799324035645, - "541": 2.561875820159912, - "542": 2.588291645050049, - "543": 2.591524600982666, - "544": 2.5904970169067383, - "545": 2.6010489463806152, - "546": 2.619211196899414, - "547": 2.5873899459838867, - "548": 2.5964908599853516, - "549": 2.580134868621826, - "550": 2.5930261611938477, - "551": 2.5900847911834717, - "552": 2.5755648612976074, - "553": 2.5766239166259766, - "554": 2.5866951942443848, - "555": 2.5667924880981445, - "556": 2.6079423427581787, - "557": 2.5970170497894287, - "558": 2.594780683517456, - "559": 2.583996057510376, - "560": 2.5805587768554688, - "561": 2.559217929840088, - "562": 2.5547826290130615, - "563": 2.560436248779297, - "564": 2.5935473442077637, - "565": 2.540757656097412, - "566": 2.5761923789978027, - "567": 2.565190315246582, - "568": 2.613154888153076, - "569": 2.5507757663726807, - "570": 2.5534820556640625, - "571": 2.543264150619507, - "572": 2.5765066146850586, - "573": 2.5365328788757324, - "574": 2.562178134918213, - "575": 2.533876419067383, - "576": 2.5681610107421875, - "577": 2.5384161472320557, - "578": 2.538158416748047, - "579": 2.5635013580322266, - "580": 2.580650568008423, - "581": 2.5635955333709717, - "582": 2.533137321472168, - "583": 2.518244981765747, - "584": 2.584805727005005, - "585": 2.62091326713562, - "586": 2.6482770442962646, - "587": 2.5014262199401855, - "588": 2.4759087562561035, - "589": 2.505929470062256, - "590": 2.501068353652954, - "591": 2.498375415802002, - "592": 2.5276522636413574, - "593": 2.505748748779297 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 13.4322, - 13.1521, - 13.1015, - 11.0564, - 10.1836, - 10.1958, - 9.45481, - 9.43433, - 9.92833, - 10.4708, - 10.6762, - 10.7439, - 10.6406, - 10.3664, - 9.28056, - 8.65313, - 8.04088, - 7.12212, - 6.91099, - 7.97601, - 9.17564, - 10.4964, - 10.9995, - 8.6585, - 8.41912, - 9.28709, - 7.98561, - 6.00532, - 6.35967, - 8.14904, - 9.54509, - 7.73422, - 7.57066, - 7.17012, - 7.36494, - 8.10629, - 8.87255, - 9.49631, - 8.31304, - 6.4796, - 6.76161, - 8.16901, - 9.35396, - 6.75703, - 4.69399, - 3.11575, - 6.74818, - 14.7252, - 18.6659, - 16.7979, - 13.1021, - 9.81909, - 8.82205, - 7.87034 - ], - "train_epoch_time": 5.054889917373657, - "train_loss": 2.532563376939793, - "train_score": 0.2531956151576774, - "val_loss": 2.5702949673382203, - "val_score": 0.244730374826082 - }, - { - "epoch": 11, - "grad_norm": 0.39967411756515503, - "learning_rate": 1.0, - "model_norm": 87.97118377685547, - "step_logs": { - "grad_norm": { - "594": 0.5848799347877502, - "595": 0.5964754223823547, - "596": 0.5421091914176941, - "597": 0.5192058086395264, - "598": 0.5717551708221436, - "599": 0.7164661288261414, - "600": 0.6851852536201477, - "601": 0.6123928427696228, - "602": 0.5692559480667114, - "603": 0.5325015783309937, - "604": 0.5284161567687988, - "605": 0.5314343571662903, - "606": 0.46375802159309387, - "607": 0.38732707500457764, - "608": 0.39238420128822327, - "609": 0.4661814272403717, - "610": 0.5565862059593201, - "611": 0.622113823890686, - "612": 0.662469208240509, - "613": 0.7418915629386902, - "614": 0.695763349533081, - "615": 0.4969223141670227, - "616": 0.3927744925022125, - "617": 0.38481831550598145, - "618": 0.40111711621284485, - "619": 0.44783347845077515, - "620": 0.47042691707611084, - "621": 0.4632425606250763, - "622": 0.4563848674297333, - "623": 0.4969564974308014, - "624": 0.5564669966697693, - "625": 0.553923487663269, - "626": 0.5589980483055115, - "627": 0.5743780136108398, - "628": 0.5629103183746338, - "629": 0.530710756778717, - "630": 0.5712909698486328, - "631": 0.601861298084259, - "632": 0.5945495963096619, - "633": 0.5405967831611633, - "634": 0.49745386838912964, - "635": 0.4777105152606964, - "636": 0.4817655384540558, - "637": 0.5321722030639648, - "638": 0.5701903700828552, - "639": 0.638569176197052, - "640": 0.6125555634498596, - "641": 0.5752057433128357, - "642": 0.6520606279373169, - "643": 0.7927195429801941, - "644": 0.7485238313674927, - "645": 0.6046181321144104, - "646": 0.4587094187736511, - "647": 0.39967411756515503 - }, - "loss": { - "594": 2.5547661781311035, - "595": 2.516893148422241, - "596": 2.5100326538085938, - "597": 2.508277177810669, - "598": 2.5054330825805664, - "599": 2.520761489868164, - "600": 2.587801694869995, - "601": 2.535466194152832, - "602": 2.5401253700256348, - "603": 2.508542537689209, - "604": 2.4997620582580566, - "605": 2.498105764389038, - "606": 2.512206554412842, - "607": 2.4507334232330322, - "608": 2.461714267730713, - "609": 2.4511613845825195, - "610": 2.4950764179229736, - "611": 2.518521547317505, - "612": 2.532031536102295, - "613": 2.5306496620178223, - "614": 2.5719969272613525, - "615": 2.5061893463134766, - "616": 2.478519916534424, - "617": 2.4409000873565674, - "618": 2.4431161880493164, - "619": 2.4791605472564697, - "620": 2.461848020553589, - "621": 2.4595818519592285, - "622": 2.4788684844970703, - "623": 2.4791648387908936, - "624": 2.492696762084961, - "625": 2.4767537117004395, - "626": 2.481961727142334, - "627": 2.492222785949707, - "628": 2.4911341667175293, - "629": 2.4656450748443604, - "630": 2.474794387817383, - "631": 2.498361110687256, - "632": 2.497685432434082, - "633": 2.489813804626465, - "634": 2.4618020057678223, - "635": 2.4729981422424316, - "636": 2.436277389526367, - "637": 2.4613890647888184, - "638": 2.476503372192383, - "639": 2.478748083114624, - "640": 2.519442319869995, - "641": 2.481924057006836, - "642": 2.4866180419921875, - "643": 2.5106842517852783, - "644": 2.5524468421936035, - "645": 2.4838905334472656, - "646": 2.4614758491516113, - "647": 2.432997465133667 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 7.46823, - 7.07424, - 8.54094, - 9.30457, - 7.66413, - 4.91067, - 5.51207, - 6.76079, - 7.83863, - 8.84667, - 8.95255, - 8.84528, - 11.6808, - 16.3358, - 15.9888, - 11.2788, - 8.05414, - 6.50738, - 5.76949, - 4.59781, - 5.31309, - 10.1493, - 16.0659, - 16.4831, - 15.1845, - 12.3615, - 11.1244, - 11.4616, - 11.9012, - 10.0385, - 8.0499, - 8.07204, - 7.94282, - 7.55425, - 7.86174, - 8.75417, - 7.58271, - 6.89703, - 7.0658, - 8.51962, - 9.94827, - 10.8366, - 10.4968, - 8.69112, - 7.61727, - 6.07878, - 6.7145, - 7.5014, - 5.84835, - 3.99533, - 4.5556, - 6.7947, - 11.6982, - 15.231 - ], - "train_epoch_time": 5.055654525756836, - "train_loss": 2.439288756495056, - "train_score": 0.2868398044424043, - "val_loss": 2.482028426861243, - "val_score": 0.27646024663741225 - }, - { - "epoch": 12, - "grad_norm": 0.29364854097366333, - "learning_rate": 1.0, - "model_norm": 88.0263442993164, - "step_logs": { - "grad_norm": { - "648": 0.42541980743408203, - "649": 0.4486834406852722, - "650": 0.45945242047309875, - "651": 0.49960291385650635, - "652": 0.5295918583869934, - "653": 0.48951247334480286, - "654": 0.4583229422569275, - "655": 0.4312009811401367, - "656": 0.44353899359703064, - "657": 0.4767831861972809, - "658": 0.5039674639701843, - "659": 0.4627559185028076, - "660": 0.40119317173957825, - "661": 0.4087030291557312, - "662": 0.4404308497905731, - "663": 0.44678232073783875, - "664": 0.4529418349266052, - "665": 0.46593615412712097, - "666": 0.44451114535331726, - "667": 0.38647735118865967, - "668": 0.40457549691200256, - "669": 0.46550610661506653, - "670": 0.4510818421840668, - "671": 0.38932713866233826, - "672": 0.38247546553611755, - "673": 0.3675960898399353, - "674": 0.3833872079849243, - "675": 0.4027199149131775, - "676": 0.3951297700405121, - "677": 0.38941389322280884, - "678": 0.3857887387275696, - "679": 0.39228710532188416, - "680": 0.3770066797733307, - "681": 0.3638162612915039, - "682": 0.2915111184120178, - "683": 0.22512826323509216, - "684": 0.23857611417770386, - "685": 0.24426469206809998, - "686": 0.2337995320558548, - "687": 0.2389509081840515, - "688": 0.27739131450653076, - "689": 0.37564846873283386, - "690": 0.33689069747924805, - "691": 0.31788650155067444, - "692": 0.27095070481300354, - "693": 0.2696477174758911, - "694": 0.2522718608379364, - "695": 0.22648663818836212, - "696": 0.1982397884130478, - "697": 0.1943386346101761, - "698": 0.18440209329128265, - "699": 0.17222952842712402, - "700": 0.2079312950372696, - "701": 0.29364854097366333 - }, - "loss": { - "648": 2.470357656478882, - "649": 2.4453601837158203, - "650": 2.449160099029541, - "651": 2.4454240798950195, - "652": 2.460860013961792, - "653": 2.4324779510498047, - "654": 2.436105251312256, - "655": 2.4385781288146973, - "656": 2.4207332134246826, - "657": 2.417686939239502, - "658": 2.447439670562744, - "659": 2.4163479804992676, - "660": 2.4013919830322266, - "661": 2.4144887924194336, - "662": 2.4173641204833984, - "663": 2.411128520965576, - "664": 2.4195351600646973, - "665": 2.4303739070892334, - "666": 2.418672800064087, - "667": 2.410890579223633, - "668": 2.38808012008667, - "669": 2.4081008434295654, - "670": 2.4407973289489746, - "671": 2.396388530731201, - "672": 2.377713918685913, - "673": 2.3852338790893555, - "674": 2.387624979019165, - "675": 2.386573314666748, - "676": 2.386322498321533, - "677": 2.39239764213562, - "678": 2.4039902687072754, - "679": 2.3619823455810547, - "680": 2.373314380645752, - "681": 2.3773021697998047, - "682": 2.4054088592529297, - "683": 2.3658714294433594, - "684": 2.3728911876678467, - "685": 2.3722269535064697, - "686": 2.36580228805542, - "687": 2.3664608001708984, - "688": 2.3748106956481934, - "689": 2.3627021312713623, - "690": 2.3584699630737305, - "691": 2.384561777114868, - "692": 2.3721556663513184, - "693": 2.3655025959014893, - "694": 2.3643574714660645, - "695": 2.3771610260009766, - "696": 2.3603529930114746, - "697": 2.344078540802002, - "698": 2.361901044845581, - "699": 2.3433241844177246, - "700": 2.369680643081665, - "701": 2.346346855163574 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 13.6497, - 12.1468, - 11.6021, - 9.79725, - 8.77413, - 10.1513, - 11.5972, - 13.1153, - 12.305, - 10.6355, - 9.63623, - 11.2838, - 14.9196, - 14.4547, - 12.462, - 12.0789, - 11.7936, - 11.1949, - 12.2409, - 16.141, - 14.5898, - 11.1128, - 11.9956, - 15.8099, - 16.2537, - 17.6518, - 16.2439, - 14.7153, - 15.2844, - 15.7765, - 16.1523, - 15.3486, - 16.6977, - 17.9606, - 28.306, - 46.68, - 41.6892, - 39.7589, - 43.2804, - 41.4459, - 30.8634, - 16.7435, - 20.7803, - 23.5974, - 32.3119, - 32.5334, - 37.1514, - 46.3419, - 60.0614, - 62.066, - 69.4592, - 78.9982, - 54.8088, - 27.2105 - ], - "train_epoch_time": 5.059138059616089, - "train_loss": 2.3581412921505986, - "train_score": 0.3145422346195155, - "val_loss": 2.3948335491557073, - "val_score": 0.30483549802503135 - }, - { - "epoch": 13, - "grad_norm": 0.25887343287467957, - "learning_rate": 0.6666666666666667, - "model_norm": 88.05181121826172, - "step_logs": { - "grad_norm": { - "702": 0.2517961263656616, - "703": 0.2711848020553589, - "704": 0.28986310958862305, - "705": 0.387401282787323, - "706": 0.31486833095550537, - "707": 0.2716244161128998, - "708": 0.2819242477416992, - "709": 0.29586946964263916, - "710": 0.2761354446411133, - "711": 0.2607029974460602, - "712": 0.24697215855121613, - "713": 0.24466226994991302, - "714": 0.24863287806510925, - "715": 0.4027062654495239, - "716": 0.5951622724533081, - "717": 0.5314468741416931, - "718": 0.3024754226207733, - "719": 0.1868147999048233, - "720": 0.16853132843971252, - "721": 0.15246227383613586, - "722": 0.14348968863487244, - "723": 0.1538223773241043, - "724": 0.16693183779716492, - "725": 0.1562102735042572, - "726": 0.15216726064682007, - "727": 0.14588244259357452, - "728": 0.1636030673980713, - "729": 0.17198902368545532, - "730": 0.18787144124507904, - "731": 0.1787651926279068, - "732": 0.21472664177417755, - "733": 0.1897151619195938, - "734": 0.20950596034526825, - "735": 0.21321755647659302, - "736": 0.18100839853286743, - "737": 0.1854843646287918, - "738": 0.20567253232002258, - "739": 0.21263569593429565, - "740": 0.19190005958080292, - "741": 0.19911877810955048, - "742": 0.16771019995212555, - "743": 0.16397693753242493, - "744": 0.17830683290958405, - "745": 0.14768682420253754, - "746": 0.15644720196723938, - "747": 0.18825316429138184, - "748": 0.1873304843902588, - "749": 0.1808556467294693, - "750": 0.17900241911411285, - "751": 0.17609266936779022, - "752": 0.20888206362724304, - "753": 0.19842274487018585, - "754": 0.20021870732307434, - "755": 0.25887343287467957 - }, - "loss": { - "702": 2.3515613079071045, - "703": 2.352108955383301, - "704": 2.3576269149780273, - "705": 2.353095054626465, - "706": 2.3777709007263184, - "707": 2.348574161529541, - "708": 2.3504819869995117, - "709": 2.333197593688965, - "710": 2.346602439880371, - "711": 2.35115122795105, - "712": 2.342665195465088, - "713": 2.346747875213623, - "714": 2.3737337589263916, - "715": 2.3537206649780273, - "716": 2.35170578956604, - "717": 2.3858892917633057, - "718": 2.368903160095215, - "719": 2.35884428024292, - "720": 2.3632116317749023, - "721": 2.3307394981384277, - "722": 2.347752094268799, - "723": 2.3527705669403076, - "724": 2.363231897354126, - "725": 2.349132537841797, - "726": 2.32879638671875, - "727": 2.3327298164367676, - "728": 2.3728489875793457, - "729": 2.3306446075439453, - "730": 2.3423075675964355, - "731": 2.349453926086426, - "732": 2.3523170948028564, - "733": 2.335592269897461, - "734": 2.348813056945801, - "735": 2.351442813873291, - "736": 2.317401885986328, - "737": 2.341111898422241, - "738": 2.326826572418213, - "739": 2.3484549522399902, - "740": 2.3195347785949707, - "741": 2.3290226459503174, - "742": 2.3366763591766357, - "743": 2.333981513977051, - "744": 2.343405246734619, - "745": 2.326702117919922, - "746": 2.3418684005737305, - "747": 2.32875394821167, - "748": 2.3379621505737305, - "749": 2.3344545364379883, - "750": 2.3377208709716797, - "751": 2.3253703117370605, - "752": 2.3394603729248047, - "753": 2.3347830772399902, - "754": 2.328230381011963, - "755": 2.326993227005005 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 37.0901, - 31.9836, - 28.0601, - 15.679, - 23.9835, - 31.8322, - 29.5728, - 26.6533, - 30.7748, - 34.593, - 38.4073, - 39.2042, - 38.3986, - 14.5137, - 6.63915, - 8.44755, - 25.8921, - 67.5891, - 83.2034, - 100.27, - 114.028, - 99.4353, - 84.8063, - 96.2694, - 100.575, - 109.612, - 88.6517, - 78.7906, - 66.3624, - 73.5192, - 51.0181, - 64.8923, - 53.5126, - 51.7236, - 70.73, - 68.0468, - 55.0062, - 51.941, - 62.987, - 58.7421, - 83.0768, - 86.8024, - 73.7075, - 106.674, - 95.6813, - 65.7111, - 66.6224, - 71.3709, - 72.9583, - 74.9911, - 53.6183, - 59.3012, - 58.0787, - 34.7232 - ], - "train_epoch_time": 5.059368371963501, - "train_loss": 2.331776088074256, - "train_score": 0.31920283353824697, - "val_loss": 2.369336447677437, - "val_score": 0.3103024545890182 - }, - { - "epoch": 14, - "grad_norm": 0.14123967289924622, - "learning_rate": 0.33333333333333337, - "model_norm": 88.06018829345703, - "step_logs": { - "grad_norm": { - "756": 0.16919466853141785, - "757": 0.1527901291847229, - "758": 0.16814734041690826, - "759": 0.14379505813121796, - "760": 0.15826955437660217, - "761": 0.16517005860805511, - "762": 0.17118597030639648, - "763": 0.17684140801429749, - "764": 0.2197951078414917, - "765": 0.1795675903558731, - "766": 0.15357689559459686, - "767": 0.15348678827285767, - "768": 0.1800214797258377, - "769": 0.1680680364370346, - "770": 0.15507693588733673, - "771": 0.16153189539909363, - "772": 0.13893342018127441, - "773": 0.15212024748325348, - "774": 0.16563336551189423, - "775": 0.21204416453838348, - "776": 0.2171325534582138, - "777": 0.17124004662036896, - "778": 0.15722417831420898, - "779": 0.1687515527009964, - "780": 0.20852503180503845, - "781": 0.1994674801826477, - "782": 0.1389341801404953, - "783": 0.18625572323799133, - "784": 0.16316533088684082, - "785": 0.15010342001914978, - "786": 0.17625698447227478, - "787": 0.14616595208644867, - "788": 0.15358756482601166, - "789": 0.1764170080423355, - "790": 0.16849461197853088, - "791": 0.1559830605983734, - "792": 0.1519165337085724, - "793": 0.1520063877105713, - "794": 0.16567540168762207, - "795": 0.1518193483352661, - "796": 0.13931231200695038, - "797": 0.15041916072368622, - "798": 0.13906803727149963, - "799": 0.13872286677360535, - "800": 0.15496313571929932, - "801": 0.14736458659172058, - "802": 0.15293791890144348, - "803": 0.1286572813987732, - "804": 0.15545165538787842, - "805": 0.14150285720825195, - "806": 0.12946298718452454, - "807": 0.14146363735198975, - "808": 0.14373642206192017, - "809": 0.14123967289924622 - }, - "loss": { - "756": 2.3208799362182617, - "757": 2.323700189590454, - "758": 2.3460261821746826, - "759": 2.3306610584259033, - "760": 2.2934744358062744, - "761": 2.3358302116394043, - "762": 2.3395979404449463, - "763": 2.322214126586914, - "764": 2.322810649871826, - "765": 2.320448875427246, - "766": 2.3309519290924072, - "767": 2.330899715423584, - "768": 2.3207345008850098, - "769": 2.3087668418884277, - "770": 2.3470306396484375, - "771": 2.3387796878814697, - "772": 2.3258748054504395, - "773": 2.3206069469451904, - "774": 2.3296265602111816, - "775": 2.325906753540039, - "776": 2.3431174755096436, - "777": 2.336763381958008, - "778": 2.303090810775757, - "779": 2.329092502593994, - "780": 2.312742233276367, - "781": 2.3172004222869873, - "782": 2.324556827545166, - "783": 2.32741117477417, - "784": 2.336142063140869, - "785": 2.312987804412842, - "786": 2.3157477378845215, - "787": 2.3127593994140625, - "788": 2.3509538173675537, - "789": 2.317056179046631, - "790": 2.328007936477661, - "791": 2.322556734085083, - "792": 2.3323583602905273, - "793": 2.321770668029785, - "794": 2.318619728088379, - "795": 2.3258066177368164, - "796": 2.3157896995544434, - "797": 2.3239922523498535, - "798": 2.326474905014038, - "799": 2.313377857208252, - "800": 2.346735715866089, - "801": 2.326061964035034, - "802": 2.3059816360473633, - "803": 2.311988353729248, - "804": 2.327362060546875, - "805": 2.318028211593628, - "806": 2.3194198608398438, - "807": 2.3261709213256836, - "808": 2.311486005783081, - "809": 2.333019256591797 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 81.0736, - 99.5381, - 82.9761, - 112.717, - 91.5586, - 85.6207, - 79.8371, - 74.2565, - 48.0815, - 71.9641, - 98.8283, - 98.9422, - 71.6105, - 81.7353, - 97.5943, - 89.634, - 120.496, - 100.283, - 84.9162, - 51.7297, - 49.6986, - 79.69, - 93.1692, - 81.7883, - 53.1877, - 58.2397, - 120.426, - 67.0894, - 87.7493, - 102.658, - 74.5416, - 108.253, - 99.6626, - 74.4484, - 81.9997, - 95.4578, - 101.061, - 100.484, - 84.4721, - 100.907, - 119.322, - 102.714, - 120.294, - 120.213, - 97.7254, - 107.111, - 98.5883, - 139.674, - 96.3104, - 115.768, - 138.385, - 116.239, - 111.881, - 116.951 - ], - "train_epoch_time": 5.06749415397644, - "train_loss": 2.3210341138169275, - "train_score": 0.3224399210048714, - "val_loss": 2.361155847731195, - "val_score": 0.3132175665293167 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:35:56.893715", - "final_model_norm": 88.06018829345703, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:34:11.873439", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "prox-sps", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 16.325838088989258, - "learning_rate": 1e-10, - "model_norm": 87.34168243408203, - "step_logs": { - "grad_norm": { - "0": 22.7664794921875, - "1": 23.4499454498291, - "2": 6.298552989959717, - "3": 7.566888809204102, - "4": 14.49585247039795, - "5": 5.620354175567627, - "6": 10.216382026672363, - "7": 4.534229755401611, - "8": 11.747849464416504, - "9": 5.449873924255371, - "10": 13.424872398376465, - "11": 6.095279216766357, - "12": 11.157068252563477, - "13": 5.757944107055664, - "14": 10.918036460876465, - "15": 4.400775909423828, - "16": 20.614965438842773, - "17": 5.2823591232299805, - "18": 20.4493465423584, - "19": 21.37148666381836, - "20": 24.3276424407959, - "21": 3.937091112136841, - "22": 11.294756889343262, - "23": 19.098108291625977, - "24": 15.733933448791504, - "25": 5.697427749633789, - "26": 16.87322425842285, - "27": 7.170352935791016, - "28": 10.589845657348633, - "29": 4.0917582511901855, - "30": 17.086423873901367, - "31": 6.23681640625, - "32": 23.630807876586914, - "33": 3.5906713008880615, - "34": 19.72943115234375, - "35": 10.054206848144531, - "36": 8.447721481323242, - "37": 5.513556957244873, - "38": 16.578889846801758, - "39": 4.4890360832214355, - "40": 16.984851837158203, - "41": 13.220318794250488, - "42": 4.785301208496094, - "43": 16.19448471069336, - "44": 12.653915405273438, - "45": 3.955620288848877, - "46": 16.038116455078125, - "47": 14.444923400878906, - "48": 7.361199855804443, - "49": 9.815719604492188, - "50": 17.361591339111328, - "51": 4.043751239776611, - "52": 18.901411056518555, - "53": 16.325838088989258 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.7870278358459473, - "3": 4.075554847717285, - "4": 4.519266128540039, - "5": 4.4081573486328125, - "6": 4.090362548828125, - "7": 4.264717102050781, - "8": 4.81776237487793, - "9": 4.620635509490967, - "10": 5.196425914764404, - "11": 4.694077491760254, - "12": 5.102016448974609, - "13": 5.162631034851074, - "14": 4.536558151245117, - "15": 4.507107734680176, - "16": 7.70632266998291, - "17": 7.171968460083008, - "18": 6.168941974639893, - "19": 5.053447723388672, - "20": 5.458719253540039, - "21": 3.8100318908691406, - "22": 4.792364597320557, - "23": 5.147730827331543, - "24": 5.587794303894043, - "25": 4.857039928436279, - "26": 7.223695278167725, - "27": 4.380226135253906, - "28": 4.780410289764404, - "29": 4.617242813110352, - "30": 8.140088081359863, - "31": 7.500243663787842, - "32": 5.915950775146484, - "33": 4.088742733001709, - "34": 5.866542816162109, - "35": 6.472012996673584, - "36": 4.977142810821533, - "37": 4.597317218780518, - "38": 6.1798930168151855, - "39": 3.996333122253418, - "40": 8.745774269104004, - "41": 5.304678916931152, - "42": 4.416441917419434, - "43": 7.424479007720947, - "44": 5.2670793533325195, - "45": 4.364896297454834, - "46": 8.071239471435547, - "47": 6.173081874847412, - "48": 4.716697692871094, - "49": 4.693948268890381, - "50": 5.485614776611328, - "51": 4.669734001159668, - "52": 8.000372886657715, - "53": 5.641817569732666 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 0.00874362, - 0.00824185, - 0.095459, - 0.071179, - 0.021507, - 0.13955, - 0.0391893, - 0.207436, - 0.0349083, - 0.155571, - 0.0288326, - 0.126346, - 0.0409866, - 0.155717, - 0.0380572, - 0.232723, - 0.0181335, - 0.257029, - 0.014752, - 0.0110642, - 0.00922339, - 0.245798, - 0.0375661, - 0.0141135, - 0.0225718, - 0.149628, - 0.0253725, - 0.0851953, - 0.0426271, - 0.27578, - 0.0278822, - 0.192819, - 0.0105942, - 0.317131, - 0.0150714, - 0.0640241, - 0.0697431, - 0.151231, - 0.0224838, - 0.198315, - 0.0303162, - 0.0303512, - 0.192865, - 0.0283095, - 0.0328943, - 0.278962, - 0.0313786, - 0.029585, - 0.0870444, - 0.0487185, - 0.0181989, - 0.285577, - 0.0223935, - 0.0211674 - ], - "train_epoch_time": 5.057394027709961, - "train_loss": 4.25890385191273, - "train_score": 0.13767485653225284, - "val_loss": 4.278916929400474, - "val_score": 0.13572312698161698 - }, - { - "epoch": 1, - "grad_norm": 3.087127685546875, - "learning_rate": 1.0, - "model_norm": 87.36463165283203, - "step_logs": { - "grad_norm": { - "54": 4.851062774658203, - "55": 4.642487049102783, - "56": 13.736299514770508, - "57": 7.214140892028809, - "58": 6.71858549118042, - "59": 3.9326348304748535, - "60": 5.912207126617432, - "61": 7.219697952270508, - "62": 5.136228084564209, - "63": 6.090472221374512, - "64": 5.010668754577637, - "65": 3.2521190643310547, - "66": 8.625386238098145, - "67": 3.8423526287078857, - "68": 8.762495040893555, - "69": 10.446293830871582, - "70": 4.44512414932251, - "71": 8.88054084777832, - "72": 5.5230021476745605, - "73": 2.6638336181640625, - "74": 4.368422985076904, - "75": 7.9679107666015625, - "76": 4.711985111236572, - "77": 1.975710153579712, - "78": 11.510388374328613, - "79": 4.894310474395752, - "80": 2.6999690532684326, - "81": 4.465037822723389, - "82": 3.6896812915802, - "83": 2.1784634590148926, - "84": 4.675506114959717, - "85": 3.7070164680480957, - "86": 3.475722551345825, - "87": 4.807565212249756, - "88": 1.534299612045288, - "89": 7.62632417678833, - "90": 2.80254864692688, - "91": 7.808013916015625, - "92": 1.3819202184677124, - "93": 5.648556232452393, - "94": 2.7121543884277344, - "95": 7.299947738647461, - "96": 5.433496475219727, - "97": 1.0458855628967285, - "98": 4.237065315246582, - "99": 1.5481511354446411, - "100": 3.3716747760772705, - "101": 2.228264570236206, - "102": 1.277934193611145, - "103": 2.9510915279388428, - "104": 3.282909393310547, - "105": 1.9327986240386963, - "106": 1.456048607826233, - "107": 3.087127685546875 - }, - "loss": { - "54": 4.254249572753906, - "55": 4.582394123077393, - "56": 4.7759904861450195, - "57": 4.368451118469238, - "58": 4.007016181945801, - "59": 4.331964015960693, - "60": 4.342117786407471, - "61": 4.68231201171875, - "62": 4.248996734619141, - "63": 4.268795490264893, - "64": 4.463672637939453, - "65": 4.019813537597656, - "66": 5.721035957336426, - "67": 5.101027965545654, - "68": 4.912322044372559, - "69": 4.708672523498535, - "70": 3.6658873558044434, - "71": 4.833868026733398, - "72": 4.2417497634887695, - "73": 3.8356680870056152, - "74": 3.903273582458496, - "75": 4.723843574523926, - "76": 4.217850685119629, - "77": 3.323133945465088, - "78": 6.23946475982666, - "79": 4.575232982635498, - "80": 3.8332982063293457, - "81": 3.989940643310547, - "82": 4.012438774108887, - "83": 3.6906023025512695, - "84": 4.02376651763916, - "85": 4.2337493896484375, - "86": 3.80649995803833, - "87": 4.055908203125, - "88": 3.8032517433166504, - "89": 4.17458438873291, - "90": 3.67374849319458, - "91": 4.98996639251709, - "92": 3.4046430587768555, - "93": 4.4559502601623535, - "94": 4.028538227081299, - "95": 4.399440288543701, - "96": 3.4790291786193848, - "97": 3.110877513885498, - "98": 3.7854416370391846, - "99": 3.4571709632873535, - "100": 3.4675159454345703, - "101": 3.7974157333374023, - "102": 3.13344144821167, - "103": 3.4828577041625977, - "104": 4.106999397277832, - "105": 3.4190545082092285, - "106": 3.7496399879455566, - "107": 3.676380157470703 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.18078, - 0.212614, - 0.0253119, - 0.0839379, - 0.0887698, - 0.280103, - 0.124223, - 0.0898302, - 0.161064, - 0.115081, - 0.177787, - 0.380078, - 0.0768984, - 0.345512, - 0.0639781, - 0.0431493, - 0.185529, - 0.0612937, - 0.139058, - 0.540539, - 0.204541, - 0.0744058, - 0.189969, - 0.851337, - 0.0470942, - 0.190999, - 0.525842, - 0.200132, - 0.294734, - 0.777672, - 0.184067, - 0.308089, - 0.315091, - 0.175484, - 1.6156, - 0.0717766, - 0.467738, - 0.0818496, - 1.78281, - 0.139658, - 0.54767, - 0.0825578, - 0.117842, - 2.8439, - 0.210856, - 1.44243, - 0.305019, - 0.764812, - 1.91869, - 0.399917, - 0.381072, - 0.915235, - 1.76863, - 0.385755 - ], - "train_epoch_time": 5.054786443710327, - "train_loss": 3.416832287308133, - "train_score": 0.16706756639856862, - "val_loss": 3.4741060314988705, - "val_score": 0.16689240121540327 - }, - { - "epoch": 2, - "grad_norm": 3.008190393447876, - "learning_rate": 1.0, - "model_norm": 87.41412353515625, - "step_logs": { - "grad_norm": { - "108": 1.5771931409835815, - "109": 1.3276400566101074, - "110": 1.6606084108352661, - "111": 3.2099571228027344, - "112": 1.7904987335205078, - "113": 5.71084451675415, - "114": 2.318511486053467, - "115": 2.369474411010742, - "116": 2.9814114570617676, - "117": 1.5239108800888062, - "118": 3.0710394382476807, - "119": 1.6041967868804932, - "120": 5.355349063873291, - "121": 2.286020040512085, - "122": 4.396191596984863, - "123": 2.8245620727539062, - "124": 2.5127451419830322, - "125": 1.7162646055221558, - "126": 6.4136810302734375, - "127": 1.796766757965088, - "128": 7.797423839569092, - "129": 1.0371285676956177, - "130": 4.269420146942139, - "131": 3.9408934116363525, - "132": 1.033205509185791, - "133": 1.4431287050247192, - "134": 2.451185703277588, - "135": 3.02068829536438, - "136": 0.9753559827804565, - "137": 3.2207963466644287, - "138": 2.112476110458374, - "139": 1.313253402709961, - "140": 5.322528839111328, - "141": 1.7499157190322876, - "142": 6.138791084289551, - "143": 3.267159938812256, - "144": 3.9819841384887695, - "145": 1.6480112075805664, - "146": 4.6178436279296875, - "147": 2.288015365600586, - "148": 3.5858347415924072, - "149": 1.1324995756149292, - "150": 1.6467430591583252, - "151": 2.653310775756836, - "152": 2.111180305480957, - "153": 3.752389669418335, - "154": 1.9792944192886353, - "155": 3.4413907527923584, - "156": 3.5163180828094482, - "157": 1.061113715171814, - "158": 3.7987544536590576, - "159": 1.3852602243423462, - "160": 1.8876944780349731, - "161": 3.008190393447876 - }, - "loss": { - "108": 3.391155242919922, - "109": 3.254826068878174, - "110": 3.4949028491973877, - "111": 4.522764682769775, - "112": 3.1810269355773926, - "113": 4.352084159851074, - "114": 3.6936278343200684, - "115": 3.3618383407592773, - "116": 4.287606239318848, - "117": 3.4038262367248535, - "118": 3.589904308319092, - "119": 3.4578728675842285, - "120": 3.757467031478882, - "121": 3.3929529190063477, - "122": 3.8742623329162598, - "123": 3.7166049480438232, - "124": 3.2067177295684814, - "125": 3.4688572883605957, - "126": 4.05441951751709, - "127": 3.249584674835205, - "128": 5.004021644592285, - "129": 2.9070515632629395, - "130": 3.653980255126953, - "131": 3.5371079444885254, - "132": 3.0327553749084473, - "133": 2.936931610107422, - "134": 3.98585844039917, - "135": 3.6089138984680176, - "136": 3.0453946590423584, - "137": 3.212122917175293, - "138": 3.414034605026245, - "139": 2.92160701751709, - "140": 4.298502445220947, - "141": 3.044149398803711, - "142": 4.040562629699707, - "143": 3.4205234050750732, - "144": 3.7751450538635254, - "145": 3.0437707901000977, - "146": 4.015471458435059, - "147": 3.212235450744629, - "148": 3.880786418914795, - "149": 2.9279117584228516, - "150": 2.974810838699341, - "151": 3.8245153427124023, - "152": 3.2268872261047363, - "153": 3.343679666519165, - "154": 3.5337448120117188, - "155": 3.7032947540283203, - "156": 3.5217983722686768, - "157": 2.9809324741363525, - "158": 3.4148528575897217, - "159": 2.962611198425293, - "160": 3.130906820297241, - "161": 4.3312482833862305 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 1.36326, - 1.84658, - 1.26736, - 0.43894, - 0.992246, - 0.133443, - 0.687123, - 0.598788, - 0.48236, - 1.46571, - 0.380638, - 1.34367, - 0.131015, - 0.649259, - 0.200464, - 0.465848, - 0.507883, - 1.17765, - 0.098563, - 1.00657, - 0.0823032, - 2.70264, - 0.20046, - 0.22775, - 2.84095, - 1.41021, - 0.663391, - 0.395517, - 3.20123, - 0.309646, - 0.76504, - 1.69404, - 0.151733, - 0.994104, - 0.10722, - 0.320444, - 0.238086, - 1.12071, - 0.188303, - 0.613606, - 0.301814, - 2.28287, - 1.097, - 0.543251, - 0.723991, - 0.23747, - 0.902016, - 0.312695, - 0.284832, - 2.64745, - 0.236641, - 1.54387, - 0.878631, - 0.478633 - ], - "train_epoch_time": 5.055427312850952, - "train_loss": 2.80425652438291, - "train_score": 0.22392844333710255, - "val_loss": 2.829315626251711, - "val_score": 0.2205663392519157 - }, - { - "epoch": 3, - "grad_norm": 7.985816478729248, - "learning_rate": 1.0, - "model_norm": 87.48208618164062, - "step_logs": { - "grad_norm": { - "162": 0.739536464214325, - "163": 9.031047821044922, - "164": 0.47472620010375977, - "165": 1.0238107442855835, - "166": 1.0248939990997314, - "167": 0.8922759294509888, - "168": 0.8334680795669556, - "169": 1.789474606513977, - "170": 1.6453161239624023, - "171": 5.625613212585449, - "172": 1.9424501657485962, - "173": 14.148873329162598, - "174": 2.742997646331787, - "175": 1.8475837707519531, - "176": 1.7735297679901123, - "177": 5.4583659172058105, - "178": 0.9533764123916626, - "179": 2.4805867671966553, - "180": 1.0320863723754883, - "181": 1.922633171081543, - "182": 2.158294439315796, - "183": 7.784940719604492, - "184": 1.870803713798523, - "185": 2.184821605682373, - "186": 10.198567390441895, - "187": 2.120152473449707, - "188": 2.170665740966797, - "189": 2.0228075981140137, - "190": 2.058455467224121, - "191": 3.568654775619507, - "192": 1.2148218154907227, - "193": 0.9665382504463196, - "194": 1.8063281774520874, - "195": 1.1803311109542847, - "196": 10.03723430633545, - "197": 1.332942247390747, - "198": 0.8169476985931396, - "199": 4.852204322814941, - "200": 1.0940513610839844, - "201": 2.2535884380340576, - "202": 1.2635124921798706, - "203": 7.535573482513428, - "204": 1.1998114585876465, - "205": 10.150912284851074, - "206": 1.3481035232543945, - "207": 4.655223369598389, - "208": 1.4760758876800537, - "209": 0.8217176198959351, - "210": 3.4677295684814453, - "211": 1.007032871246338, - "212": 1.2883204221725464, - "213": 1.8062108755111694, - "214": 1.568005919456482, - "215": 7.985816478729248 - }, - "loss": { - "162": 2.7989280223846436, - "163": 4.361610412597656, - "164": 2.6710832118988037, - "165": 2.7454373836517334, - "166": 3.0132532119750977, - "167": 2.839517116546631, - "168": 2.78610897064209, - "169": 2.9087047576904297, - "170": 3.0820183753967285, - "171": 3.928989887237549, - "172": 3.401419162750244, - "173": 7.087863922119141, - "174": 3.383911609649658, - "175": 3.271345615386963, - "176": 3.098524570465088, - "177": 3.9379611015319824, - "178": 2.953331232070923, - "179": 3.0407285690307617, - "180": 3.0467114448547363, - "181": 3.0696702003479004, - "182": 2.9657020568847656, - "183": 4.595019340515137, - "184": 3.3624134063720703, - "185": 2.9694716930389404, - "186": 6.6835503578186035, - "187": 3.152012586593628, - "188": 3.545067310333252, - "189": 3.051173210144043, - "190": 3.091698169708252, - "191": 3.435682773590088, - "192": 3.0081863403320312, - "193": 3.0356650352478027, - "194": 2.8297853469848633, - "195": 2.8922810554504395, - "196": 5.024540901184082, - "197": 2.9263577461242676, - "198": 2.9566190242767334, - "199": 3.381348133087158, - "200": 2.9656076431274414, - "201": 3.047962188720703, - "202": 3.042660713195801, - "203": 3.8212239742279053, - "204": 2.7600789070129395, - "205": 5.395393371582031, - "206": 2.7800631523132324, - "207": 4.111872673034668, - "208": 3.174259662628174, - "209": 2.729217052459717, - "210": 3.371816635131836, - "211": 2.7568399906158447, - "212": 2.8635785579681396, - "213": 3.318913459777832, - "214": 2.949039936065674, - "215": 5.546300411224365 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 5.11767, - 0.0534774, - 11.8523, - 2.61922, - 2.86865, - 3.56653, - 4.0107, - 0.90834, - 1.13851, - 0.124148, - 0.901489, - 0.0354056, - 0.449747, - 0.958337, - 0.985095, - 0.132174, - 3.24925, - 0.494161, - 2.86022, - 0.830422, - 0.636658, - 0.0758188, - 0.960715, - 0.622081, - 0.0642582, - 0.701219, - 0.752382, - 0.745689, - 0.729649, - 0.269776, - 2.03835, - 3.24949, - 0.867282, - 2.07603, - 0.0498733, - 1.64704, - 4.43003, - 0.143619, - 2.47764, - 0.600151, - 1.90588, - 0.067293, - 1.91732, - 0.0523616, - 1.52971, - 0.18974, - 1.45688, - 4.04197, - 0.280397, - 2.71847, - 1.72529, - 1.01732, - 1.19946, - 0.086969 - ], - "train_epoch_time": 5.054608583450317, - "train_loss": 2.8779535056186716, - "train_score": 0.20478165348085817, - "val_loss": 2.9066813320571603, - "val_score": 0.19806167483329773 - }, - { - "epoch": 4, - "grad_norm": 0.5887344479560852, - "learning_rate": 1.0, - "model_norm": 87.54537963867188, - "step_logs": { - "grad_norm": { - "216": 1.05914306640625, - "217": 1.1333253383636475, - "218": 2.3842105865478516, - "219": 0.8865511417388916, - "220": 2.2461509704589844, - "221": 1.1158198118209839, - "222": 4.969597816467285, - "223": 0.861678957939148, - "224": 1.3381102085113525, - "225": 0.9215143918991089, - "226": 5.811474800109863, - "227": 0.886582612991333, - "228": 1.4481470584869385, - "229": 1.5169901847839355, - "230": 4.161715507507324, - "231": 1.1239099502563477, - "232": 1.2122349739074707, - "233": 1.2336012125015259, - "234": 4.419249534606934, - "235": 1.3811534643173218, - "236": 2.5956661701202393, - "237": 2.1354124546051025, - "238": 0.7544280290603638, - "239": 1.8376989364624023, - "240": 1.5926493406295776, - "241": 2.820427417755127, - "242": 0.8177281022071838, - "243": 0.9029402732849121, - "244": 0.9629725813865662, - "245": 1.0313796997070312, - "246": 0.6563055515289307, - "247": 0.39263778924942017, - "248": 0.2830714285373688, - "249": 0.4418735206127167, - "250": 0.7193074226379395, - "251": 1.573967456817627, - "252": 0.8245394825935364, - "253": 1.0902609825134277, - "254": 0.8563145399093628, - "255": 0.5519356727600098, - "256": 0.9002417922019958, - "257": 0.9481765627861023, - "258": 0.5470548868179321, - "259": 0.39079615473747253, - "260": 0.426209956407547, - "261": 0.48951825499534607, - "262": 0.533036470413208, - "263": 0.6724916696548462, - "264": 0.6284416317939758, - "265": 0.4307269752025604, - "266": 0.3624004125595093, - "267": 0.4356800317764282, - "268": 0.5273780822753906, - "269": 0.5887344479560852 - }, - "loss": { - "216": 2.879425525665283, - "217": 2.860900402069092, - "218": 3.128995418548584, - "219": 2.9086966514587402, - "220": 2.8993587493896484, - "221": 2.8802409172058105, - "222": 3.5089950561523438, - "223": 2.8219335079193115, - "224": 2.8240935802459717, - "225": 2.8346526622772217, - "226": 3.576157569885254, - "227": 2.7567503452301025, - "228": 2.7820897102355957, - "229": 3.1573550701141357, - "230": 3.269378185272217, - "231": 2.991978883743286, - "232": 2.734656810760498, - "233": 2.941859006881714, - "234": 3.293694257736206, - "235": 2.987952709197998, - "236": 3.0175209045410156, - "237": 3.4019830226898193, - "238": 2.6543259620666504, - "239": 3.040815830230713, - "240": 2.9467356204986572, - "241": 4.197122573852539, - "242": 2.751098394393921, - "243": 2.747145175933838, - "244": 2.7478253841400146, - "245": 2.8338794708251953, - "246": 2.6444525718688965, - "247": 2.6249399185180664, - "248": 2.593257188796997, - "249": 2.569084405899048, - "250": 2.651834487915039, - "251": 2.801464080810547, - "252": 2.9775736331939697, - "253": 2.734055995941162, - "254": 2.927776336669922, - "255": 2.6600732803344727, - "256": 2.627235174179077, - "257": 2.8831098079681396, - "258": 2.6698570251464844, - "259": 2.5659618377685547, - "260": 2.569091796875, - "261": 2.577655792236328, - "262": 2.594210624694824, - "263": 2.6245179176330566, - "264": 2.700070858001709, - "265": 2.5859293937683105, - "266": 2.5591416358947754, - "267": 2.5564053058624268, - "268": 2.597079038619995, - "269": 2.5908541679382324 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 2.56683, - 2.22738, - 0.550447, - 3.70076, - 0.574677, - 2.31335, - 0.142082, - 3.80063, - 1.57723, - 3.33807, - 0.105887, - 3.50719, - 1.32662, - 1.37201, - 0.188765, - 2.36862, - 1.86093, - 1.93318, - 0.16865, - 1.56635, - 0.447871, - 0.746051, - 4.66357, - 0.900412, - 1.16172, - 0.527621, - 4.11423, - 3.36949, - 2.9632, - 2.66406, - 6.13937, - 17.0269, - 32.3634, - 13.1578, - 5.12528, - 1.13082, - 4.37965, - 2.3001, - 3.99274, - 8.73206, - 3.24176, - 3.20688, - 8.92126, - 16.8016, - 14.1427, - 10.7569, - 9.13043, - 5.80331, - 6.83668, - 13.9384, - 19.4857, - 13.4677, - 9.33772, - 7.47488 - ], - "train_epoch_time": 5.054750919342041, - "train_loss": 2.6228919218739275, - "train_score": 0.23670081599713055, - "val_loss": 2.665089219090038, - "val_score": 0.23133431426351023 - }, - { - "epoch": 5, - "grad_norm": 0.6827788352966309, - "learning_rate": 1.0, - "model_norm": 87.62759399414062, - "step_logs": { - "grad_norm": { - "270": 0.5555106997489929, - "271": 0.4655524492263794, - "272": 0.47277209162712097, - "273": 0.5839066505432129, - "274": 0.6063619256019592, - "275": 0.5545136332511902, - "276": 0.5252218246459961, - "277": 0.5745373368263245, - "278": 0.5235341787338257, - "279": 0.40295249223709106, - "280": 0.4501647651195526, - "281": 0.6298251152038574, - "282": 0.5882461071014404, - "283": 0.421758770942688, - "284": 0.44777485728263855, - "285": 0.5653491020202637, - "286": 0.676089882850647, - "287": 0.6589581370353699, - "288": 0.5578166842460632, - "289": 0.43333059549331665, - "290": 0.4672476351261139, - "291": 0.5668012499809265, - "292": 0.6152086853981018, - "293": 0.6537403464317322, - "294": 0.5736836194992065, - "295": 0.4108264446258545, - "296": 0.43720370531082153, - "297": 0.5664796829223633, - "298": 0.5703925490379333, - "299": 0.510169267654419, - "300": 0.5257142782211304, - "301": 0.6524471640586853, - "302": 0.6611528992652893, - "303": 0.501374363899231, - "304": 0.43720799684524536, - "305": 0.5381279587745667, - "306": 0.7145493030548096, - "307": 0.654240608215332, - "308": 0.537788450717926, - "309": 0.5183095335960388, - "310": 0.5211868286132812, - "311": 0.6392931938171387, - "312": 0.6911410689353943, - "313": 0.48801928758621216, - "314": 0.4185806214809418, - "315": 0.4073719084262848, - "316": 0.4472411572933197, - "317": 1.0167473554611206, - "318": 0.5887118577957153, - "319": 0.615260124206543, - "320": 0.6129778623580933, - "321": 0.6420866847038269, - "322": 0.711050271987915, - "323": 0.6827788352966309 - }, - "loss": { - "270": 2.629457950592041, - "271": 2.576857089996338, - "272": 2.579404354095459, - "273": 2.594836711883545, - "274": 2.625920295715332, - "275": 2.5772459506988525, - "276": 2.5971596240997314, - "277": 2.5823988914489746, - "278": 2.6025447845458984, - "279": 2.556942939758301, - "280": 2.539665699005127, - "281": 2.575623035430908, - "282": 2.6504249572753906, - "283": 2.5649685859680176, - "284": 2.5449976921081543, - "285": 2.556992530822754, - "286": 2.634875774383545, - "287": 2.6135270595550537, - "288": 2.610264539718628, - "289": 2.5462141036987305, - "290": 2.554551839828491, - "291": 2.5557708740234375, - "292": 2.5983874797821045, - "293": 2.5749433040618896, - "294": 2.6319751739501953, - "295": 2.5239522457122803, - "296": 2.5390303134918213, - "297": 2.5517003536224365, - "298": 2.6059937477111816, - "299": 2.5403308868408203, - "300": 2.5613574981689453, - "301": 2.5686450004577637, - "302": 2.6447978019714355, - "303": 2.5433998107910156, - "304": 2.5435404777526855, - "305": 2.532345771789551, - "306": 2.5997838973999023, - "307": 2.573017120361328, - "308": 2.5923736095428467, - "309": 2.560164451599121, - "310": 2.5676488876342773, - "311": 2.563610553741455, - "312": 2.6065878868103027, - "313": 2.5391602516174316, - "314": 2.5279202461242676, - "315": 2.4809811115264893, - "316": 2.4592719078063965, - "317": 2.528409242630005, - "318": 2.575133800506592, - "319": 2.543478488922119, - "320": 2.6020755767822266, - "321": 2.5485613346099854, - "322": 2.605581760406494, - "323": 2.583954334259033 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 8.52082, - 11.8892, - 11.5403, - 7.61068, - 7.14196, - 8.38169, - 9.41484, - 7.82324, - 9.49529, - 15.7476, - 12.5324, - 6.49295, - 7.65945, - 14.4196, - 12.6931, - 8.00011, - 5.76436, - 6.01882, - 8.38883, - 13.5599, - 11.7009, - 7.95537, - 6.86529, - 6.025, - 7.99718, - 14.9542, - 13.2831, - 7.95172, - 8.00987, - 9.76027, - 9.26767, - 6.03411, - 6.05046, - 10.1179, - 13.3065, - 8.74484, - 5.09182, - 6.01129, - 8.96344, - 9.52992, - 9.45255, - 6.27266, - 5.45681, - 10.6614, - 14.428, - 14.95, - 12.2948, - 2.4458, - 7.43009, - 6.71909, - 6.92517, - 6.1817, - 5.15352, - 5.54274 - ], - "train_epoch_time": 5.056915521621704, - "train_loss": 2.575000104671571, - "train_score": 0.2442005918220947, - "val_loss": 2.6129064505190582, - "val_score": 0.23873421397534905 - }, - { - "epoch": 6, - "grad_norm": 0.42222860455513, - "learning_rate": 1.0, - "model_norm": 87.7262191772461, - "step_logs": { - "grad_norm": { - "324": 0.589959442615509, - "325": 0.5029729008674622, - "326": 0.5285340547561646, - "327": 0.5284609794616699, - "328": 0.5149835348129272, - "329": 0.5487874746322632, - "330": 0.5941106677055359, - "331": 0.6051709055900574, - "332": 0.5953346490859985, - "333": 0.5666784644126892, - "334": 0.5870032906532288, - "335": 0.6597731113433838, - "336": 0.6418724060058594, - "337": 0.5051636695861816, - "338": 0.45763349533081055, - "339": 0.5128597021102905, - "340": 0.5366122722625732, - "341": 0.5496450066566467, - "342": 0.5769292712211609, - "343": 0.6236070990562439, - "344": 0.615502119064331, - "345": 0.600796639919281, - "346": 0.6764445304870605, - "347": 0.650355339050293, - "348": 0.7011405229568481, - "349": 0.6819117665290833, - "350": 0.6357214450836182, - "351": 0.6570878028869629, - "352": 0.7232521772384644, - "353": 0.6644817590713501, - "354": 0.5698176026344299, - "355": 0.5169979333877563, - "356": 0.5554744005203247, - "357": 0.6457024216651917, - "358": 0.6742708086967468, - "359": 0.5455997586250305, - "360": 0.5129860639572144, - "361": 0.8755683302879333, - "362": 0.7512721419334412, - "363": 0.7248756885528564, - "364": 0.6089893579483032, - "365": 0.8547742366790771, - "366": 0.5908450484275818, - "367": 0.6266611814498901, - "368": 0.6146601438522339, - "369": 0.6133776307106018, - "370": 0.519788920879364, - "371": 0.5844746828079224, - "372": 0.5966187119483948, - "373": 0.5463952422142029, - "374": 0.5837127566337585, - "375": 0.8275039196014404, - "376": 0.5807767510414124, - "377": 0.42222860455513 - }, - "loss": { - "324": 2.5863800048828125, - "325": 2.553689956665039, - "326": 2.531513214111328, - "327": 2.528355360031128, - "328": 2.549199104309082, - "329": 2.5063982009887695, - "330": 2.5663740634918213, - "331": 2.5246520042419434, - "332": 2.5476579666137695, - "333": 2.509037494659424, - "334": 2.5129401683807373, - "335": 2.552493095397949, - "336": 2.5522704124450684, - "337": 2.5181727409362793, - "338": 2.5297961235046387, - "339": 2.485309600830078, - "340": 2.5213050842285156, - "341": 2.4957408905029297, - "342": 2.5106847286224365, - "343": 2.481025218963623, - "344": 2.541317939758301, - "345": 2.4983444213867188, - "346": 2.4951438903808594, - "347": 2.530484676361084, - "348": 2.5371439456939697, - "349": 2.5106794834136963, - "350": 2.5177316665649414, - "351": 2.499049663543701, - "352": 2.5348687171936035, - "353": 2.542555332183838, - "354": 2.525477647781372, - "355": 2.4737844467163086, - "356": 2.497093677520752, - "357": 2.4768943786621094, - "358": 2.5111653804779053, - "359": 2.4554877281188965, - "360": 2.430851697921753, - "361": 2.435702085494995, - "362": 2.5544791221618652, - "363": 2.542387008666992, - "364": 2.5089550018310547, - "365": 2.503735303878784, - "366": 2.5334179401397705, - "367": 2.5242385864257812, - "368": 2.541548252105713, - "369": 2.5084705352783203, - "370": 2.4832043647766113, - "371": 2.461982011795044, - "372": 2.481531858444214, - "373": 2.4528322219848633, - "374": 2.436610221862793, - "375": 2.4492297172546387, - "376": 2.524195671081543, - "377": 2.4112091064453125 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 7.43101, - 10.0944, - 9.06221, - 9.05341, - 9.61207, - 8.32227, - 7.27085, - 6.89359, - 7.18818, - 7.81329, - 7.29291, - 5.86375, - 6.19483, - 9.86782, - 12.0795, - 9.44895, - 8.75597, - 8.26104, - 7.54305, - 6.37983, - 6.70811, - 6.92145, - 5.45295, - 5.98277, - 5.16101, - 5.39927, - 6.22982, - 5.78799, - 4.84592, - 5.75843, - 7.77807, - 9.25517, - 8.09295, - 5.94077, - 5.5234, - 8.24877, - 9.23735, - 3.1772, - 4.52593, - 4.83854, - 6.76509, - 3.42677, - 7.25704, - 6.42784, - 6.72711, - 6.66735, - 9.19091, - 7.20698, - 6.9715, - 8.21588, - 7.15134, - 3.57676, - 7.4835, - 13.5251 - ], - "train_epoch_time": 5.059398651123047, - "train_loss": 2.3843994223404477, - "train_score": 0.3101181403208668, - "val_loss": 2.412595060197412, - "val_score": 0.30106827647486184 - }, - { - "epoch": 7, - "grad_norm": 0.6233770847320557, - "learning_rate": 1.0, - "model_norm": 87.8237533569336, - "step_logs": { - "grad_norm": { - "378": 0.4476366937160492, - "379": 0.725091814994812, - "380": 0.5437142252922058, - "381": 0.4848865568637848, - "382": 0.6433197855949402, - "383": 0.5641137361526489, - "384": 0.7488631010055542, - "385": 0.7799643278121948, - "386": 0.5854859948158264, - "387": 0.5013403296470642, - "388": 0.6148808002471924, - "389": 0.9244084358215332, - "390": 0.6364380121231079, - "391": 0.6464278101921082, - "392": 0.6114261746406555, - "393": 0.5769841074943542, - "394": 0.5522688031196594, - "395": 0.5251255035400391, - "396": 0.49269723892211914, - "397": 0.47639235854148865, - "398": 0.5340412259101868, - "399": 0.6054332852363586, - "400": 0.6141825318336487, - "401": 0.6073479056358337, - "402": 0.7358940839767456, - "403": 0.5304068922996521, - "404": 0.5366724133491516, - "405": 1.4512220621109009, - "406": 0.5987162590026855, - "407": 0.6535307765007019, - "408": 0.44275209307670593, - "409": 0.3558882474899292, - "410": 0.4601386487483978, - "411": 0.5123399496078491, - "412": 0.5616330504417419, - "413": 0.7331110835075378, - "414": 0.6110520362854004, - "415": 0.5966317653656006, - "416": 0.6240831017494202, - "417": 0.7943153977394104, - "418": 0.824618935585022, - "419": 0.7633992433547974, - "420": 0.6250609159469604, - "421": 0.6731459498405457, - "422": 0.7580225467681885, - "423": 0.6872450113296509, - "424": 0.5986344814300537, - "425": 0.6646986603736877, - "426": 1.035421371459961, - "427": 0.6865931749343872, - "428": 0.6413306593894958, - "429": 0.605828583240509, - "430": 0.5741073489189148, - "431": 0.6233770847320557 - }, - "loss": { - "378": 2.4035091400146484, - "379": 2.4032530784606934, - "380": 2.475454330444336, - "381": 2.423172950744629, - "382": 2.44462513923645, - "383": 2.482314348220825, - "384": 2.4797093868255615, - "385": 2.5459303855895996, - "386": 2.4761035442352295, - "387": 2.403139591217041, - "388": 2.359795570373535, - "389": 2.468825340270996, - "390": 2.517343521118164, - "391": 2.4774680137634277, - "392": 2.460294246673584, - "393": 2.44753098487854, - "394": 2.4470326900482178, - "395": 2.4523162841796875, - "396": 2.406097173690796, - "397": 2.4421441555023193, - "398": 2.3985066413879395, - "399": 2.46334171295166, - "400": 2.429025650024414, - "401": 2.4523916244506836, - "402": 2.4385428428649902, - "403": 2.4708199501037598, - "404": 2.4242401123046875, - "405": 2.497091770172119, - "406": 2.50575852394104, - "407": 2.437063694000244, - "408": 2.4413199424743652, - "409": 2.3665173053741455, - "410": 2.3716275691986084, - "411": 2.410421848297119, - "412": 2.420224666595459, - "413": 2.4213624000549316, - "414": 2.443295955657959, - "415": 2.4177072048187256, - "416": 2.4265623092651367, - "417": 2.4684195518493652, - "418": 2.4558656215667725, - "419": 2.5177488327026367, - "420": 2.423591136932373, - "421": 2.3797426223754883, - "422": 2.4399895668029785, - "423": 2.467034339904785, - "424": 2.380239963531494, - "425": 2.3962860107421875, - "426": 2.4109139442443848, - "427": 2.4847571849823, - "428": 2.4449620246887207, - "429": 2.455070734024048, - "430": 2.3972742557525635, - "431": 2.411729335784912 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 11.9948, - 4.57102, - 8.37363, - 10.3063, - 5.90688, - 7.80052, - 4.42177, - 4.18502, - 7.2233, - 9.56123, - 6.24155, - 2.8891, - 6.21484, - 5.92882, - 6.58111, - 7.35192, - 8.02304, - 8.89304, - 9.91181, - 10.7607, - 8.40991, - 6.72035, - 6.43928, - 6.64836, - 4.50298, - 8.78259, - 8.417, - 1.18568, - 6.99032, - 5.70604, - 12.4538, - 18.6845, - 11.2013, - 9.18283, - 7.67274, - 4.50526, - 6.54364, - 6.79189, - 6.23027, - 3.91231, - 3.61159, - 4.32025, - 6.20318, - 5.25184, - 4.24643, - 5.22338, - 6.64198, - 5.42362, - 2.24878, - 5.2709, - 5.9444, - 6.68905, - 7.2733, - 6.20622 - ], - "train_epoch_time": 5.062114477157593, - "train_loss": 2.409191673079043, - "train_score": 0.3023863432910213, - "val_loss": 2.467650843542561, - "val_score": 0.290035699285678 - }, - { - "epoch": 8, - "grad_norm": 0.9071621298789978, - "learning_rate": 1.0, - "model_norm": 87.94059753417969, - "step_logs": { - "grad_norm": { - "432": 0.7765079140663147, - "433": 0.8220192193984985, - "434": 0.6735662221908569, - "435": 0.5537124276161194, - "436": 0.5728278160095215, - "437": 0.8567742109298706, - "438": 0.6464678049087524, - "439": 0.6067763566970825, - "440": 0.6834186911582947, - "441": 0.5651432275772095, - "442": 0.5057303309440613, - "443": 0.580076277256012, - "444": 0.713246762752533, - "445": 0.6217272877693176, - "446": 0.5551022291183472, - "447": 0.5998145341873169, - "448": 0.6619981527328491, - "449": 0.733294665813446, - "450": 0.6560757756233215, - "451": 0.6208107471466064, - "452": 0.598636269569397, - "453": 0.5802643895149231, - "454": 0.6702688336372375, - "455": 0.6370559930801392, - "456": 0.6309664845466614, - "457": 0.5908674597740173, - "458": 0.676193118095398, - "459": 0.7668659687042236, - "460": 0.7398245334625244, - "461": 0.6312540173530579, - "462": 0.6135047078132629, - "463": 0.5682210326194763, - "464": 0.6274297833442688, - "465": 0.6611471176147461, - "466": 0.7763511538505554, - "467": 0.5277438163757324, - "468": 0.46193280816078186, - "469": 0.5078476667404175, - "470": 0.5151538252830505, - "471": 0.5464355945587158, - "472": 0.8680021166801453, - "473": 0.6129276156425476, - "474": 0.5865321159362793, - "475": 0.5939337015151978, - "476": 0.6458668112754822, - "477": 0.6915168166160583, - "478": 0.6741195917129517, - "479": 0.7049943208694458, - "480": 0.6342566013336182, - "481": 0.9019966125488281, - "482": 0.6422078609466553, - "483": 0.5967493057250977, - "484": 0.5948537588119507, - "485": 0.9071621298789978 - }, - "loss": { - "432": 2.434274196624756, - "433": 2.4632740020751953, - "434": 2.4352469444274902, - "435": 2.392164945602417, - "436": 2.362865924835205, - "437": 2.415482759475708, - "438": 2.4197092056274414, - "439": 2.4119417667388916, - "440": 2.37589430809021, - "441": 2.394064426422119, - "442": 2.360340118408203, - "443": 2.345093250274658, - "444": 2.3945202827453613, - "445": 2.4090452194213867, - "446": 2.3488574028015137, - "447": 2.343813180923462, - "448": 2.370213031768799, - "449": 2.3920602798461914, - "450": 2.3988003730773926, - "451": 2.366093158721924, - "452": 2.334099769592285, - "453": 2.3401384353637695, - "454": 2.3362374305725098, - "455": 2.4077751636505127, - "456": 2.3257977962493896, - "457": 2.342228889465332, - "458": 2.3290772438049316, - "459": 2.382145881652832, - "460": 2.407740592956543, - "461": 2.388184070587158, - "462": 2.3617303371429443, - "463": 2.3404135704040527, - "464": 2.310192346572876, - "465": 2.36226224899292, - "466": 2.3162612915039062, - "467": 2.3913581371307373, - "468": 2.3197669982910156, - "469": 2.3052096366882324, - "470": 2.318777561187744, - "471": 2.315047264099121, - "472": 2.3315112590789795, - "473": 2.401728630065918, - "474": 2.354654312133789, - "475": 2.377084732055664, - "476": 2.3382976055145264, - "477": 2.3644192218780518, - "478": 2.368288516998291, - "479": 2.372807502746582, - "480": 2.378720283508301, - "481": 2.346017360687256, - "482": 2.3976635932922363, - "483": 2.3420934677124023, - "484": 2.3065268993377686, - "485": 2.352982997894287 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 4.03718, - 3.64543, - 5.36763, - 7.8023, - 7.20097, - 3.29057, - 5.78988, - 6.55103, - 5.0869, - 7.49581, - 9.22862, - 6.96931, - 4.70694, - 6.23225, - 7.62273, - 6.51462, - 5.40846, - 4.44852, - 5.57296, - 6.13922, - 6.51318, - 6.95008, - 5.20019, - 5.93281, - 5.84197, - 6.70887, - 5.09381, - 4.05069, - 4.39898, - 5.99321, - 6.27472, - 7.24866, - 5.86838, - 5.40421, - 3.84301, - 8.58615, - 10.8714, - 8.93807, - 8.73746, - 7.75322, - 3.09454, - 6.39301, - 6.84453, - 6.73858, - 5.60549, - 4.94446, - 5.21148, - 4.7741, - 5.91307, - 2.88351, - 5.81349, - 6.57689, - 6.51836, - 2.85923 - ], - "train_epoch_time": 5.063021659851074, - "train_loss": 2.4160673170896994, - "train_score": 0.3061323080377565, - "val_loss": 2.4691786831747375, - "val_score": 0.2923229407223987 - }, - { - "epoch": 9, - "grad_norm": 0.8479442000389099, - "learning_rate": 1.0, - "model_norm": 88.05618286132812, - "step_logs": { - "grad_norm": { - "486": 0.8017803430557251, - "487": 0.7247504591941833, - "488": 0.5997197031974792, - "489": 0.5661600828170776, - "490": 0.5384531021118164, - "491": 0.5412113666534424, - "492": 0.5752896070480347, - "493": 0.5827569365501404, - "494": 0.5506663918495178, - "495": 0.6922354698181152, - "496": 0.6688157320022583, - "497": 0.710025429725647, - "498": 0.640258252620697, - "499": 0.5868394374847412, - "500": 0.7574666738510132, - "501": 0.7814472913742065, - "502": 0.6115919947624207, - "503": 0.48716840147972107, - "504": 0.5875566601753235, - "505": 0.5984475016593933, - "506": 0.7445812225341797, - "507": 0.7910962700843811, - "508": 0.6796084642410278, - "509": 0.6982877254486084, - "510": 0.6632845401763916, - "511": 0.6065599322319031, - "512": 0.5757735371589661, - "513": 0.5152581930160522, - "514": 0.5502412915229797, - "515": 0.5284173488616943, - "516": 0.4957486689090729, - "517": 0.5644363164901733, - "518": 0.6596044898033142, - "519": 0.7322612404823303, - "520": 0.6808573007583618, - "521": 0.5945437550544739, - "522": 0.6843143701553345, - "523": 0.6302454471588135, - "524": 0.5569486021995544, - "525": 0.564264178276062, - "526": 0.7126025557518005, - "527": 0.7082458734512329, - "528": 0.5964210629463196, - "529": 0.5213846564292908, - "530": 0.5039574503898621, - "531": 0.6565131545066833, - "532": 0.6962807774543762, - "533": 1.044175148010254, - "534": 0.7827070951461792, - "535": 0.9594875574111938, - "536": 0.7189468741416931, - "537": 0.8180437088012695, - "538": 1.003755807876587, - "539": 0.8479442000389099 - }, - "loss": { - "486": 2.4348576068878174, - "487": 2.45119047164917, - "488": 2.3284833431243896, - "489": 2.297154664993286, - "490": 2.2644057273864746, - "491": 2.2953734397888184, - "492": 2.30232310295105, - "493": 2.317416191101074, - "494": 2.26430082321167, - "495": 2.276296854019165, - "496": 2.3626694679260254, - "497": 2.3176751136779785, - "498": 2.340026378631592, - "499": 2.286736011505127, - "500": 2.290950059890747, - "501": 2.36889910697937, - "502": 2.3355743885040283, - "503": 2.252765655517578, - "504": 2.2416040897369385, - "505": 2.2984800338745117, - "506": 2.2948336601257324, - "507": 2.3218183517456055, - "508": 2.341596841812134, - "509": 2.3350746631622314, - "510": 2.290396213531494, - "511": 2.3208940029144287, - "512": 2.281456470489502, - "513": 2.2863335609436035, - "514": 2.242624282836914, - "515": 2.2667813301086426, - "516": 2.24092960357666, - "517": 2.2533531188964844, - "518": 2.2849860191345215, - "519": 2.323093891143799, - "520": 2.297314405441284, - "521": 2.30865478515625, - "522": 2.2752223014831543, - "523": 2.3129215240478516, - "524": 2.2830934524536133, - "525": 2.2515766620635986, - "526": 2.279601573944092, - "527": 2.325507164001465, - "528": 2.2451751232147217, - "529": 2.2410316467285156, - "530": 2.230013370513916, - "531": 2.2327327728271484, - "532": 2.3034873008728027, - "533": 2.3184001445770264, - "534": 2.3564209938049316, - "535": 2.373034954071045, - "536": 2.393862724304199, - "537": 2.3292441368103027, - "538": 2.3392906188964844, - "539": 2.448216438293457 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 3.78759, - 4.66659, - 6.47406, - 7.16658, - 7.81013, - 7.83645, - 6.95654, - 6.82384, - 7.46719, - 4.7503, - 5.2819, - 4.59732, - 5.70835, - 6.64014, - 3.9929, - 3.87924, - 6.2441, - 9.492, - 6.49321, - 6.41784, - 4.1393, - 3.70996, - 5.06984, - 4.78886, - 5.20608, - 6.30824, - 6.88191, - 8.61172, - 7.40713, - 8.11812, - 9.11812, - 7.07293, - 5.2519, - 4.33246, - 4.95574, - 6.53118, - 4.85862, - 5.82293, - 7.36026, - 7.07167, - 4.48915, - 4.63607, - 6.31167, - 8.24388, - 8.78051, - 5.18024, - 4.75135, - 2.12638, - 3.8464, - 2.57766, - 4.63133, - 3.48066, - 2.32182, - 3.40499 - ], - "train_epoch_time": 5.057504653930664, - "train_loss": 2.341117527700394, - "train_score": 0.32564450320246574, - "val_loss": 2.4157500888395256, - "val_score": 0.3131502937230396 - }, - { - "epoch": 10, - "grad_norm": 0.6387036442756653, - "learning_rate": 1.0, - "model_norm": 88.17684173583984, - "step_logs": { - "grad_norm": { - "540": 0.7706964015960693, - "541": 0.565232515335083, - "542": 0.6389411687850952, - "543": 0.8183033466339111, - "544": 0.7362214922904968, - "545": 0.8955008387565613, - "546": 0.6621264219284058, - "547": 0.5685049295425415, - "548": 0.6059681177139282, - "549": 0.6288581490516663, - "550": 0.5820083022117615, - "551": 0.5389314889907837, - "552": 0.5582356452941895, - "553": 0.5509064793586731, - "554": 0.6221991181373596, - "555": 0.5400289297103882, - "556": 0.5090938806533813, - "557": 0.5750826597213745, - "558": 0.6969702243804932, - "559": 0.6738913059234619, - "560": 0.7683441042900085, - "561": 0.53670334815979, - "562": 0.48783236742019653, - "563": 0.486321359872818, - "564": 0.5148715972900391, - "565": 0.5636603832244873, - "566": 0.6192460060119629, - "567": 0.6551604270935059, - "568": 0.6902365684509277, - "569": 0.6826980113983154, - "570": 0.6609243154525757, - "571": 0.58481365442276, - "572": 0.5430092811584473, - "573": 0.5805642604827881, - "574": 0.6683855652809143, - "575": 0.6684220433235168, - "576": 0.6165861487388611, - "577": 0.5472785234451294, - "578": 0.5367283225059509, - "579": 0.5435999631881714, - "580": 0.5493052005767822, - "581": 0.5877703428268433, - "582": 0.6477825045585632, - "583": 0.608697772026062, - "584": 0.601865291595459, - "585": 0.6329501271247864, - "586": 0.6878306269645691, - "587": 0.6167069673538208, - "588": 0.5438553094863892, - "589": 0.5399678349494934, - "590": 0.5317276120185852, - "591": 0.6248087286949158, - "592": 0.6037768125534058, - "593": 0.6387036442756653 - }, - "loss": { - "540": 2.348557472229004, - "541": 2.277038812637329, - "542": 2.2487144470214844, - "543": 2.335327625274658, - "544": 2.3149187564849854, - "545": 2.3475818634033203, - "546": 2.354139804840088, - "547": 2.284546375274658, - "548": 2.249492645263672, - "549": 2.274181365966797, - "550": 2.254321813583374, - "551": 2.2341079711914062, - "552": 2.234658718109131, - "553": 2.251466751098633, - "554": 2.230208396911621, - "555": 2.2891364097595215, - "556": 2.2364907264709473, - "557": 2.2209420204162598, - "558": 2.21760892868042, - "559": 2.282057762145996, - "560": 2.2576422691345215, - "561": 2.2787747383117676, - "562": 2.2567410469055176, - "563": 2.24727201461792, - "564": 2.219088554382324, - "565": 2.210675001144409, - "566": 2.2498698234558105, - "567": 2.240264415740967, - "568": 2.2447094917297363, - "569": 2.2753264904022217, - "570": 2.279207944869995, - "571": 2.2346253395080566, - "572": 2.200122833251953, - "573": 2.188459873199463, - "574": 2.2267870903015137, - "575": 2.2667417526245117, - "576": 2.2027907371520996, - "577": 2.206265926361084, - "578": 2.1901350021362305, - "579": 2.207444667816162, - "580": 2.201704263687134, - "581": 2.2042269706726074, - "582": 2.2094364166259766, - "583": 2.1977386474609375, - "584": 2.2322731018066406, - "585": 2.206829309463501, - "586": 2.231187105178833, - "587": 2.2853989601135254, - "588": 2.2001137733459473, - "589": 2.1983747482299805, - "590": 2.1891374588012695, - "591": 2.1933391094207764, - "592": 2.214601993560791, - "593": 2.1937899589538574 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 3.95398, - 7.12715, - 5.50824, - 3.48754, - 4.27089, - 2.92745, - 5.36971, - 7.06857, - 6.12611, - 5.75069, - 6.65515, - 7.69196, - 7.17094, - 7.41839, - 5.76085, - 7.84942, - 8.62922, - 6.71547, - 4.56517, - 5.02513, - 3.82423, - 7.91103, - 9.48289, - 9.50187, - 8.37099, - 6.95809, - 5.8672, - 5.2192, - 4.71155, - 4.88187, - 5.21772, - 6.53386, - 7.46161, - 6.49289, - 4.98453, - 5.07342, - 5.7941, - 7.36616, - 7.60259, - 7.47018, - 7.29679, - 6.3803, - 5.2653, - 5.93161, - 6.16238, - 5.50845, - 4.71599, - 6.00903, - 7.43838, - 7.53991, - 7.74274, - 5.61839, - 6.07495, - 5.3777 - ], - "train_epoch_time": 5.056248188018799, - "train_loss": 2.235028208208563, - "train_score": 0.35139997319306326, - "val_loss": 2.3336211624977645, - "val_score": 0.3283761486530851 - }, - { - "epoch": 11, - "grad_norm": 0.6408677101135254, - "learning_rate": 1.0, - "model_norm": 88.31568145751953, - "step_logs": { - "grad_norm": { - "594": 0.7077268958091736, - "595": 0.6217532753944397, - "596": 0.5789417028427124, - "597": 0.5227370858192444, - "598": 0.5119795203208923, - "599": 0.6203845739364624, - "600": 0.6974325776100159, - "601": 0.6027323603630066, - "602": 0.5231297016143799, - "603": 0.5360603332519531, - "604": 0.5519902110099792, - "605": 0.6456120014190674, - "606": 0.6429061889648438, - "607": 0.576969563961029, - "608": 0.6324966549873352, - "609": 0.6383657455444336, - "610": 0.6201777458190918, - "611": 0.5834636688232422, - "612": 0.616550087928772, - "613": 0.7507140040397644, - "614": 0.934150755405426, - "615": 0.9501323699951172, - "616": 0.8356849551200867, - "617": 0.8668741583824158, - "618": 0.7063598036766052, - "619": 0.6286273002624512, - "620": 0.592766523361206, - "621": 0.5907629728317261, - "622": 0.5974923968315125, - "623": 0.6208613514900208, - "624": 0.6769024133682251, - "625": 0.6768881678581238, - "626": 0.6350733637809753, - "627": 0.5566412806510925, - "628": 0.48644495010375977, - "629": 0.556790828704834, - "630": 0.7378922700881958, - "631": 0.6458808183670044, - "632": 0.47657760977745056, - "633": 0.4891546368598938, - "634": 0.5827817320823669, - "635": 0.6533992886543274, - "636": 0.5813054442405701, - "637": 0.53339684009552, - "638": 0.5462914705276489, - "639": 0.5413225293159485, - "640": 0.5329384803771973, - "641": 0.522574782371521, - "642": 0.5853460431098938, - "643": 0.5998774170875549, - "644": 0.6070759892463684, - "645": 0.7032648921012878, - "646": 0.9001753926277161, - "647": 0.6408677101135254 - }, - "loss": { - "594": 2.222482681274414, - "595": 2.2153053283691406, - "596": 2.214951753616333, - "597": 2.1934876441955566, - "598": 2.1778922080993652, - "599": 2.13860821723938, - "600": 2.2235894203186035, - "601": 2.2297611236572266, - "602": 2.1694183349609375, - "603": 2.1711907386779785, - "604": 2.186601161956787, - "605": 2.2139649391174316, - "606": 2.2311410903930664, - "607": 2.224823474884033, - "608": 2.1895785331726074, - "609": 2.198668956756592, - "610": 2.163444757461548, - "611": 2.2034220695495605, - "612": 2.161508083343506, - "613": 2.2344391345977783, - "614": 2.248788356781006, - "615": 2.317734718322754, - "616": 2.298546314239502, - "617": 2.2629029750823975, - "618": 2.3032338619232178, - "619": 2.2257790565490723, - "620": 2.201134204864502, - "621": 2.1813535690307617, - "622": 2.1882834434509277, - "623": 2.1853721141815186, - "624": 2.208638906478882, - "625": 2.2136659622192383, - "626": 2.1800966262817383, - "627": 2.1558072566986084, - "628": 2.1377124786376953, - "629": 2.11506986618042, - "630": 2.1868984699249268, - "631": 2.213593006134033, - "632": 2.124800205230713, - "633": 2.1536502838134766, - "634": 2.147817373275757, - "635": 2.162109613418579, - "636": 2.1786575317382812, - "637": 2.1588425636291504, - "638": 2.1571078300476074, - "639": 2.129655361175537, - "640": 2.14258074760437, - "641": 2.134822368621826, - "642": 2.1243739128112793, - "643": 2.189227819442749, - "644": 2.143068313598633, - "645": 2.19210147857666, - "646": 2.2131314277648926, - "647": 2.2399368286132812 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 4.43718, - 5.73057, - 6.60837, - 8.02728, - 8.30866, - 5.5566, - 4.57141, - 6.13775, - 7.92729, - 7.55563, - 7.1764, - 5.31163, - 5.39799, - 6.68328, - 5.47323, - 5.39536, - 5.62488, - 6.47247, - 5.68617, - 3.96478, - 2.577, - 2.56741, - 3.2913, - 3.0113, - 4.61622, - 5.63243, - 6.2644, - 6.25028, - 6.12969, - 5.66939, - 4.82028, - 4.83146, - 5.4054, - 6.95759, - 9.03404, - 6.82245, - 4.01645, - 5.30631, - 9.35515, - 9.00084, - 6.32391, - 5.06431, - 6.44733, - 7.58787, - 7.22808, - 7.2677, - 7.54368, - 7.81745, - 6.2002, - 6.08367, - 5.815, - 4.43223, - 2.7312, - 5.4538 - ], - "train_epoch_time": 5.059988737106323, - "train_loss": 2.168833270202921, - "train_score": 0.3682758697221748, - "val_loss": 2.269371535830054, - "val_score": 0.340372954594692 - }, - { - "epoch": 12, - "grad_norm": 0.5115713477134705, - "learning_rate": 1.0, - "model_norm": 88.42173767089844, - "step_logs": { - "grad_norm": { - "648": 0.716665506362915, - "649": 0.8069600462913513, - "650": 0.7947304844856262, - "651": 0.7240526080131531, - "652": 0.7629228234291077, - "653": 1.1339279413223267, - "654": 0.8302299976348877, - "655": 0.7467823624610901, - "656": 0.6972697973251343, - "657": 0.5455514192581177, - "658": 0.47693899273872375, - "659": 0.49731290340423584, - "660": 0.48412537574768066, - "661": 0.5093258619308472, - "662": 0.5902482867240906, - "663": 0.5586665868759155, - "664": 0.46420934796333313, - "665": 0.4215879440307617, - "666": 0.4409480690956116, - "667": 0.43534886837005615, - "668": 0.4254956841468811, - "669": 0.4858226478099823, - "670": 0.5949127078056335, - "671": 0.601553201675415, - "672": 0.5546567440032959, - "673": 0.5125888586044312, - "674": 0.5321463942527771, - "675": 0.46987468004226685, - "676": 0.4241557717323303, - "677": 0.41045764088630676, - "678": 0.4471478760242462, - "679": 0.49451401829719543, - "680": 0.563727617263794, - "681": 0.5436035990715027, - "682": 0.45979243516921997, - "683": 0.41452208161354065, - "684": 0.44676491618156433, - "685": 0.571497917175293, - "686": 0.5866062641143799, - "687": 0.46033743023872375, - "688": 0.42913320660591125, - "689": 0.468587726354599, - "690": 0.4917983412742615, - "691": 0.5114935040473938, - "692": 0.48584261536598206, - "693": 0.42314836382865906, - "694": 0.4293260872364044, - "695": 0.43268439173698425, - "696": 0.43268081545829773, - "697": 0.4635193347930908, - "698": 0.503139317035675, - "699": 0.546288013458252, - "700": 0.5250817537307739, - "701": 0.5115713477134705 - }, - "loss": { - "648": 2.1549344062805176, - "649": 2.224435806274414, - "650": 2.2394330501556396, - "651": 2.1879374980926514, - "652": 2.2070116996765137, - "653": 2.235476016998291, - "654": 2.2922840118408203, - "655": 2.2481298446655273, - "656": 2.241356372833252, - "657": 2.1795616149902344, - "658": 2.1363630294799805, - "659": 2.11326265335083, - "660": 2.1178579330444336, - "661": 2.1301376819610596, - "662": 2.161309242248535, - "663": 2.144866943359375, - "664": 2.11765193939209, - "665": 2.0968875885009766, - "666": 2.0897750854492188, - "667": 2.0687460899353027, - "668": 2.066110372543335, - "669": 2.097501039505005, - "670": 2.0905919075012207, - "671": 2.0850939750671387, - "672": 2.1050355434417725, - "673": 2.1126999855041504, - "674": 2.0986886024475098, - "675": 2.0715060234069824, - "676": 2.0975241661071777, - "677": 2.052122116088867, - "678": 2.057093858718872, - "679": 2.1010711193084717, - "680": 2.079598903656006, - "681": 2.069915771484375, - "682": 2.0774035453796387, - "683": 2.0765464305877686, - "684": 2.0656509399414062, - "685": 2.065680503845215, - "686": 2.0785820484161377, - "687": 2.052807092666626, - "688": 2.0685455799102783, - "689": 2.0394287109375, - "690": 2.0377089977264404, - "691": 2.068427085876465, - "692": 2.034313678741455, - "693": 2.0061779022216797, - "694": 2.028668165206909, - "695": 2.0189952850341797, - "696": 2.0436224937438965, - "697": 2.015399932861328, - "698": 2.029754877090454, - "699": 2.022144079208374, - "700": 2.0621938705444336, - "701": 2.0341384410858154 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 4.19567, - 3.41598, - 3.54567, - 4.17344, - 3.79178, - 1.7386, - 3.32561, - 4.03119, - 4.61009, - 7.32315, - 9.39181, - 8.54464, - 9.0361, - 8.21138, - 6.20365, - 6.87219, - 9.82713, - 11.7977, - 10.7479, - 10.9152, - 11.412, - 8.88683, - 5.90694, - 5.76206, - 6.84244, - 8.0408, - 7.41115, - 9.38258, - 11.6589, - 12.1805, - 10.2885, - 8.59179, - 6.54396, - 7.00468, - 9.82646, - 12.085, - 10.349, - 6.32461, - 6.04051, - 9.68714, - 11.2326, - 9.2881, - 8.42496, - 7.90606, - 8.6184, - 11.2043, - 11.0062, - 10.7843, - 10.916, - 9.38049, - 8.01802, - 6.77593, - 7.47955, - 7.77263 - ], - "train_epoch_time": 5.061323404312134, - "train_loss": 2.0338356951901013, - "train_score": 0.40135513812602847, - "val_loss": 2.1551041594876486, - "val_score": 0.3673982853935999 - }, - { - "epoch": 13, - "grad_norm": 0.32616376876831055, - "learning_rate": 0.6666666666666667, - "model_norm": 88.48424530029297, - "step_logs": { - "grad_norm": { - "702": 0.4989427924156189, - "703": 0.5028764605522156, - "704": 0.48513442277908325, - "705": 0.4481595754623413, - "706": 0.3963972330093384, - "707": 0.37314093112945557, - "708": 0.4267770051956177, - "709": 0.49082428216934204, - "710": 0.48820096254348755, - "711": 0.4020784795284271, - "712": 0.3392620086669922, - "713": 0.3534849286079407, - "714": 0.38990235328674316, - "715": 0.4022107720375061, - "716": 0.4177328944206238, - "717": 0.4514225423336029, - "718": 0.44274264574050903, - "719": 0.4521128535270691, - "720": 0.4563765823841095, - "721": 0.4543166160583496, - "722": 0.4531152844429016, - "723": 0.4502907991409302, - "724": 0.43363961577415466, - "725": 0.44261205196380615, - "726": 0.4255932867527008, - "727": 0.3841048777103424, - "728": 0.3714306056499481, - "729": 0.3813962936401367, - "730": 0.3985673785209656, - "731": 0.4312443435192108, - "732": 0.4193502366542816, - "733": 0.3988642990589142, - "734": 0.39689362049102783, - "735": 0.4039759635925293, - "736": 0.36932554841041565, - "737": 0.3757588267326355, - "738": 0.38661423325538635, - "739": 0.3642880916595459, - "740": 0.3629930317401886, - "741": 0.37425899505615234, - "742": 0.37483593821525574, - "743": 0.3770025670528412, - "744": 0.36514976620674133, - "745": 0.32594454288482666, - "746": 0.30586183071136475, - "747": 0.3098975419998169, - "748": 0.35013002157211304, - "749": 0.38314712047576904, - "750": 0.3351095914840698, - "751": 0.3467327654361725, - "752": 0.36268121004104614, - "753": 0.36673134565353394, - "754": 0.3334101736545563, - "755": 0.32616376876831055 - }, - "loss": { - "702": 2.034492015838623, - "703": 2.0352673530578613, - "704": 2.0394086837768555, - "705": 2.008737564086914, - "706": 2.0163679122924805, - "707": 1.9963488578796387, - "708": 1.9911524057388306, - "709": 2.041245222091675, - "710": 2.034959316253662, - "711": 2.0070085525512695, - "712": 1.9800987243652344, - "713": 1.9934635162353516, - "714": 1.9859341382980347, - "715": 2.022761344909668, - "716": 1.9788706302642822, - "717": 1.96261465549469, - "718": 1.9863977432250977, - "719": 2.0154967308044434, - "720": 1.96248197555542, - "721": 1.974233627319336, - "722": 2.0144500732421875, - "723": 1.9809954166412354, - "724": 2.0080857276916504, - "725": 2.024318218231201, - "726": 1.9882514476776123, - "727": 1.985202670097351, - "728": 1.9838249683380127, - "729": 1.9879852533340454, - "730": 1.9706623554229736, - "731": 2.007246494293213, - "732": 1.9716395139694214, - "733": 1.9697849750518799, - "734": 1.9749248027801514, - "735": 1.9931504726409912, - "736": 1.9625194072723389, - "737": 1.9570355415344238, - "738": 1.9798170328140259, - "739": 1.987271785736084, - "740": 1.9576994180679321, - "741": 2.002255439758301, - "742": 1.9505881071090698, - "743": 1.9529694318771362, - "744": 1.9706934690475464, - "745": 1.9538817405700684, - "746": 1.9835524559020996, - "747": 1.9771671295166016, - "748": 1.931983232498169, - "749": 1.9355602264404297, - "750": 1.976599097251892, - "751": 1.9477629661560059, - "752": 1.9791746139526367, - "753": 1.979038953781128, - "754": 1.9702351093292236, - "755": 1.9630451202392578 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 8.17249, - 8.0482, - 8.66523, - 10.0013, - 12.8324, - 14.3381, - 10.9321, - 8.47311, - 8.53805, - 12.4145, - 17.2035, - 15.9539, - 13.0633, - 12.5037, - 11.3402, - 9.63094, - 10.1336, - 9.86026, - 9.42234, - 9.56492, - 9.81158, - 9.77006, - 10.6788, - 10.3331, - 10.977, - 13.4557, - 14.3796, - 13.6666, - 12.4053, - 10.7933, - 11.2118, - 12.3814, - 12.5373, - 12.2132, - 14.3878, - 13.8605, - 13.2455, - 14.975, - 14.8576, - 14.2947, - 13.883, - 13.7406, - 14.7801, - 18.3912, - 21.2028, - 20.5877, - 15.7596, - 13.1849, - 17.6013, - 16.2012, - 15.0464, - 14.7149, - 17.7239, - 18.4527 - ], - "train_epoch_time": 5.0623815059661865, - "train_loss": 1.9476230926116878, - "train_score": 0.42392283901251543, - "val_loss": 2.077079646212362, - "val_score": 0.38772782751804924 - }, - { - "epoch": 14, - "grad_norm": 0.26985040307044983, - "learning_rate": 0.33333333333333337, - "model_norm": 88.50469207763672, - "step_logs": { - "grad_norm": { - "756": 0.30478379130363464, - "757": 0.3027219772338867, - "758": 0.32425057888031006, - "759": 0.3415772616863251, - "760": 0.303385853767395, - "761": 0.30860263109207153, - "762": 0.3042046129703522, - "763": 0.28921687602996826, - "764": 0.301988810300827, - "765": 0.2913858890533447, - "766": 0.28916874527931213, - "767": 0.303390234708786, - "768": 0.31164324283599854, - "769": 0.31332913041114807, - "770": 0.3099716901779175, - "771": 0.2818314731121063, - "772": 0.27313002943992615, - "773": 0.30405449867248535, - "774": 0.3092050850391388, - "775": 0.33552536368370056, - "776": 0.30218085646629333, - "777": 0.2772131860256195, - "778": 0.2984389662742615, - "779": 0.2824351191520691, - "780": 0.28806403279304504, - "781": 0.28275948762893677, - "782": 0.2901439964771271, - "783": 0.2757797837257385, - "784": 0.28935128450393677, - "785": 0.2884388267993927, - "786": 0.2947910726070404, - "787": 0.28605902194976807, - "788": 0.29526323080062866, - "789": 0.2733495831489563, - "790": 0.28498467803001404, - "791": 0.2944028973579407, - "792": 0.27763620018959045, - "793": 0.27936363220214844, - "794": 0.2877821922302246, - "795": 0.26401326060295105, - "796": 0.2605854272842407, - "797": 0.2725641429424286, - "798": 0.2814179062843323, - "799": 0.27628862857818604, - "800": 0.28744468092918396, - "801": 0.2990172803401947, - "802": 0.27801597118377686, - "803": 0.28110939264297485, - "804": 0.27005496621131897, - "805": 0.28657811880111694, - "806": 0.2674529254436493, - "807": 0.28101804852485657, - "808": 0.26782262325286865, - "809": 0.26985040307044983 - }, - "loss": { - "756": 1.9605858325958252, - "757": 1.968909502029419, - "758": 1.9600588083267212, - "759": 1.9773964881896973, - "760": 1.9438071250915527, - "761": 1.9563130140304565, - "762": 1.949770212173462, - "763": 1.9656846523284912, - "764": 1.8893201351165771, - "765": 1.9340025186538696, - "766": 1.944157600402832, - "767": 1.9542779922485352, - "768": 1.9738895893096924, - "769": 1.918994426727295, - "770": 1.9660694599151611, - "771": 1.9126553535461426, - "772": 1.9468989372253418, - "773": 1.9277746677398682, - "774": 1.933929681777954, - "775": 1.922041416168213, - "776": 1.9419910907745361, - "777": 1.9250818490982056, - "778": 1.9404267072677612, - "779": 1.9482839107513428, - "780": 1.9292819499969482, - "781": 1.898986577987671, - "782": 1.946297287940979, - "783": 1.8935195207595825, - "784": 1.9474821090698242, - "785": 1.9518009424209595, - "786": 1.9261317253112793, - "787": 1.963820219039917, - "788": 1.9378907680511475, - "789": 1.8760712146759033, - "790": 1.905753254890442, - "791": 1.9465609788894653, - "792": 1.9615123271942139, - "793": 1.911255955696106, - "794": 1.93943190574646, - "795": 1.920114517211914, - "796": 1.9334185123443604, - "797": 1.8845516443252563, - "798": 1.92726731300354, - "799": 1.9357072114944458, - "800": 1.9099059104919434, - "801": 1.8913452625274658, - "802": 1.949580430984497, - "803": 1.9286179542541504, - "804": 1.9588322639465332, - "805": 1.9380176067352295, - "806": 1.9361475706100464, - "807": 1.9308364391326904, - "808": 1.9216241836547852, - "809": 1.9016042947769165 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 21.1058, - 21.4851, - 18.6426, - 16.9479, - 21.1185, - 20.5418, - 21.0694, - 23.4999, - 20.7169, - 22.7782, - 23.2503, - 21.2316, - 20.3239, - 19.5466, - 20.4623, - 24.0801, - 26.0978, - 20.8523, - 20.2277, - 17.0731, - 21.2673, - 25.0508, - 21.7864, - 24.4239, - 23.2497, - 23.7513, - 23.1197, - 24.8969, - 23.2607, - 23.46, - 22.1645, - 23.9988, - 22.2285, - 25.108, - 23.4652, - 22.4587, - 25.4471, - 24.4895, - 23.4178, - 27.5471, - 28.4725, - 25.3671, - 24.3354, - 25.3579, - 23.1155, - 21.1533, - 25.2233, - 24.4059, - 26.8592, - 23.5978, - 27.0672, - 24.4499, - 26.7901, - 26.114 - ], - "train_epoch_time": 5.064804315567017, - "train_loss": 1.9235311545121618, - "train_score": 0.4305830792511895, - "val_loss": 2.0592152544058835, - "val_score": 0.39383162282638245 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:37:42.127979", - "final_model_norm": 88.50469207763672, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:35:57.059600", - "step_scheduler_on_epoch": false - } - } -] \ No newline at end of file diff --git a/output/lr-stability/shakespeare-3.json b/output/lr-stability/shakespeare-3.json deleted file mode 100644 index ca09a2a..0000000 --- a/output/lr-stability/shakespeare-3.json +++ /dev/null @@ -1,43214 +0,0 @@ -[ - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 2.829242706298828, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.42960357666016, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 8.929902076721191, - "3": 5.186877250671387, - "4": 3.783720016479492, - "5": 3.923210382461548, - "6": 9.143267631530762, - "7": 23.94384002685547, - "8": 8.97022533416748, - "9": 3.847670555114746, - "10": 3.9274423122406006, - "11": 5.522656440734863, - "12": 4.651491641998291, - "13": 4.442531108856201, - "14": 4.289407730102539, - "15": 12.143924713134766, - "16": 5.79517126083374, - "17": 10.25547981262207, - "18": 3.454157590866089, - "19": 6.344359874725342, - "20": 3.7780702114105225, - "21": 7.477146148681641, - "22": 3.879782199859619, - "23": 6.381377220153809, - "24": 5.72065544128418, - "25": 4.4057769775390625, - "26": 6.91972541809082, - "27": 4.443119049072266, - "28": 37.84668731689453, - "29": 3.661571741104126, - "30": 3.1671783924102783, - "31": 3.988196611404419, - "32": 4.033689022064209, - "33": 3.5805203914642334, - "34": 2.4237263202667236, - "35": 2.7346973419189453, - "36": 3.0601348876953125, - "37": 2.965196132659912, - "38": 3.7202541828155518, - "39": 3.718553066253662, - "40": 2.521270751953125, - "41": 4.433636665344238, - "42": 3.129775047302246, - "43": 26.615100860595703, - "44": 1.878593921661377, - "45": 2.9344465732574463, - "46": 3.6754989624023438, - "47": 4.5123515129089355, - "48": 2.5713019371032715, - "49": 2.2343690395355225, - "50": 2.1086156368255615, - "51": 1.9976294040679932, - "52": 3.213641405105591, - "53": 2.829242706298828 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.9741530418395996, - "3": 3.7462706565856934, - "4": 3.6170592308044434, - "5": 3.5356454849243164, - "6": 3.550992727279663, - "7": 4.202289581298828, - "8": 3.9621734619140625, - "9": 3.4726767539978027, - "10": 3.3988418579101562, - "11": 3.424409866333008, - "12": 3.4638521671295166, - "13": 3.2228474617004395, - "14": 3.235687255859375, - "15": 3.296962261199951, - "16": 3.293919086456299, - "17": 3.6316699981689453, - "18": 3.214136838912964, - "19": 3.1056227684020996, - "20": 3.1635918617248535, - "21": 3.1394731998443604, - "22": 3.06063175201416, - "23": 3.1491594314575195, - "24": 3.1002345085144043, - "25": 3.1068034172058105, - "26": 2.945953845977783, - "27": 3.0315403938293457, - "28": 3.954380750656128, - "29": 2.937297821044922, - "30": 2.8964157104492188, - "31": 2.8965346813201904, - "32": 2.928769826889038, - "33": 2.9699316024780273, - "34": 2.7929859161376953, - "35": 2.7797155380249023, - "36": 2.8007261753082275, - "37": 2.807225227355957, - "38": 2.8082680702209473, - "39": 2.945089817047119, - "40": 2.779898166656494, - "41": 2.786756992340088, - "42": 2.7511987686157227, - "43": 3.5148537158966064, - "44": 2.698302984237671, - "45": 2.728705406188965, - "46": 2.8470048904418945, - "47": 3.057342052459717, - "48": 2.8441390991210938, - "49": 2.703319787979126, - "50": 2.7191898822784424, - "51": 2.660457134246826, - "52": 2.7339060306549072, - "53": 2.8816604614257812 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 1e-11, - 0.00179115, - 0.00384567, - 0.00587346, - 0.00787532, - 0.00978697, - 0.0105147, - 0.00716116, - 0.0137638, - 0.0173349, - 0.0191318, - 0.0200369, - 0.0223265, - 0.0240828, - 0.0259353, - 0.0179538, - 0.0275119, - 0.0227832, - 0.0337452, - 0.0304914, - 0.0366892, - 0.0305684, - 0.039704, - 0.0354551, - 0.0382976, - 0.0432452, - 0.0365529, - 0.0459253, - 0.0050259, - 0.0512201, - 0.0543529, - 0.052981, - 0.0543398, - 0.0577706, - 0.0634617, - 0.0639758, - 0.0642646, - 0.066315, - 0.0640119, - 0.0659279, - 0.0732958, - 0.0636051, - 0.0730728, - 0.00889719, - 0.0832114, - 0.0788087, - 0.0755167, - 0.0715912, - 0.0863633, - 0.0898677, - 0.0924422, - 0.0930235, - 0.0841129, - 0.0878049 - ], - "train_epoch_time": 5.0122058391571045, - "train_loss": 2.6955285815973706, - "train_score": 0.23732402259256782, - "val_loss": 2.722465205274685, - "val_score": 0.22831156707863856 - }, - { - "epoch": 1, - "grad_norm": 1.632340908050537, - "learning_rate": 0.1, - "model_norm": 87.44588470458984, - "step_logs": { - "grad_norm": { - "54": 2.167717695236206, - "55": 1.8843998908996582, - "56": 2.296365261077881, - "57": 2.6649978160858154, - "58": 2.9070963859558105, - "59": 2.2548794746398926, - "60": 1.5094093084335327, - "61": 1.9111990928649902, - "62": 3.303433895111084, - "63": 2.2870326042175293, - "64": 1.2310452461242676, - "65": 1.1800872087478638, - "66": 1.3149524927139282, - "67": 1.5462857484817505, - "68": 1.7933094501495361, - "69": 2.478090763092041, - "70": 2.4005751609802246, - "71": 2.347316265106201, - "72": 2.1841657161712646, - "73": 1.783528208732605, - "74": 1.9344581365585327, - "75": 2.398865222930908, - "76": 2.024362564086914, - "77": 1.452140212059021, - "78": 1.7936229705810547, - "79": 2.5270416736602783, - "80": 2.1543946266174316, - "81": 1.4391276836395264, - "82": 1.6454904079437256, - "83": 2.532956600189209, - "84": 2.4044623374938965, - "85": 1.7642533779144287, - "86": 1.7786751985549927, - "87": 1.9635628461837769, - "88": 2.08329439163208, - "89": 2.3949873447418213, - "90": 2.205061912536621, - "91": 1.7287977933883667, - "92": 1.8417333364486694, - "93": 2.232008218765259, - "94": 2.0573503971099854, - "95": 1.4013190269470215, - "96": 1.3763247728347778, - "97": 1.868039608001709, - "98": 1.8802298307418823, - "99": 1.754790186882019, - "100": 1.76875901222229, - "101": 1.8400527238845825, - "102": 1.7403810024261475, - "103": 1.7100898027420044, - "104": 1.7036994695663452, - "105": 1.6900243759155273, - "106": 1.6948237419128418, - "107": 1.632340908050537 - }, - "loss": { - "54": 2.697671890258789, - "55": 2.6580300331115723, - "56": 2.6626007556915283, - "57": 2.7512998580932617, - "58": 2.7369589805603027, - "59": 2.7951793670654297, - "60": 2.5983762741088867, - "61": 2.6475000381469727, - "62": 2.698931932449341, - "63": 2.8319039344787598, - "64": 2.590872287750244, - "65": 2.576648473739624, - "66": 2.5834898948669434, - "67": 2.590606689453125, - "68": 2.5987179279327393, - "69": 2.6423301696777344, - "70": 2.727649688720703, - "71": 2.6386632919311523, - "72": 2.726167917251587, - "73": 2.5826878547668457, - "74": 2.6456406116485596, - "75": 2.620405435562134, - "76": 2.695251226425171, - "77": 2.5784120559692383, - "78": 2.6063196659088135, - "79": 2.6195664405822754, - "80": 2.710402488708496, - "81": 2.5660197734832764, - "82": 2.5647058486938477, - "83": 2.611298084259033, - "84": 2.7177627086639404, - "85": 2.5913009643554688, - "86": 2.5952882766723633, - "87": 2.6092567443847656, - "88": 2.612041473388672, - "89": 2.6423397064208984, - "90": 2.696099042892456, - "91": 2.579451084136963, - "92": 2.6191582679748535, - "93": 2.592412233352661, - "94": 2.6643106937408447, - "95": 2.571406841278076, - "96": 2.5484509468078613, - "97": 2.553044319152832, - "98": 2.600273370742798, - "99": 2.5597410202026367, - "100": 2.584235906600952, - "101": 2.562354564666748, - "102": 2.6044912338256836, - "103": 2.558192491531372, - "104": 2.5852434635162354, - "105": 2.5465574264526367, - "106": 2.5729241371154785, - "107": 2.574094772338867 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 0.0919884, - 0.0937386, - 0.0909897, - 0.0885685, - 0.0866258, - 0.0916632, - 0.0958, - 0.0935468, - 0.0831832, - 0.0915458, - 0.0971585, - 0.0973688, - 0.0967619, - 0.0955888, - 0.094173, - 0.0895894, - 0.0904457, - 0.0905463, - 0.0919544, - 0.094199, - 0.0933949, - 0.0901061, - 0.0929348, - 0.0960715, - 0.0941871, - 0.0891354, - 0.0921131, - 0.0961209, - 0.094986, - 0.0890592, - 0.0903862, - 0.0943344, - 0.0942551, - 0.09312, - 0.0923294, - 0.0902088, - 0.0917286, - 0.0945239, - 0.0939185, - 0.0912338, - 0.0926412, - 0.0963221, - 0.0964167, - 0.093603, - 0.0936348, - 0.0943264, - 0.0942924, - 0.0938026, - 0.0945047, - 0.0945933, - 0.0946846, - 0.0946899, - 0.0947131, - 0.095079 - ], - "train_epoch_time": 4.843577861785889, - "train_loss": 2.578076574552692, - "train_score": 0.24235002684114312, - "val_loss": 2.6129268844420546, - "val_score": 0.24001686288691826 - }, - { - "epoch": 2, - "grad_norm": 1.6892448663711548, - "learning_rate": 0.1, - "model_norm": 87.45940399169922, - "step_logs": { - "grad_norm": { - "108": 1.6401013135910034, - "109": 1.7984172105789185, - "110": 1.7912962436676025, - "111": 1.6297528743743896, - "112": 1.71138596534729, - "113": 1.810578465461731, - "114": 1.8892784118652344, - "115": 1.8201881647109985, - "116": 1.6007473468780518, - "117": 1.6456505060195923, - "118": 1.6218377351760864, - "119": 1.6116821765899658, - "120": 1.6829594373703003, - "121": 1.7640148401260376, - "122": 1.6729094982147217, - "123": 1.4540841579437256, - "124": 1.4654661417007446, - "125": 1.5338268280029297, - "126": 1.582726001739502, - "127": 1.622740626335144, - "128": 1.735021948814392, - "129": 1.7508931159973145, - "130": 1.8040889501571655, - "131": 1.6388498544692993, - "132": 1.5618400573730469, - "133": 1.6743911504745483, - "134": 1.719663143157959, - "135": 1.9646096229553223, - "136": 1.7544358968734741, - "137": 1.4120299816131592, - "138": 1.4097856283187866, - "139": 1.6519172191619873, - "140": 1.753260612487793, - "141": 1.7085126638412476, - "142": 1.7379885911941528, - "143": 1.701741099357605, - "144": 1.6415828466415405, - "145": 1.4609078168869019, - "146": 1.487511157989502, - "147": 1.7455832958221436, - "148": 1.6841126680374146, - "149": 1.5068544149398804, - "150": 1.6761696338653564, - "151": 1.7969931364059448, - "152": 1.734042763710022, - "153": 1.5202258825302124, - "154": 1.3413509130477905, - "155": 1.2147680521011353, - "156": 1.26915442943573, - "157": 1.4844236373901367, - "158": 1.4759669303894043, - "159": 1.4076545238494873, - "160": 1.4967929124832153, - "161": 1.6892448663711548 - }, - "loss": { - "108": 2.5715224742889404, - "109": 2.532073497772217, - "110": 2.599666118621826, - "111": 2.557715892791748, - "112": 2.568601131439209, - "113": 2.5607824325561523, - "114": 2.580101728439331, - "115": 2.5469844341278076, - "116": 2.5666747093200684, - "117": 2.549964666366577, - "118": 2.5779125690460205, - "119": 2.5485129356384277, - "120": 2.5577869415283203, - "121": 2.543717384338379, - "122": 2.585871696472168, - "123": 2.5400009155273438, - "124": 2.549086093902588, - "125": 2.5163979530334473, - "126": 2.5393409729003906, - "127": 2.538783073425293, - "128": 2.5583629608154297, - "129": 2.5411787033081055, - "130": 2.552502155303955, - "131": 2.5548455715179443, - "132": 2.558537483215332, - "133": 2.5306711196899414, - "134": 2.5770304203033447, - "135": 2.560976505279541, - "136": 2.6058897972106934, - "137": 2.5130372047424316, - "138": 2.5369863510131836, - "139": 2.516374111175537, - "140": 2.5712404251098633, - "141": 2.5201947689056396, - "142": 2.555270195007324, - "143": 2.536632537841797, - "144": 2.580138921737671, - "145": 2.5126419067382812, - "146": 2.5284831523895264, - "147": 2.5315775871276855, - "148": 2.5695245265960693, - "149": 2.5148541927337646, - "150": 2.5509133338928223, - "151": 2.5566539764404297, - "152": 2.5582828521728516, - "153": 2.5279204845428467, - "154": 2.5321807861328125, - "155": 2.5014920234680176, - "156": 2.504667282104492, - "157": 2.5098249912261963, - "158": 2.528846263885498, - "159": 2.512179374694824, - "160": 2.5179286003112793, - "161": 2.5021111965179443 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 0.0950297, - 0.0939967, - 0.0941873, - 0.095064, - 0.0946063, - 0.0939843, - 0.0935304, - 0.0938932, - 0.0952457, - 0.0949576, - 0.0951459, - 0.095151, - 0.0947537, - 0.094236, - 0.0948664, - 0.0960042, - 0.0959578, - 0.0955342, - 0.0952994, - 0.0950696, - 0.0944436, - 0.0943112, - 0.0940065, - 0.0950062, - 0.0954498, - 0.0947515, - 0.0945737, - 0.0929925, - 0.0944234, - 0.0961844, - 0.0962306, - 0.0948567, - 0.0943596, - 0.0945258, - 0.0944193, - 0.0946, - 0.095037, - 0.095926, - 0.0958079, - 0.0943235, - 0.0947697, - 0.0956806, - 0.0947805, - 0.0940599, - 0.0944494, - 0.0956287, - 0.0965692, - 0.0971349, - 0.0968847, - 0.0957948, - 0.0958706, - 0.0962059, - 0.0957406, - 0.0946053 - ], - "train_epoch_time": 4.84375262260437, - "train_loss": 2.5431613441861343, - "train_score": 0.24829402800269929, - "val_loss": 2.585049842720601, - "val_score": 0.23902123997118388 - }, - { - "epoch": 3, - "grad_norm": 1.6417392492294312, - "learning_rate": 0.1, - "model_norm": 87.47151184082031, - "step_logs": { - "grad_norm": { - "162": 1.6463249921798706, - "163": 1.5342001914978027, - "164": 1.455183982849121, - "165": 1.5131183862686157, - "166": 1.6168954372406006, - "167": 1.5889735221862793, - "168": 1.5289008617401123, - "169": 1.4898327589035034, - "170": 1.570967197418213, - "171": 1.6290321350097656, - "172": 1.5947881937026978, - "173": 1.5506904125213623, - "174": 1.4376262426376343, - "175": 1.398522973060608, - "176": 1.4237021207809448, - "177": 1.4817471504211426, - "178": 1.4833542108535767, - "179": 1.4684853553771973, - "180": 1.453198790550232, - "181": 1.493291974067688, - "182": 1.573236346244812, - "183": 1.5547974109649658, - "184": 1.5328584909439087, - "185": 1.5478310585021973, - "186": 1.5275315046310425, - "187": 1.415902018547058, - "188": 1.3994611501693726, - "189": 1.393317461013794, - "190": 1.4230738878250122, - "191": 1.5195673704147339, - "192": 1.4761430025100708, - "193": 1.4361157417297363, - "194": 1.352280616760254, - "195": 1.5173275470733643, - "196": 1.5241765975952148, - "197": 1.4327393770217896, - "198": 1.4824446439743042, - "199": 1.5598995685577393, - "200": 1.4209989309310913, - "201": 1.2262513637542725, - "202": 1.3120940923690796, - "203": 1.444899559020996, - "204": 1.4460457563400269, - "205": 1.4080867767333984, - "206": 1.5145533084869385, - "207": 1.6775528192520142, - "208": 1.567270278930664, - "209": 1.400187611579895, - "210": 1.3919522762298584, - "211": 1.6053329706192017, - "212": 1.5757410526275635, - "213": 1.680374026298523, - "214": 1.709222435951233, - "215": 1.6417392492294312 - }, - "loss": { - "162": 2.5523223876953125, - "163": 2.5141079425811768, - "164": 2.5111641883850098, - "165": 2.5069899559020996, - "166": 2.541145086288452, - "167": 2.515294075012207, - "168": 2.5161244869232178, - "169": 2.5069851875305176, - "170": 2.526698589324951, - "171": 2.5056231021881104, - "172": 2.5440969467163086, - "173": 2.5141029357910156, - "174": 2.508448600769043, - "175": 2.475994825363159, - "176": 2.5205020904541016, - "177": 2.485654354095459, - "178": 2.499504327774048, - "179": 2.5099403858184814, - "180": 2.522557497024536, - "181": 2.5125904083251953, - "182": 2.512593984603882, - "183": 2.52034854888916, - "184": 2.530547618865967, - "185": 2.512237548828125, - "186": 2.529956102371216, - "187": 2.4996047019958496, - "188": 2.511016368865967, - "189": 2.5138168334960938, - "190": 2.5169854164123535, - "191": 2.500276565551758, - "192": 2.5048160552978516, - "193": 2.4937100410461426, - "194": 2.4996755123138428, - "195": 2.4886741638183594, - "196": 2.5265872478485107, - "197": 2.5009970664978027, - "198": 2.4965901374816895, - "199": 2.4987947940826416, - "200": 2.5282092094421387, - "201": 2.4793591499328613, - "202": 2.494969367980957, - "203": 2.4941344261169434, - "204": 2.508145570755005, - "205": 2.4973325729370117, - "206": 2.507845163345337, - "207": 2.503416061401367, - "208": 2.509850025177002, - "209": 2.4855904579162598, - "210": 2.486443281173706, - "211": 2.4937844276428223, - "212": 2.501904010772705, - "213": 2.5076377391815186, - "214": 2.536623477935791, - "215": 2.5081586837768555 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 0.0949581, - 0.0955282, - 0.0959543, - 0.0956331, - 0.0951076, - 0.0952209, - 0.0955611, - 0.0957608, - 0.0953437, - 0.0949707, - 0.0952394, - 0.095436, - 0.0960434, - 0.0962004, - 0.0961345, - 0.0957703, - 0.095784, - 0.0958811, - 0.0959824, - 0.0957511, - 0.0953059, - 0.0954237, - 0.0955634, - 0.0954488, - 0.0955918, - 0.0961444, - 0.0962466, - 0.0962822, - 0.0961326, - 0.0955862, - 0.0958317, - 0.0960289, - 0.0964713, - 0.095579, - 0.0956047, - 0.0960579, - 0.0957843, - 0.0953571, - 0.0961599, - 0.0970568, - 0.0966649, - 0.0959828, - 0.0959983, - 0.0961819, - 0.0956266, - 0.0946784, - 0.0953349, - 0.0962059, - 0.0962499, - 0.0950868, - 0.0952725, - 0.09467, - 0.094555, - 0.0949009 - ], - "train_epoch_time": 4.84400200843811, - "train_loss": 2.5096431797169885, - "train_score": 0.255805012468286, - "val_loss": 2.5529645710122844, - "val_score": 0.25108083377749446 - }, - { - "epoch": 4, - "grad_norm": 1.2616267204284668, - "learning_rate": 0.1, - "model_norm": 87.48371887207031, - "step_logs": { - "grad_norm": { - "216": 1.4753199815750122, - "217": 1.4843089580535889, - "218": 1.702085018157959, - "219": 1.9024754762649536, - "220": 1.770369529724121, - "221": 1.5003074407577515, - "222": 1.4227277040481567, - "223": 1.2950764894485474, - "224": 1.260401725769043, - "225": 1.4663050174713135, - "226": 1.4639695882797241, - "227": 1.3770688772201538, - "228": 1.3091496229171753, - "229": 1.3295845985412598, - "230": 1.4498988389968872, - "231": 1.5564842224121094, - "232": 1.4637154340744019, - "233": 1.3805773258209229, - "234": 1.4203565120697021, - "235": 1.3844490051269531, - "236": 1.404839277267456, - "237": 1.3583775758743286, - "238": 1.377422571182251, - "239": 1.4285433292388916, - "240": 1.3406805992126465, - "241": 1.3092323541641235, - "242": 1.4103388786315918, - "243": 1.447407841682434, - "244": 1.336208462715149, - "245": 1.4705170392990112, - "246": 1.558107614517212, - "247": 1.5607337951660156, - "248": 1.5305641889572144, - "249": 1.7422747611999512, - "250": 1.6681272983551025, - "251": 1.5193485021591187, - "252": 1.5240287780761719, - "253": 1.4833558797836304, - "254": 1.4607470035552979, - "255": 1.3434115648269653, - "256": 1.3210078477859497, - "257": 1.3474167585372925, - "258": 1.377573013305664, - "259": 1.197898507118225, - "260": 1.021098256111145, - "261": 1.1203125715255737, - "262": 1.2265043258666992, - "263": 1.2755136489868164, - "264": 1.2767614126205444, - "265": 1.0849404335021973, - "266": 1.154232144355774, - "267": 1.2829339504241943, - "268": 1.204483151435852, - "269": 1.2616267204284668 - }, - "loss": { - "216": 2.52264404296875, - "217": 2.498532295227051, - "218": 2.513336658477783, - "219": 2.5009446144104004, - "220": 2.5377063751220703, - "221": 2.4965500831604004, - "222": 2.4910073280334473, - "223": 2.4984376430511475, - "224": 2.4834978580474854, - "225": 2.476687431335449, - "226": 2.509334087371826, - "227": 2.4919402599334717, - "228": 2.496373176574707, - "229": 2.46787691116333, - "230": 2.4884681701660156, - "231": 2.4986793994903564, - "232": 2.5191092491149902, - "233": 2.4749345779418945, - "234": 2.513787269592285, - "235": 2.482914686203003, - "236": 2.4701218605041504, - "237": 2.4905314445495605, - "238": 2.4847304821014404, - "239": 2.469801425933838, - "240": 2.4805047512054443, - "241": 2.4836788177490234, - "242": 2.4825828075408936, - "243": 2.4821274280548096, - "244": 2.486481189727783, - "245": 2.4815030097961426, - "246": 2.4846348762512207, - "247": 2.485553741455078, - "248": 2.5009756088256836, - "249": 2.487590789794922, - "250": 2.5263524055480957, - "251": 2.4827191829681396, - "252": 2.4865167140960693, - "253": 2.4900436401367188, - "254": 2.483656883239746, - "255": 2.483293056488037, - "256": 2.4900450706481934, - "257": 2.4850914478302, - "258": 2.4637441635131836, - "259": 2.4640021324157715, - "260": 2.4561357498168945, - "261": 2.4686825275421143, - "262": 2.480060577392578, - "263": 2.45208740234375, - "264": 2.4610671997070312, - "265": 2.452866554260254, - "266": 2.444319248199463, - "267": 2.437253713607788, - "268": 2.4502177238464355, - "269": 2.4595770835876465 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 0.0958643, - 0.0957773, - 0.0945506, - 0.0932522, - 0.0941839, - 0.0956864, - 0.0960957, - 0.0967525, - 0.0969008, - 0.09584, - 0.0959044, - 0.0963346, - 0.0966812, - 0.0965422, - 0.0959473, - 0.0953763, - 0.095921, - 0.0962922, - 0.0961421, - 0.0962837, - 0.0961586, - 0.0964279, - 0.0963225, - 0.0960325, - 0.0965036, - 0.0966644, - 0.0961483, - 0.0959507, - 0.0965341, - 0.0958248, - 0.0953421, - 0.0953288, - 0.0955261, - 0.0942495, - 0.0947802, - 0.0955576, - 0.0955379, - 0.0957687, - 0.0958813, - 0.0964936, - 0.0966146, - 0.0964759, - 0.0962916, - 0.0971705, - 0.0979216, - 0.097521, - 0.0970565, - 0.0967891, - 0.0967944, - 0.0976568, - 0.0973471, - 0.0967337, - 0.0971246, - 0.0968657 - ], - "train_epoch_time": 4.843307971954346, - "train_loss": 2.4654785947786, - "train_score": 0.2695906564614694, - "val_loss": 2.5175748082170806, - "val_score": 0.26251255750792996 - }, - { - "epoch": 5, - "grad_norm": 1.1777217388153076, - "learning_rate": 0.1, - "model_norm": 87.49819946289062, - "step_logs": { - "grad_norm": { - "270": 1.5681922435760498, - "271": 1.7132329940795898, - "272": 1.6316648721694946, - "273": 1.48341965675354, - "274": 1.5266185998916626, - "275": 1.5643476247787476, - "276": 1.5164021253585815, - "277": 1.4735674858093262, - "278": 1.5165610313415527, - "279": 1.6102924346923828, - "280": 1.541800618171692, - "281": 1.5295721292495728, - "282": 1.5281633138656616, - "283": 1.4212607145309448, - "284": 1.3670010566711426, - "285": 1.557177186012268, - "286": 1.717246651649475, - "287": 1.7445170879364014, - "288": 1.616858959197998, - "289": 1.492230772972107, - "290": 1.4443650245666504, - "291": 1.4495494365692139, - "292": 1.5596765279769897, - "293": 1.5833477973937988, - "294": 1.6813318729400635, - "295": 1.5986870527267456, - "296": 1.3505122661590576, - "297": 1.276371717453003, - "298": 1.2337177991867065, - "299": 1.1896799802780151, - "300": 1.2713990211486816, - "301": 1.3051244020462036, - "302": 1.4924052953720093, - "303": 1.5375165939331055, - "304": 1.6132373809814453, - "305": 1.6318001747131348, - "306": 1.5559868812561035, - "307": 1.5971157550811768, - "308": 1.6797418594360352, - "309": 1.6264094114303589, - "310": 1.5401262044906616, - "311": 1.3944423198699951, - "312": 1.283914566040039, - "313": 1.3803130388259888, - "314": 1.4413127899169922, - "315": 1.4595688581466675, - "316": 1.8463311195373535, - "317": 1.648689866065979, - "318": 1.7266327142715454, - "319": 1.6444430351257324, - "320": 1.6546168327331543, - "321": 1.461954116821289, - "322": 1.2780884504318237, - "323": 1.1777217388153076 - }, - "loss": { - "270": 2.4524741172790527, - "271": 2.484706163406372, - "272": 2.4989864826202393, - "273": 2.4877777099609375, - "274": 2.4687724113464355, - "275": 2.480257511138916, - "276": 2.479813575744629, - "277": 2.490440607070923, - "278": 2.4598026275634766, - "279": 2.5062897205352783, - "280": 2.470792531967163, - "281": 2.4756789207458496, - "282": 2.4691925048828125, - "283": 2.4633398056030273, - "284": 2.4576454162597656, - "285": 2.466660261154175, - "286": 2.4862875938415527, - "287": 2.4686734676361084, - "288": 2.484574794769287, - "289": 2.455036163330078, - "290": 2.4594199657440186, - "291": 2.4611167907714844, - "292": 2.4461159706115723, - "293": 2.483973264694214, - "294": 2.445065975189209, - "295": 2.496243715286255, - "296": 2.4402995109558105, - "297": 2.43380069732666, - "298": 2.4325037002563477, - "299": 2.4196219444274902, - "300": 2.4435489177703857, - "301": 2.4557044506073, - "302": 2.4425840377807617, - "303": 2.4472739696502686, - "304": 2.4574999809265137, - "305": 2.461731433868408, - "306": 2.4328176975250244, - "307": 2.4578046798706055, - "308": 2.454918146133423, - "309": 2.4583582878112793, - "310": 2.4348974227905273, - "311": 2.4526233673095703, - "312": 2.407618284225464, - "313": 2.4238271713256836, - "314": 2.4122562408447266, - "315": 2.424379348754883, - "316": 2.4501571655273438, - "317": 2.455643653869629, - "318": 2.430037260055542, - "319": 2.4438700675964355, - "320": 2.4183945655822754, - "321": 2.437368869781494, - "322": 2.415073871612549, - "323": 2.396214485168457 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 0.0952256, - 0.0944229, - 0.0949426, - 0.0957646, - 0.0954927, - 0.0952986, - 0.0955691, - 0.0958226, - 0.0955337, - 0.0950814, - 0.0954103, - 0.095488, - 0.0954847, - 0.0960614, - 0.0963374, - 0.0953151, - 0.0944016, - 0.094194, - 0.095002, - 0.0956617, - 0.0959313, - 0.095906, - 0.0952632, - 0.0951961, - 0.0945351, - 0.09513, - 0.0963976, - 0.0967615, - 0.0969663, - 0.0971584, - 0.0967983, - 0.0966481, - 0.0956395, - 0.0953927, - 0.0949712, - 0.0948692, - 0.09526, - 0.0950668, - 0.0945656, - 0.0948946, - 0.0953554, - 0.0961871, - 0.0966899, - 0.0962183, - 0.0958719, - 0.0957913, - 0.0934959, - 0.0947557, - 0.0942203, - 0.0947575, - 0.0946429, - 0.0957997, - 0.0967287, - 0.0971872 - ], - "train_epoch_time": 4.843637228012085, - "train_loss": 2.3951509260207713, - "train_score": 0.29289813482949517, - "val_loss": 2.450149717068152, - "val_score": 0.2812141221097909 - }, - { - "epoch": 6, - "grad_norm": 1.392254114151001, - "learning_rate": 0.1, - "model_norm": 87.51399993896484, - "step_logs": { - "grad_norm": { - "324": 1.1860820055007935, - "325": 1.139835238456726, - "326": 1.2046810388565063, - "327": 1.3439661264419556, - "328": 1.4064558744430542, - "329": 1.3534471988677979, - "330": 1.3458380699157715, - "331": 1.2536165714263916, - "332": 1.443936824798584, - "333": 1.554681658744812, - "334": 1.6362686157226562, - "335": 1.7995811700820923, - "336": 1.8044660091400146, - "337": 1.9230691194534302, - "338": 1.793067455291748, - "339": 1.5272263288497925, - "340": 1.470845103263855, - "341": 1.485547661781311, - "342": 1.5559905767440796, - "343": 1.6723933219909668, - "344": 1.6789045333862305, - "345": 1.5583182573318481, - "346": 1.497838020324707, - "347": 1.481758952140808, - "348": 1.4876999855041504, - "349": 1.4353747367858887, - "350": 1.425061583518982, - "351": 1.5410263538360596, - "352": 1.6250354051589966, - "353": 1.6732479333877563, - "354": 1.62208092212677, - "355": 1.639390230178833, - "356": 1.4765866994857788, - "357": 1.2863820791244507, - "358": 1.2355879545211792, - "359": 1.235979437828064, - "360": 1.3444041013717651, - "361": 1.580484390258789, - "362": 1.5247668027877808, - "363": 1.3593993186950684, - "364": 1.3910149335861206, - "365": 1.698815107345581, - "366": 1.7982655763626099, - "367": 1.7412036657333374, - "368": 1.5068066120147705, - "369": 1.3631888628005981, - "370": 1.187957525253296, - "371": 1.3362401723861694, - "372": 1.4558069705963135, - "373": 1.4392685890197754, - "374": 1.3998827934265137, - "375": 1.2879066467285156, - "376": 1.3272349834442139, - "377": 1.392254114151001 - }, - "loss": { - "324": 2.3722052574157715, - "325": 2.411508798599243, - "326": 2.385117530822754, - "327": 2.400644063949585, - "328": 2.4109585285186768, - "329": 2.3783326148986816, - "330": 2.3828747272491455, - "331": 2.3994574546813965, - "332": 2.3886513710021973, - "333": 2.4140372276306152, - "334": 2.393425941467285, - "335": 2.4094622135162354, - "336": 2.4645755290985107, - "337": 2.4251999855041504, - "338": 2.4448819160461426, - "339": 2.4097023010253906, - "340": 2.398139476776123, - "341": 2.3962512016296387, - "342": 2.4052672386169434, - "343": 2.3844337463378906, - "344": 2.40040922164917, - "345": 2.394157886505127, - "346": 2.4045188426971436, - "347": 2.4006471633911133, - "348": 2.419200897216797, - "349": 2.3951902389526367, - "350": 2.3989980220794678, - "351": 2.407146453857422, - "352": 2.4095678329467773, - "353": 2.4075894355773926, - "354": 2.415912628173828, - "355": 2.387561321258545, - "356": 2.4027962684631348, - "357": 2.377953052520752, - "358": 2.4014976024627686, - "359": 2.3618595600128174, - "360": 2.361713409423828, - "361": 2.382688283920288, - "362": 2.3993608951568604, - "363": 2.388090133666992, - "364": 2.3624143600463867, - "365": 2.3993406295776367, - "366": 2.4330270290374756, - "367": 2.4033923149108887, - "368": 2.4099574089050293, - "369": 2.3658499717712402, - "370": 2.3675289154052734, - "371": 2.3777809143066406, - "372": 2.3918118476867676, - "373": 2.3724188804626465, - "374": 2.3873162269592285, - "375": 2.3658089637756348, - "376": 2.361781120300293, - "377": 2.367701768875122 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 0.0971202, - 0.0973769, - 0.0970475, - 0.0963744, - 0.0960593, - 0.0962918, - 0.0963385, - 0.096829, - 0.0958182, - 0.0952325, - 0.0947031, - 0.0937028, - 0.0938035, - 0.0929156, - 0.0938305, - 0.0953838, - 0.0956841, - 0.0955979, - 0.0952082, - 0.09446, - 0.0944543, - 0.0951734, - 0.0955427, - 0.095627, - 0.0956258, - 0.0958764, - 0.0959393, - 0.0952991, - 0.094805, - 0.0945051, - 0.0948358, - 0.0946716, - 0.0956599, - 0.0966376, - 0.0969193, - 0.0968673, - 0.0963145, - 0.0950192, - 0.095379, - 0.096275, - 0.0960659, - 0.0943271, - 0.0937686, - 0.0940669, - 0.0955013, - 0.0962211, - 0.0971059, - 0.0963812, - 0.0957575, - 0.0958168, - 0.0960575, - 0.0966132, - 0.0964048, - 0.0960676 - ], - "train_epoch_time": 4.843627691268921, - "train_loss": 2.366916924467046, - "train_score": 0.29962338593119014, - "val_loss": 2.4222577751435588, - "val_score": 0.2854970944735268 - }, - { - "epoch": 7, - "grad_norm": 1.669904112815857, - "learning_rate": 0.1, - "model_norm": 87.52935028076172, - "step_logs": { - "grad_norm": { - "378": 1.4751824140548706, - "379": 1.554626703262329, - "380": 1.6681768894195557, - "381": 1.383451223373413, - "382": 1.116145372390747, - "383": 1.165587067604065, - "384": 1.4156062602996826, - "385": 1.8636415004730225, - "386": 1.8343044519424438, - "387": 1.7075082063674927, - "388": 1.7049862146377563, - "389": 1.808707594871521, - "390": 1.6509422063827515, - "391": 1.3917895555496216, - "392": 1.5421075820922852, - "393": 1.5486865043640137, - "394": 1.4522162675857544, - "395": 1.6417620182037354, - "396": 1.6343942880630493, - "397": 1.2589577436447144, - "398": 1.207227110862732, - "399": 1.4624335765838623, - "400": 1.4087663888931274, - "401": 1.358626365661621, - "402": 1.3215059041976929, - "403": 1.2855933904647827, - "404": 1.4452123641967773, - "405": 1.6052252054214478, - "406": 1.5231980085372925, - "407": 1.3681321144104004, - "408": 1.5055698156356812, - "409": 1.5484745502471924, - "410": 1.7293670177459717, - "411": 1.4456441402435303, - "412": 1.3399046659469604, - "413": 1.1268364191055298, - "414": 1.0692131519317627, - "415": 1.0324090719223022, - "416": 0.9636962413787842, - "417": 1.118695855140686, - "418": 1.3339556455612183, - "419": 1.4107352495193481, - "420": 1.6140360832214355, - "421": 1.7020269632339478, - "422": 1.5629189014434814, - "423": 1.5723743438720703, - "424": 1.4688177108764648, - "425": 1.4020779132843018, - "426": 1.548861026763916, - "427": 1.7145603895187378, - "428": 1.7066810131072998, - "429": 1.66238272190094, - "430": 1.5904337167739868, - "431": 1.669904112815857 - }, - "loss": { - "378": 2.371570110321045, - "379": 2.367591619491577, - "380": 2.3902666568756104, - "381": 2.361928939819336, - "382": 2.3372890949249268, - "383": 2.3406941890716553, - "384": 2.3528828620910645, - "385": 2.374268054962158, - "386": 2.4097800254821777, - "387": 2.3714141845703125, - "388": 2.3969037532806396, - "389": 2.3816514015197754, - "390": 2.401012659072876, - "391": 2.3565120697021484, - "392": 2.353564977645874, - "393": 2.3646492958068848, - "394": 2.353878974914551, - "395": 2.3482301235198975, - "396": 2.3860223293304443, - "397": 2.3507814407348633, - "398": 2.3687691688537598, - "399": 2.3317477703094482, - "400": 2.36746883392334, - "401": 2.3564627170562744, - "402": 2.358987331390381, - "403": 2.350877523422241, - "404": 2.360438346862793, - "405": 2.370955228805542, - "406": 2.381950616836548, - "407": 2.3405771255493164, - "408": 2.3413681983947754, - "409": 2.377681016921997, - "410": 2.3699045181274414, - "411": 2.3864994049072266, - "412": 2.359920024871826, - "413": 2.3440327644348145, - "414": 2.335439682006836, - "415": 2.3359432220458984, - "416": 2.321115255355835, - "417": 2.324124813079834, - "418": 2.326892137527466, - "419": 2.365225076675415, - "420": 2.3420863151550293, - "421": 2.362074851989746, - "422": 2.3602867126464844, - "423": 2.3613364696502686, - "424": 2.359339475631714, - "425": 2.3359060287475586, - "426": 2.348653793334961, - "427": 2.3661108016967773, - "428": 2.3529343605041504, - "429": 2.3723464012145996, - "430": 2.3470795154571533, - "431": 2.3493247032165527 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 0.0956132, - 0.0951438, - 0.0944991, - 0.0961061, - 0.0974042, - 0.0971797, - 0.0959155, - 0.0931843, - 0.0934743, - 0.0942087, - 0.0942827, - 0.0935734, - 0.0946289, - 0.0960522, - 0.0951909, - 0.0951734, - 0.0957124, - 0.0945723, - 0.094699, - 0.0967388, - 0.0970155, - 0.095615, - 0.0959772, - 0.096231, - 0.0964306, - 0.0966042, - 0.0957632, - 0.0948461, - 0.0953559, - 0.0961552, - 0.0953829, - 0.0951998, - 0.0940647, - 0.0958051, - 0.0963356, - 0.0973629, - 0.0976109, - 0.0977694, - 0.0980387, - 0.0973782, - 0.0963172, - 0.0959627, - 0.0947315, - 0.0942222, - 0.09508, - 0.0950253, - 0.0956278, - 0.0959621, - 0.095141, - 0.0941512, - 0.0941712, - 0.0944961, - 0.094887, - 0.0943976 - ], - "train_epoch_time": 4.8406760692596436, - "train_loss": 2.3526640700472994, - "train_score": 0.30506411416287743, - "val_loss": 2.3984971511788262, - "val_score": 0.29477612008730936 - }, - { - "epoch": 8, - "grad_norm": 1.352026104927063, - "learning_rate": 0.1, - "model_norm": 87.54547882080078, - "step_logs": { - "grad_norm": { - "432": 1.6159244775772095, - "433": 1.5814199447631836, - "434": 1.7194830179214478, - "435": 1.6321874856948853, - "436": 1.5271730422973633, - "437": 1.5218703746795654, - "438": 1.6716121435165405, - "439": 1.7178452014923096, - "440": 1.6843918561935425, - "441": 1.4707714319229126, - "442": 1.4967799186706543, - "443": 1.7388938665390015, - "444": 1.769057273864746, - "445": 1.697637677192688, - "446": 1.6095563173294067, - "447": 1.444825530052185, - "448": 1.3334919214248657, - "449": 1.3897777795791626, - "450": 1.3749045133590698, - "451": 1.1873587369918823, - "452": 1.1644511222839355, - "453": 1.2821862697601318, - "454": 1.3721824884414673, - "455": 1.4244450330734253, - "456": 1.5382767915725708, - "457": 1.5794689655303955, - "458": 1.7272545099258423, - "459": 1.818613886833191, - "460": 1.7755937576293945, - "461": 1.6588903665542603, - "462": 1.5850425958633423, - "463": 1.5457450151443481, - "464": 1.5867552757263184, - "465": 1.4877558946609497, - "466": 1.206352710723877, - "467": 1.1839745044708252, - "468": 1.3052968978881836, - "469": 1.3289339542388916, - "470": 1.4100149869918823, - "471": 1.4546879529953003, - "472": 1.489277720451355, - "473": 1.5010201930999756, - "474": 1.4384151697158813, - "475": 1.5987670421600342, - "476": 1.5114128589630127, - "477": 1.5496670007705688, - "478": 1.685840368270874, - "479": 1.6992778778076172, - "480": 1.5443183183670044, - "481": 1.3526712656021118, - "482": 1.2337331771850586, - "483": 1.2588777542114258, - "484": 1.3107843399047852, - "485": 1.352026104927063 - }, - "loss": { - "432": 2.3697707653045654, - "433": 2.352079391479492, - "434": 2.352780818939209, - "435": 2.3681323528289795, - "436": 2.3505606651306152, - "437": 2.347172737121582, - "438": 2.354395866394043, - "439": 2.3686418533325195, - "440": 2.3710150718688965, - "441": 2.3649792671203613, - "442": 2.321746349334717, - "443": 2.356660842895508, - "444": 2.3641395568847656, - "445": 2.3779103755950928, - "446": 2.336223602294922, - "447": 2.325019121170044, - "448": 2.317532539367676, - "449": 2.3175973892211914, - "450": 2.33904767036438, - "451": 2.314915895462036, - "452": 2.297523021697998, - "453": 2.3162975311279297, - "454": 2.3309273719787598, - "455": 2.3304758071899414, - "456": 2.325908660888672, - "457": 2.350257635116577, - "458": 2.346595048904419, - "459": 2.3654356002807617, - "460": 2.338963508605957, - "461": 2.3616232872009277, - "462": 2.332193374633789, - "463": 2.3296713829040527, - "464": 2.3327627182006836, - "465": 2.3536195755004883, - "466": 2.315582275390625, - "467": 2.3132290840148926, - "468": 2.3035173416137695, - "469": 2.317366123199463, - "470": 2.302553176879883, - "471": 2.3296689987182617, - "472": 2.325779438018799, - "473": 2.3288848400115967, - "474": 2.3056187629699707, - "475": 2.317692756652832, - "476": 2.304396390914917, - "477": 2.3225655555725098, - "478": 2.3388824462890625, - "479": 2.3441243171691895, - "480": 2.3449299335479736, - "481": 2.31307053565979, - "482": 2.2947025299072266, - "483": 2.292106866836548, - "484": 2.3105478286743164, - "485": 2.3324155807495117 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 0.0947783, - 0.094952, - 0.0940882, - 0.0946748, - 0.0952734, - 0.0952982, - 0.0943982, - 0.094136, - 0.0943547, - 0.0956267, - 0.0953974, - 0.0939714, - 0.0937921, - 0.0942864, - 0.0947467, - 0.0957036, - 0.0963053, - 0.0959997, - 0.0961161, - 0.0970449, - 0.0971337, - 0.0965729, - 0.0961179, - 0.0958283, - 0.0951594, - 0.0949601, - 0.0940231, - 0.0934658, - 0.0936859, - 0.0944944, - 0.094889, - 0.0951221, - 0.0948797, - 0.095509, - 0.0969534, - 0.0970592, - 0.0964336, - 0.0963294, - 0.0958614, - 0.0956556, - 0.0954488, - 0.095386, - 0.0957057, - 0.0947739, - 0.0952775, - 0.0950843, - 0.0942723, - 0.0941982, - 0.0951608, - 0.0961953, - 0.0967899, - 0.0966585, - 0.0964152, - 0.0962291 - ], - "train_epoch_time": 4.8405914306640625, - "train_loss": 2.303981245470526, - "train_score": 0.32044252157211306, - "val_loss": 2.368876140509079, - "val_score": 0.3031357642087816 - }, - { - "epoch": 9, - "grad_norm": 1.4082812070846558, - "learning_rate": 0.1, - "model_norm": 87.56199645996094, - "step_logs": { - "grad_norm": { - "486": 1.2864775657653809, - "487": 1.2649035453796387, - "488": 1.3978171348571777, - "489": 1.4597502946853638, - "490": 1.3774306774139404, - "491": 1.245689034461975, - "492": 1.214050531387329, - "493": 1.2781760692596436, - "494": 1.3233747482299805, - "495": 1.5580949783325195, - "496": 1.7756626605987549, - "497": 1.6769715547561646, - "498": 1.3581280708312988, - "499": 1.22910475730896, - "500": 1.1415377855300903, - "501": 1.0999529361724854, - "502": 1.2983170747756958, - "503": 1.6977559328079224, - "504": 1.830295443534851, - "505": 1.7282837629318237, - "506": 1.6115379333496094, - "507": 1.5725210905075073, - "508": 1.5280147790908813, - "509": 1.4653064012527466, - "510": 1.4042085409164429, - "511": 1.4489576816558838, - "512": 1.5666319131851196, - "513": 1.597487211227417, - "514": 1.3567883968353271, - "515": 1.2098150253295898, - "516": 1.2113839387893677, - "517": 1.4553508758544922, - "518": 1.5904985666275024, - "519": 1.6239534616470337, - "520": 1.5390082597732544, - "521": 1.5543642044067383, - "522": 1.6086974143981934, - "523": 1.4266736507415771, - "524": 1.320531964302063, - "525": 1.3625422716140747, - "526": 1.5260429382324219, - "527": 1.6582518815994263, - "528": 1.6923820972442627, - "529": 1.572224497795105, - "530": 1.6293480396270752, - "531": 1.664021611213684, - "532": 1.5834639072418213, - "533": 1.4302923679351807, - "534": 1.3162269592285156, - "535": 1.4563848972320557, - "536": 1.493881106376648, - "537": 1.4727799892425537, - "538": 1.3968660831451416, - "539": 1.4082812070846558 - }, - "loss": { - "486": 2.305068016052246, - "487": 2.299149990081787, - "488": 2.285799503326416, - "489": 2.3291726112365723, - "490": 2.2877230644226074, - "491": 2.323141574859619, - "492": 2.287027359008789, - "493": 2.3029165267944336, - "494": 2.308023452758789, - "495": 2.322627544403076, - "496": 2.3242156505584717, - "497": 2.332921028137207, - "498": 2.310991048812866, - "499": 2.2614879608154297, - "500": 2.2862839698791504, - "501": 2.2918813228607178, - "502": 2.2997522354125977, - "503": 2.3261070251464844, - "504": 2.3541808128356934, - "505": 2.3150224685668945, - "506": 2.3013603687286377, - "507": 2.321082592010498, - "508": 2.3025972843170166, - "509": 2.3107120990753174, - "510": 2.306215286254883, - "511": 2.294032573699951, - "512": 2.305708408355713, - "513": 2.330054521560669, - "514": 2.3123176097869873, - "515": 2.281512975692749, - "516": 2.277214527130127, - "517": 2.293863296508789, - "518": 2.3103156089782715, - "519": 2.304133415222168, - "520": 2.3078079223632812, - "521": 2.295689582824707, - "522": 2.3106651306152344, - "523": 2.3192057609558105, - "524": 2.2956135272979736, - "525": 2.3030943870544434, - "526": 2.298799991607666, - "527": 2.288393497467041, - "528": 2.2966222763061523, - "529": 2.2845492362976074, - "530": 2.3210959434509277, - "531": 2.30497407913208, - "532": 2.30173397064209, - "533": 2.306532859802246, - "534": 2.261338949203491, - "535": 2.2612709999084473, - "536": 2.282104969024658, - "537": 2.2685036659240723, - "538": 2.269813060760498, - "539": 2.2653915882110596 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 0.0965345, - 0.0966375, - 0.0959012, - 0.0956258, - 0.0960184, - 0.0967682, - 0.0968783, - 0.0965744, - 0.0963447, - 0.0950335, - 0.093648, - 0.0943154, - 0.0961624, - 0.0967679, - 0.0972291, - 0.0974283, - 0.0964648, - 0.0941658, - 0.0933576, - 0.0939397, - 0.0946589, - 0.0949425, - 0.0951747, - 0.0955603, - 0.0959003, - 0.0956243, - 0.0949467, - 0.0948081, - 0.0961718, - 0.0968921, - 0.0968785, - 0.095587, - 0.0948094, - 0.094587, - 0.0951189, - 0.0950009, - 0.0946971, - 0.0957963, - 0.0963409, - 0.0961257, - 0.0951789, - 0.0943324, - 0.0941304, - 0.0948676, - 0.0945906, - 0.0943338, - 0.0948347, - 0.0957537, - 0.0963107, - 0.0955201, - 0.0953384, - 0.0954373, - 0.0958789, - 0.0958063 - ], - "train_epoch_time": 4.841122388839722, - "train_loss": 2.2861214772529546, - "train_score": 0.32931088598249975, - "val_loss": 2.3297110687851497, - "val_score": 0.3184513128036473 - }, - { - "epoch": 10, - "grad_norm": 1.5447317361831665, - "learning_rate": 0.1, - "model_norm": 87.57910919189453, - "step_logs": { - "grad_norm": { - "540": 1.4496456384658813, - "541": 1.428707480430603, - "542": 1.3890002965927124, - "543": 1.3327672481536865, - "544": 1.3495399951934814, - "545": 1.4422661066055298, - "546": 1.4840099811553955, - "547": 1.4972702264785767, - "548": 1.4662714004516602, - "549": 1.5260636806488037, - "550": 1.3724501132965088, - "551": 1.175660252571106, - "552": 1.1435974836349487, - "553": 1.265898585319519, - "554": 1.3222699165344238, - "555": 1.3145053386688232, - "556": 1.4351242780685425, - "557": 1.523789405822754, - "558": 1.6304676532745361, - "559": 1.8852672576904297, - "560": 2.408698320388794, - "561": 1.6329765319824219, - "562": 1.2162243127822876, - "563": 1.349126935005188, - "564": 1.541892170906067, - "565": 1.5408660173416138, - "566": 1.482460379600525, - "567": 1.35426926612854, - "568": 1.4242154359817505, - "569": 1.4016752243041992, - "570": 1.491642713546753, - "571": 1.603078842163086, - "572": 1.5918983221054077, - "573": 1.5547316074371338, - "574": 1.5848643779754639, - "575": 1.6563140153884888, - "576": 1.4822837114334106, - "577": 1.3219908475875854, - "578": 1.351645827293396, - "579": 1.2938183546066284, - "580": 1.182682752609253, - "581": 1.1695317029953003, - "582": 1.1982420682907104, - "583": 1.2221649885177612, - "584": 1.2715436220169067, - "585": 1.2286652326583862, - "586": 1.1889058351516724, - "587": 1.1929199695587158, - "588": 1.2656303644180298, - "589": 1.4840818643569946, - "590": 1.5940841436386108, - "591": 1.5778687000274658, - "592": 1.5546985864639282, - "593": 1.5447317361831665 - }, - "loss": { - "540": 2.2956197261810303, - "541": 2.275216579437256, - "542": 2.293718099594116, - "543": 2.2971489429473877, - "544": 2.2522454261779785, - "545": 2.2696895599365234, - "546": 2.2697768211364746, - "547": 2.2869839668273926, - "548": 2.2796876430511475, - "549": 2.2783961296081543, - "550": 2.290621280670166, - "551": 2.2746129035949707, - "552": 2.248746395111084, - "553": 2.2493629455566406, - "554": 2.258431911468506, - "555": 2.281113862991333, - "556": 2.268069267272949, - "557": 2.2675845623016357, - "558": 2.2552695274353027, - "559": 2.310277223587036, - "560": 2.27936053276062, - "561": 2.3375160694122314, - "562": 2.2768137454986572, - "563": 2.259498119354248, - "564": 2.2936599254608154, - "565": 2.2451322078704834, - "566": 2.255136489868164, - "567": 2.245969533920288, - "568": 2.272033452987671, - "569": 2.2828240394592285, - "570": 2.2702159881591797, - "571": 2.2861099243164062, - "572": 2.2712438106536865, - "573": 2.2878823280334473, - "574": 2.2461040019989014, - "575": 2.2610225677490234, - "576": 2.251922845840454, - "577": 2.249603271484375, - "578": 2.254251718521118, - "579": 2.2607345581054688, - "580": 2.2414402961730957, - "581": 2.241957187652588, - "582": 2.242112398147583, - "583": 2.219125270843506, - "584": 2.249091148376465, - "585": 2.229884624481201, - "586": 2.224472999572754, - "587": 2.2105965614318848, - "588": 2.231696605682373, - "589": 2.2200279235839844, - "590": 2.259990692138672, - "591": 2.258254289627075, - "592": 2.2463722229003906, - "593": 2.2557711601257324 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 0.0956232, - 0.0957068, - 0.0959641, - 0.0962777, - 0.0961139, - 0.0956184, - 0.0953731, - 0.0953277, - 0.0954969, - 0.0951377, - 0.0960508, - 0.0970513, - 0.0971743, - 0.0965604, - 0.0962734, - 0.0963508, - 0.0956568, - 0.0951295, - 0.0944342, - 0.0928572, - 0.08871, - 0.0946038, - 0.0968538, - 0.0961282, - 0.0950727, - 0.094978, - 0.0953538, - 0.0960772, - 0.0957269, - 0.0958743, - 0.0953285, - 0.0946785, - 0.094716, - 0.0949825, - 0.0947046, - 0.0942803, - 0.0953485, - 0.0962609, - 0.0961056, - 0.0964299, - 0.0969742, - 0.0970398, - 0.0968975, - 0.0967441, - 0.0965303, - 0.0967259, - 0.0969207, - 0.0968817, - 0.0965355, - 0.0952739, - 0.0946773, - 0.0947756, - 0.0948947, - 0.0949766 - ], - "train_epoch_time": 4.840698480606079, - "train_loss": 2.2450779760927175, - "train_score": 0.34053084642507425, - "val_loss": 2.3052828722514733, - "val_score": 0.3245595938576349 - }, - { - "epoch": 11, - "grad_norm": 1.2617815732955933, - "learning_rate": 0.1, - "model_norm": 87.59596252441406, - "step_logs": { - "grad_norm": { - "594": 1.588753581047058, - "595": 1.5247321128845215, - "596": 1.5911153554916382, - "597": 1.775584101676941, - "598": 1.8242466449737549, - "599": 1.8073973655700684, - "600": 1.5275120735168457, - "601": 1.3717238903045654, - "602": 1.3489559888839722, - "603": 1.3506227731704712, - "604": 1.3558343648910522, - "605": 1.2104554176330566, - "606": 1.1499735116958618, - "607": 1.1806302070617676, - "608": 1.1823066473007202, - "609": 1.1349818706512451, - "610": 1.1166205406188965, - "611": 1.16506826877594, - "612": 1.3397775888442993, - "613": 1.5055805444717407, - "614": 1.5274930000305176, - "615": 1.457973599433899, - "616": 1.5001298189163208, - "617": 1.4768356084823608, - "618": 1.4328608512878418, - "619": 1.4925471544265747, - "620": 1.7491965293884277, - "621": 1.9343494176864624, - "622": 1.8554956912994385, - "623": 1.6830286979675293, - "624": 1.617335557937622, - "625": 1.7890456914901733, - "626": 1.7231693267822266, - "627": 1.5459706783294678, - "628": 1.5641156435012817, - "629": 1.757225513458252, - "630": 1.7565991878509521, - "631": 1.540608525276184, - "632": 1.3137248754501343, - "633": 1.271006464958191, - "634": 1.3354873657226562, - "635": 1.3851101398468018, - "636": 1.3646599054336548, - "637": 1.2995293140411377, - "638": 1.2750593423843384, - "639": 1.447117805480957, - "640": 1.653588891029358, - "641": 1.9522098302841187, - "642": 1.6688272953033447, - "643": 1.3668769598007202, - "644": 1.4728426933288574, - "645": 1.5510808229446411, - "646": 1.4057600498199463, - "647": 1.2617815732955933 - }, - "loss": { - "594": 2.248753547668457, - "595": 2.270047187805176, - "596": 2.249763011932373, - "597": 2.2486512660980225, - "598": 2.2625999450683594, - "599": 2.257965087890625, - "600": 2.231330394744873, - "601": 2.2484304904937744, - "602": 2.23899507522583, - "603": 2.2469663619995117, - "604": 2.2197325229644775, - "605": 2.2334465980529785, - "606": 2.215475559234619, - "607": 2.2137811183929443, - "608": 2.1991682052612305, - "609": 2.2368717193603516, - "610": 2.2260210514068604, - "611": 2.194429397583008, - "612": 2.232891321182251, - "613": 2.225104570388794, - "614": 2.2434089183807373, - "615": 2.240908145904541, - "616": 2.2376346588134766, - "617": 2.262025833129883, - "618": 2.267366409301758, - "619": 2.251049518585205, - "620": 2.246934413909912, - "621": 2.285754442214966, - "622": 2.2500481605529785, - "623": 2.253274917602539, - "624": 2.2515296936035156, - "625": 2.238271713256836, - "626": 2.2604024410247803, - "627": 2.2659225463867188, - "628": 2.2380685806274414, - "629": 2.248012065887451, - "630": 2.2320353984832764, - "631": 2.2373127937316895, - "632": 2.2257747650146484, - "633": 2.21968936920166, - "634": 2.200162887573242, - "635": 2.205569267272949, - "636": 2.226161003112793, - "637": 2.232771158218384, - "638": 2.227863073348999, - "639": 2.191601037979126, - "640": 2.2179627418518066, - "641": 2.2556052207946777, - "642": 2.2579398155212402, - "643": 2.2348270416259766, - "644": 2.222165584564209, - "645": 2.2351973056793213, - "646": 2.2275376319885254, - "647": 2.2061872482299805 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 0.0946859, - 0.0951288, - 0.0946732, - 0.093449, - 0.0931497, - 0.0932543, - 0.0950313, - 0.0959838, - 0.0960951, - 0.0960991, - 0.0960239, - 0.096824, - 0.0971019, - 0.0969479, - 0.0969198, - 0.0972012, - 0.0972757, - 0.097, - 0.0961359, - 0.0951532, - 0.0950569, - 0.0954719, - 0.0952123, - 0.0954007, - 0.0956686, - 0.0952852, - 0.0936254, - 0.0924344, - 0.0928931, - 0.0940862, - 0.09451, - 0.0933272, - 0.0938367, - 0.0949904, - 0.0948177, - 0.0935734, - 0.0935347, - 0.0949629, - 0.0962677, - 0.0964888, - 0.0961047, - 0.095832, - 0.0959852, - 0.096356, - 0.0964797, - 0.0954402, - 0.0941938, - 0.09221, - 0.0941911, - 0.0959876, - 0.0953462, - 0.0948931, - 0.0957526, - 0.0965174 - ], - "train_epoch_time": 4.84069037437439, - "train_loss": 2.2040659528893753, - "train_score": 0.3523459916306363, - "val_loss": 2.2799425461809615, - "val_score": 0.335758109351387 - }, - { - "epoch": 12, - "grad_norm": 0.9351850748062134, - "learning_rate": 0.1, - "model_norm": 87.61099243164062, - "step_logs": { - "grad_norm": { - "648": 1.3094134330749512, - "649": 1.3486777544021606, - "650": 1.4072928428649902, - "651": 1.5142947435379028, - "652": 1.5094581842422485, - "653": 1.3887529373168945, - "654": 1.4224172830581665, - "655": 1.5083428621292114, - "656": 1.5954478979110718, - "657": 1.5511927604675293, - "658": 1.4117122888565063, - "659": 1.3409422636032104, - "660": 1.2231510877609253, - "661": 1.237666368484497, - "662": 1.2927314043045044, - "663": 1.2787615060806274, - "664": 1.2403388023376465, - "665": 1.2749487161636353, - "666": 1.264051914215088, - "667": 1.1782317161560059, - "668": 1.133363962173462, - "669": 1.0662480592727661, - "670": 1.0082484483718872, - "671": 0.9322500824928284, - "672": 0.9343487024307251, - "673": 1.0088428258895874, - "674": 1.0157840251922607, - "675": 0.8826058506965637, - "676": 0.8474686741828918, - "677": 0.8494150042533875, - "678": 0.9389997720718384, - "679": 1.0475910902023315, - "680": 1.0899783372879028, - "681": 1.17728590965271, - "682": 1.20793616771698, - "683": 1.1445024013519287, - "684": 1.1728129386901855, - "685": 1.3455970287322998, - "686": 1.2690045833587646, - "687": 1.0965877771377563, - "688": 1.0773963928222656, - "689": 1.0435794591903687, - "690": 1.0178760290145874, - "691": 1.0164440870285034, - "692": 1.0642462968826294, - "693": 1.0559436082839966, - "694": 0.9758284091949463, - "695": 1.0340592861175537, - "696": 1.075991153717041, - "697": 1.0048353672027588, - "698": 1.035723090171814, - "699": 1.1538376808166504, - "700": 1.0841097831726074, - "701": 0.9351850748062134 - }, - "loss": { - "648": 2.202803611755371, - "649": 2.2173171043395996, - "650": 2.181130886077881, - "651": 2.2152657508850098, - "652": 2.229952812194824, - "653": 2.2089314460754395, - "654": 2.208660840988159, - "655": 2.2242431640625, - "656": 2.226830005645752, - "657": 2.2219443321228027, - "658": 2.209901809692383, - "659": 2.211392879486084, - "660": 2.195700168609619, - "661": 2.1754508018493652, - "662": 2.2183332443237305, - "663": 2.2171554565429688, - "664": 2.198988437652588, - "665": 2.209177017211914, - "666": 2.1785616874694824, - "667": 2.16093373298645, - "668": 2.1865477561950684, - "669": 2.1781415939331055, - "670": 2.1762661933898926, - "671": 2.1845035552978516, - "672": 2.1780917644500732, - "673": 2.1550164222717285, - "674": 2.176767587661743, - "675": 2.1646058559417725, - "676": 2.134427547454834, - "677": 2.1593589782714844, - "678": 2.1512317657470703, - "679": 2.180830478668213, - "680": 2.173267364501953, - "681": 2.1815433502197266, - "682": 2.1676177978515625, - "683": 2.177546977996826, - "684": 2.179234504699707, - "685": 2.1747994422912598, - "686": 2.195272445678711, - "687": 2.203248977661133, - "688": 2.168781280517578, - "689": 2.156891345977783, - "690": 2.158818006515503, - "691": 2.1580328941345215, - "692": 2.140256404876709, - "693": 2.1329727172851562, - "694": 2.1519196033477783, - "695": 2.170727252960205, - "696": 2.1416468620300293, - "697": 2.178642749786377, - "698": 2.1451587677001953, - "699": 2.1795005798339844, - "700": 2.166288375854492, - "701": 2.142151355743408 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 0.096254, - 0.0954902, - 0.0945269, - 0.0934034, - 0.0929019, - 0.0929798, - 0.0922284, - 0.0912155, - 0.090163, - 0.0898497, - 0.0900187, - 0.0898066, - 0.0897611, - 0.0890904, - 0.0883188, - 0.0878027, - 0.0873691, - 0.0866528, - 0.0860829, - 0.0858378, - 0.0854541, - 0.0851039, - 0.08471, - 0.0843626, - 0.0837553, - 0.0829122, - 0.0823128, - 0.0821022, - 0.0815807, - 0.080988, - 0.0801432, - 0.0792517, - 0.0785245, - 0.077665, - 0.0769656, - 0.0765892, - 0.0759144, - 0.0747592, - 0.074453, - 0.0743847, - 0.0738209, - 0.073309, - 0.0727804, - 0.0721875, - 0.0714622, - 0.0708841, - 0.0704882, - 0.0697678, - 0.0690569, - 0.0686436, - 0.067961, - 0.067114, - 0.0666731, - 0.0663723 - ], - "train_epoch_time": 4.840441703796387, - "train_loss": 2.145929629012536, - "train_score": 0.36693754481790397, - "val_loss": 2.2230640298052853, - "val_score": 0.34471423012107166 - }, - { - "epoch": 13, - "grad_norm": 0.7406230568885803, - "learning_rate": 0.06666666666666668, - "model_norm": 87.62065124511719, - "step_logs": { - "grad_norm": { - "702": 0.9913109540939331, - "703": 0.9523283839225769, - "704": 0.8870086669921875, - "705": 0.7779407501220703, - "706": 0.7760103940963745, - "707": 0.8092449903488159, - "708": 0.8593055009841919, - "709": 0.8720701932907104, - "710": 0.7881453037261963, - "711": 0.813335120677948, - "712": 0.8608036041259766, - "713": 0.9568566679954529, - "714": 0.9770629405975342, - "715": 0.9285096526145935, - "716": 0.8567588925361633, - "717": 0.7760965824127197, - "718": 0.7677116990089417, - "719": 0.768796443939209, - "720": 0.8074935674667358, - "721": 0.8065921664237976, - "722": 0.8056923747062683, - "723": 0.7458757162094116, - "724": 0.7424354553222656, - "725": 0.8486406207084656, - "726": 0.8646981716156006, - "727": 0.902239978313446, - "728": 0.8873850107192993, - "729": 0.8135435581207275, - "730": 0.7287591695785522, - "731": 0.7761465311050415, - "732": 0.7969075441360474, - "733": 0.7741085290908813, - "734": 0.7060049772262573, - "735": 0.6913495063781738, - "736": 0.7907267212867737, - "737": 0.8001796007156372, - "738": 0.7330057621002197, - "739": 0.7804092764854431, - "740": 0.7430421113967896, - "741": 0.7955508828163147, - "742": 0.7392135262489319, - "743": 0.8314844369888306, - "744": 0.8225041031837463, - "745": 0.7510942816734314, - "746": 0.7440597414970398, - "747": 0.7147427201271057, - "748": 0.6901541352272034, - "749": 0.737075924873352, - "750": 0.7721534371376038, - "751": 0.7181875109672546, - "752": 0.7882216572761536, - "753": 0.7213799953460693, - "754": 0.7337266802787781, - "755": 0.7406230568885803 - }, - "loss": { - "702": 2.136104106903076, - "703": 2.1402642726898193, - "704": 2.127439498901367, - "705": 2.1252241134643555, - "706": 2.1404170989990234, - "707": 2.1388142108917236, - "708": 2.1538124084472656, - "709": 2.137284278869629, - "710": 2.139042854309082, - "711": 2.140145778656006, - "712": 2.143686294555664, - "713": 2.150625705718994, - "714": 2.1344122886657715, - "715": 2.156564950942993, - "716": 2.126880168914795, - "717": 2.1501851081848145, - "718": 2.144930362701416, - "719": 2.1238722801208496, - "720": 2.1252119541168213, - "721": 2.1325180530548096, - "722": 2.1329076290130615, - "723": 2.1131508350372314, - "724": 2.1258697509765625, - "725": 2.1209659576416016, - "726": 2.1325011253356934, - "727": 2.1062605381011963, - "728": 2.1219520568847656, - "729": 2.1355714797973633, - "730": 2.1436898708343506, - "731": 2.1131651401519775, - "732": 2.1354570388793945, - "733": 2.1211252212524414, - "734": 2.110975503921509, - "735": 2.1173393726348877, - "736": 2.122076988220215, - "737": 2.099818229675293, - "738": 2.115570068359375, - "739": 2.1291046142578125, - "740": 2.133324146270752, - "741": 2.11545467376709, - "742": 2.144829273223877, - "743": 2.1413776874542236, - "744": 2.1241583824157715, - "745": 2.1375081539154053, - "746": 2.114259719848633, - "747": 2.122079849243164, - "748": 2.10994291305542, - "749": 2.078666925430298, - "750": 2.1118717193603516, - "751": 2.103835344314575, - "752": 2.102637767791748, - "753": 2.1102163791656494, - "754": 2.105670928955078, - "755": 2.1023080348968506 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 0.0656598, - 0.0651378, - 0.0646499, - 0.0642221, - 0.063623, - 0.0629673, - 0.0622907, - 0.0616617, - 0.06118, - 0.0605393, - 0.0598679, - 0.059123, - 0.0584842, - 0.0579626, - 0.0574495, - 0.0569495, - 0.0563505, - 0.0557372, - 0.0550861, - 0.0544817, - 0.0538756, - 0.0533267, - 0.0527236, - 0.0520059, - 0.0513848, - 0.0507323, - 0.0501463, - 0.0496156, - 0.0490825, - 0.0484288, - 0.0478059, - 0.0472139, - 0.0466552, - 0.0460556, - 0.0453737, - 0.0447543, - 0.044195, - 0.0435541, - 0.0429696, - 0.0423229, - 0.0417521, - 0.0410837, - 0.0404781, - 0.0399121, - 0.0393029, - 0.0387077, - 0.038107, - 0.0374699, - 0.0368444, - 0.0362579, - 0.0356141, - 0.0350332, - 0.0344158, - 0.0338009 - ], - "train_epoch_time": 4.840989112854004, - "train_loss": 2.1088773102124074, - "train_score": 0.37540127322013617, - "val_loss": 2.190773714560455, - "val_score": 0.35245048770537746 - }, - { - "epoch": 14, - "grad_norm": 0.6430807709693909, - "learning_rate": 0.03333333333333334, - "model_norm": 87.62384033203125, - "step_logs": { - "grad_norm": { - "756": 0.7788354158401489, - "757": 0.7368332743644714, - "758": 0.7312880158424377, - "759": 0.7624044418334961, - "760": 0.7455044388771057, - "761": 0.6700214147567749, - "762": 0.6613816618919373, - "763": 0.6746414303779602, - "764": 0.7023569345474243, - "765": 0.7054687142372131, - "766": 0.7379094362258911, - "767": 0.6996170878410339, - "768": 0.7023919224739075, - "769": 0.6621960401535034, - "770": 0.6923285722732544, - "771": 0.6755625009536743, - "772": 0.6708892583847046, - "773": 0.7081537246704102, - "774": 0.6640799641609192, - "775": 0.6700623631477356, - "776": 0.6931478977203369, - "777": 0.6490085124969482, - "778": 0.6820791363716125, - "779": 0.6616635918617249, - "780": 0.6551328897476196, - "781": 0.6683677434921265, - "782": 0.7173438668251038, - "783": 0.7942193746566772, - "784": 0.6706562042236328, - "785": 0.657605767250061, - "786": 0.6927198171615601, - "787": 0.721331000328064, - "788": 0.7300679683685303, - "789": 0.6800004243850708, - "790": 0.6603149175643921, - "791": 0.684819757938385, - "792": 0.6196650266647339, - "793": 0.6337314248085022, - "794": 0.6710205078125, - "795": 0.6451219320297241, - "796": 0.6872656345367432, - "797": 0.6542031764984131, - "798": 0.6223916411399841, - "799": 0.6473522782325745, - "800": 0.6199365854263306, - "801": 0.6282517910003662, - "802": 0.6723541617393494, - "803": 0.6211723685264587, - "804": 0.6353791952133179, - "805": 0.6930897831916809, - "806": 0.6347803473472595, - "807": 0.6613824963569641, - "808": 0.7201979756355286, - "809": 0.6430807709693909 - }, - "loss": { - "756": 2.1183676719665527, - "757": 2.1099693775177, - "758": 2.113245964050293, - "759": 2.110524892807007, - "760": 2.0794217586517334, - "761": 2.114016056060791, - "762": 2.1083898544311523, - "763": 2.098907470703125, - "764": 2.09934139251709, - "765": 2.1003122329711914, - "766": 2.1237707138061523, - "767": 2.1085312366485596, - "768": 2.1284918785095215, - "769": 2.0959107875823975, - "770": 2.105087995529175, - "771": 2.086397647857666, - "772": 2.1112520694732666, - "773": 2.1046180725097656, - "774": 2.1233530044555664, - "775": 2.11547589302063, - "776": 2.0912961959838867, - "777": 2.093679904937744, - "778": 2.120704412460327, - "779": 2.1068384647369385, - "780": 2.110521078109741, - "781": 2.0921778678894043, - "782": 2.083437442779541, - "783": 2.1080901622772217, - "784": 2.100724697113037, - "785": 2.1047897338867188, - "786": 2.0874509811401367, - "787": 2.111093044281006, - "788": 2.0974903106689453, - "789": 2.122121810913086, - "790": 2.084350109100342, - "791": 2.115586519241333, - "792": 2.0995631217956543, - "793": 2.1061604022979736, - "794": 2.1131274700164795, - "795": 2.096116304397583, - "796": 2.096200466156006, - "797": 2.1056478023529053, - "798": 2.0977399349212646, - "799": 2.088071823120117, - "800": 2.08858585357666, - "801": 2.0939793586730957, - "802": 2.089242935180664, - "803": 2.0822339057922363, - "804": 2.08237886428833, - "805": 2.103977680206299, - "806": 2.0992512702941895, - "807": 2.084773540496826, - "808": 2.124077558517456, - "809": 2.1066696643829346 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 0.033175, - 0.0325789, - 0.0319689, - 0.0313456, - 0.0307374, - 0.0301501, - 0.0295388, - 0.0289214, - 0.0283006, - 0.0276867, - 0.0270663, - 0.0264617, - 0.0258483, - 0.0252418, - 0.0246221, - 0.0240109, - 0.0233983, - 0.0227775, - 0.0221711, - 0.0215555, - 0.0209372, - 0.0203287, - 0.0197104, - 0.0190978, - 0.0184837, - 0.0178671, - 0.0172471, - 0.0166252, - 0.0160219, - 0.0154077, - 0.0147896, - 0.0141727, - 0.0135569, - 0.0129447, - 0.0123298, - 0.0117132, - 0.0110998, - 0.0104833, - 0.00986616, - 0.00925076, - 0.00863357, - 0.00801815, - 0.00740235, - 0.0067855, - 0.00616934, - 0.00555265, - 0.00493563, - 0.00431926, - 0.00370237, - 0.00308533, - 0.00246855, - 0.00185149, - 0.00123438, - 0.000617247 - ], - "train_epoch_time": 4.840726137161255, - "train_loss": 2.096882079287273, - "train_score": 0.37770355095822294, - "val_loss": 2.181603769210395, - "val_score": 0.3543924005652952 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:21:58.660459", - "final_model_norm": 87.62384033203125, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:20:16.888493", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.7124191522598267, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.42977142333984, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 9.073990821838379, - "3": 5.383543014526367, - "4": 4.2085280418396, - "5": 7.449563980102539, - "6": 21.270666122436523, - "7": 8.745401382446289, - "8": 5.525934219360352, - "9": 4.254435062408447, - "10": 3.7144956588745117, - "11": 6.272667407989502, - "12": 6.959863185882568, - "13": 3.8103017807006836, - "14": 5.224253177642822, - "15": 6.990877628326416, - "16": 3.912477970123291, - "17": 5.975802898406982, - "18": 16.006542205810547, - "19": 4.558985710144043, - "20": 26.973201751708984, - "21": 4.577878952026367, - "22": 7.547962188720703, - "23": 6.881000518798828, - "24": 3.721916437149048, - "25": 3.037304401397705, - "26": 3.674651622772217, - "27": 3.39523983001709, - "28": 4.316905975341797, - "29": 4.059088706970215, - "30": 3.932001829147339, - "31": 2.15552020072937, - "32": 2.463045358657837, - "33": 3.3573660850524902, - "34": 3.1214869022369385, - "35": 2.7118217945098877, - "36": 3.5217769145965576, - "37": 4.170468807220459, - "38": 4.609957218170166, - "39": 3.713710069656372, - "40": 25.984375, - "41": 2.281721830368042, - "42": 3.208026647567749, - "43": 3.073312520980835, - "44": 3.6890244483947754, - "45": 3.754546642303467, - "46": 3.4417636394500732, - "47": 2.5035364627838135, - "48": 2.6139848232269287, - "49": 2.6245594024658203, - "50": 2.4692258834838867, - "51": 3.0870068073272705, - "52": 2.605820894241333, - "53": 1.7124191522598267 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.969463348388672, - "3": 3.7336039543151855, - "4": 3.6111388206481934, - "5": 3.5463061332702637, - "6": 3.959855556488037, - "7": 3.9910166263580322, - "8": 3.5273513793945312, - "9": 3.487791061401367, - "10": 3.4190897941589355, - "11": 3.400611400604248, - "12": 3.5444602966308594, - "13": 3.259803056716919, - "14": 3.2384300231933594, - "15": 3.2085137367248535, - "16": 3.2316131591796875, - "17": 3.1274282932281494, - "18": 3.285050630569458, - "19": 3.1542654037475586, - "20": 3.476672649383545, - "21": 3.187765598297119, - "22": 3.2425570487976074, - "23": 3.4823873043060303, - "24": 3.005009174346924, - "25": 2.9901604652404785, - "26": 2.8855042457580566, - "27": 2.97104811668396, - "28": 2.924663543701172, - "29": 2.910982131958008, - "30": 3.047665596008301, - "31": 2.7814574241638184, - "32": 2.8077807426452637, - "33": 2.804180860519409, - "34": 2.8649051189422607, - "35": 2.7817745208740234, - "36": 2.811476230621338, - "37": 2.8675894737243652, - "38": 2.974514961242676, - "39": 2.815462112426758, - "40": 3.4331846237182617, - "41": 2.7526211738586426, - "42": 2.756105422973633, - "43": 2.779527187347412, - "44": 2.8949084281921387, - "45": 2.8271660804748535, - "46": 2.934957981109619, - "47": 2.736778736114502, - "48": 2.7559447288513184, - "49": 2.750572919845581, - "50": 2.771946430206299, - "51": 2.7134382724761963, - "52": 2.85939359664917, - "53": 2.663601875305176 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 1e-11, - 0.00179479, - 0.00384067, - 0.00586345, - 0.00784607, - 0.00927433, - 0.00711937, - 0.0123441, - 0.0149637, - 0.0171968, - 0.0192242, - 0.0195161, - 0.0206186, - 0.024577, - 0.025045, - 0.0244204, - 0.0297456, - 0.028473, - 0.0149759, - 0.0337719, - 0.00771405, - 0.036905, - 0.0317337, - 0.0350418, - 0.0432185, - 0.0464197, - 0.0463594, - 0.0488794, - 0.0475215, - 0.0498222, - 0.0520748, - 0.0589475, - 0.0598612, - 0.0582705, - 0.0609518, - 0.0640716, - 0.0621324, - 0.060437, - 0.0597722, - 0.0654888, - 0.00902262, - 0.0760988, - 0.0726122, - 0.0750357, - 0.0729175, - 0.0735068, - 0.0775939, - 0.0848653, - 0.0857903, - 0.0872887, - 0.0900919, - 0.0850629, - 0.0893865, - 0.0947827 - ], - "train_epoch_time": 4.84323263168335, - "train_loss": 2.63499828616379, - "train_score": 0.25480967533468685, - "val_loss": 2.6631805685987153, - "val_score": 0.250995623018383 - }, - { - "epoch": 1, - "grad_norm": 1.8427996635437012, - "learning_rate": 0.1, - "model_norm": 87.4466781616211, - "step_logs": { - "grad_norm": { - "54": 1.7451666593551636, - "55": 2.056056022644043, - "56": 2.4911372661590576, - "57": 3.8065311908721924, - "58": 2.7564525604248047, - "59": 1.779683232307434, - "60": 1.465343952178955, - "61": 1.544689655303955, - "62": 2.3603627681732178, - "63": 2.182406187057495, - "64": 1.9513694047927856, - "65": 2.2473866939544678, - "66": 2.59765625, - "67": 2.127577781677246, - "68": 1.5903594493865967, - "69": 1.9788583517074585, - "70": 2.8934824466705322, - "71": 2.4394099712371826, - "72": 1.713663101196289, - "73": 1.8603097200393677, - "74": 2.2458744049072266, - "75": 2.239811658859253, - "76": 2.100458860397339, - "77": 1.9803333282470703, - "78": 1.9503154754638672, - "79": 2.045192241668701, - "80": 1.9893498420715332, - "81": 1.8273718357086182, - "82": 1.7364015579223633, - "83": 1.7854942083358765, - "84": 1.8950395584106445, - "85": 1.85012686252594, - "86": 1.5653562545776367, - "87": 1.734098196029663, - "88": 2.142047166824341, - "89": 2.030580759048462, - "90": 1.7446129322052002, - "91": 1.810481071472168, - "92": 1.9859819412231445, - "93": 1.8290988206863403, - "94": 1.5478321313858032, - "95": 1.6295554637908936, - "96": 2.0320796966552734, - "97": 2.0534605979919434, - "98": 1.738858699798584, - "99": 1.7014224529266357, - "100": 1.842415452003479, - "101": 1.8293942213058472, - "102": 2.0938055515289307, - "103": 1.8832848072052002, - "104": 1.3503742218017578, - "105": 1.5346651077270508, - "106": 2.1024107933044434, - "107": 1.8427996635437012 - }, - "loss": { - "54": 2.6397061347961426, - "55": 2.6529054641723633, - "56": 2.7125747203826904, - "57": 2.7431840896606445, - "58": 2.9066057205200195, - "59": 2.655151844024658, - "60": 2.6396517753601074, - "61": 2.588855504989624, - "62": 2.634075164794922, - "63": 2.72806978225708, - "64": 2.6072847843170166, - "65": 2.684499740600586, - "66": 2.6587204933166504, - "67": 2.7239718437194824, - "68": 2.5890772342681885, - "69": 2.6363325119018555, - "70": 2.653327703475952, - "71": 2.7713708877563477, - "72": 2.61672306060791, - "73": 2.6043858528137207, - "74": 2.624462127685547, - "75": 2.6916556358337402, - "76": 2.6168556213378906, - "77": 2.6732101440429688, - "78": 2.5770888328552246, - "79": 2.65824031829834, - "80": 2.5798745155334473, - "81": 2.6446685791015625, - "82": 2.5722827911376953, - "83": 2.595245361328125, - "84": 2.584846019744873, - "85": 2.6358859539031982, - "86": 2.5438292026519775, - "87": 2.5701117515563965, - "88": 2.5886170864105225, - "89": 2.6534128189086914, - "90": 2.5985097885131836, - "91": 2.5908732414245605, - "92": 2.6019034385681152, - "93": 2.5985682010650635, - "94": 2.554375648498535, - "95": 2.5828542709350586, - "96": 2.591590404510498, - "97": 2.619809150695801, - "98": 2.5770506858825684, - "99": 2.5957045555114746, - "100": 2.574801445007324, - "101": 2.597097396850586, - "102": 2.5735721588134766, - "103": 2.620408535003662, - "104": 2.5321273803710938, - "105": 2.5520882606506348, - "106": 2.5506420135498047, - "107": 2.63661527633667 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 0.0945458, - 0.0926205, - 0.0897353, - 0.0791075, - 0.0884406, - 0.0943713, - 0.0960917, - 0.0955947, - 0.090436, - 0.0919714, - 0.0931946, - 0.0914016, - 0.088739, - 0.0923286, - 0.095343, - 0.0930867, - 0.086373, - 0.0903048, - 0.0946868, - 0.0937699, - 0.091233, - 0.0914753, - 0.0922255, - 0.0931661, - 0.0931273, - 0.0927062, - 0.0928764, - 0.0940617, - 0.0944637, - 0.0942134, - 0.0935046, - 0.0939029, - 0.0954051, - 0.0944732, - 0.0918589, - 0.0927904, - 0.0944674, - 0.0940506, - 0.0929547, - 0.0939519, - 0.0955205, - 0.0951108, - 0.0926211, - 0.0925517, - 0.0944586, - 0.0947183, - 0.0938159, - 0.0939469, - 0.0921511, - 0.0936614, - 0.0965244, - 0.0955893, - 0.0920262, - 0.0939497 - ], - "train_epoch_time": 4.839537858963013, - "train_loss": 2.53486723133618, - "train_score": 0.25927636303552765, - "val_loss": 2.5695702014095336, - "val_score": 0.25444442485421725 - }, - { - "epoch": 2, - "grad_norm": 1.6176575422286987, - "learning_rate": 0.1, - "model_norm": 87.46025085449219, - "step_logs": { - "grad_norm": { - "108": 1.3904390335083008, - "109": 1.5651144981384277, - "110": 1.8946231603622437, - "111": 1.9631491899490356, - "112": 2.0436851978302, - "113": 1.9051377773284912, - "114": 1.6877081394195557, - "115": 1.7305121421813965, - "116": 1.7928662300109863, - "117": 1.7490925788879395, - "118": 1.9426746368408203, - "119": 1.9647597074508667, - "120": 1.7538739442825317, - "121": 1.7286008596420288, - "122": 1.7098926305770874, - "123": 1.7366149425506592, - "124": 1.7650038003921509, - "125": 1.6990960836410522, - "126": 1.5044797658920288, - "127": 1.4832885265350342, - "128": 1.6062524318695068, - "129": 1.4627171754837036, - "130": 1.320543885231018, - "131": 1.363011360168457, - "132": 1.5314080715179443, - "133": 1.6076903343200684, - "134": 1.5600913763046265, - "135": 1.6323246955871582, - "136": 1.8003880977630615, - "137": 1.782508373260498, - "138": 1.7450324296951294, - "139": 1.6873091459274292, - "140": 1.5233711004257202, - "141": 1.6176234483718872, - "142": 1.7869263887405396, - "143": 1.7575515508651733, - "144": 1.6200042963027954, - "145": 1.588592529296875, - "146": 1.48301362991333, - "147": 1.372484803199768, - "148": 1.4073752164840698, - "149": 1.5164399147033691, - "150": 1.606526494026184, - "151": 1.5409373044967651, - "152": 1.6272214651107788, - "153": 1.6304783821105957, - "154": 1.5988147258758545, - "155": 1.5830514430999756, - "156": 1.49552583694458, - "157": 1.4493581056594849, - "158": 1.743516445159912, - "159": 1.7905282974243164, - "160": 1.6307268142700195, - "161": 1.6176575422286987 - }, - "loss": { - "108": 2.539473533630371, - "109": 2.5351099967956543, - "110": 2.5705795288085938, - "111": 2.613246440887451, - "112": 2.556335926055908, - "113": 2.5967390537261963, - "114": 2.546572685241699, - "115": 2.5725255012512207, - "116": 2.581244945526123, - "117": 2.5693461894989014, - "118": 2.565413475036621, - "119": 2.603050708770752, - "120": 2.553065299987793, - "121": 2.5703039169311523, - "122": 2.535938024520874, - "123": 2.574498414993286, - "124": 2.5499210357666016, - "125": 2.5706706047058105, - "126": 2.546192169189453, - "127": 2.5553641319274902, - "128": 2.531917095184326, - "129": 2.5682835578918457, - "130": 2.5035579204559326, - "131": 2.531642436981201, - "132": 2.5284790992736816, - "133": 2.5431647300720215, - "134": 2.545612335205078, - "135": 2.5161311626434326, - "136": 2.537336826324463, - "137": 2.5615382194519043, - "138": 2.5330071449279785, - "139": 2.5430996417999268, - "140": 2.5300376415252686, - "141": 2.5412092208862305, - "142": 2.5488288402557373, - "143": 2.566938877105713, - "144": 2.5280909538269043, - "145": 2.5661096572875977, - "146": 2.533201217651367, - "147": 2.521578311920166, - "148": 2.5038695335388184, - "149": 2.5267040729522705, - "150": 2.513009548187256, - "151": 2.5295653343200684, - "152": 2.5126407146453857, - "153": 2.5354621410369873, - "154": 2.5191593170166016, - "155": 2.5349979400634766, - "156": 2.5124080181121826, - "157": 2.51773738861084, - "158": 2.5108580589294434, - "159": 2.560356616973877, - "160": 2.509505033493042, - "161": 2.5461692810058594 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 0.096333, - 0.0953913, - 0.0934736, - 0.0931325, - 0.0924478, - 0.0934678, - 0.0947037, - 0.0944997, - 0.0941386, - 0.094381, - 0.0931485, - 0.0930969, - 0.094318, - 0.0945067, - 0.0945496, - 0.094467, - 0.0942432, - 0.0946834, - 0.0957444, - 0.0958727, - 0.095152, - 0.0960012, - 0.0966345, - 0.0964607, - 0.095568, - 0.0951641, - 0.0954376, - 0.0949715, - 0.0939961, - 0.0941602, - 0.0943299, - 0.0946992, - 0.0956149, - 0.0951036, - 0.0941054, - 0.0943246, - 0.0950656, - 0.0953132, - 0.0958396, - 0.0963993, - 0.0961952, - 0.0956475, - 0.0951157, - 0.0955169, - 0.0949947, - 0.0950186, - 0.0951714, - 0.0952899, - 0.0957386, - 0.0959954, - 0.0942921, - 0.0941081, - 0.0949682, - 0.0951124 - ], - "train_epoch_time": 4.839670896530151, - "train_loss": 2.51940996534, - "train_score": 0.24754976687506589, - "val_loss": 2.560662880830732, - "val_score": 0.24263149395533737 - }, - { - "epoch": 3, - "grad_norm": 1.4900684356689453, - "learning_rate": 0.1, - "model_norm": 87.47274017333984, - "step_logs": { - "grad_norm": { - "162": 1.598097324371338, - "163": 1.525174617767334, - "164": 1.6116458177566528, - "165": 1.6965845823287964, - "166": 1.6861199140548706, - "167": 1.5659401416778564, - "168": 1.4150835275650024, - "169": 1.340960144996643, - "170": 1.3616056442260742, - "171": 1.38835608959198, - "172": 1.3906810283660889, - "173": 1.3606905937194824, - "174": 1.4589627981185913, - "175": 1.536238670349121, - "176": 1.693096399307251, - "177": 1.6456454992294312, - "178": 1.5588834285736084, - "179": 1.5264568328857422, - "180": 1.4997258186340332, - "181": 1.420569896697998, - "182": 1.3450911045074463, - "183": 1.334816575050354, - "184": 1.4066290855407715, - "185": 1.4533334970474243, - "186": 1.52266263961792, - "187": 1.6736797094345093, - "188": 1.664509654045105, - "189": 1.6850454807281494, - "190": 1.5338938236236572, - "191": 1.357615351676941, - "192": 1.2970796823501587, - "193": 1.2680583000183105, - "194": 1.1891568899154663, - "195": 1.2309643030166626, - "196": 1.3531808853149414, - "197": 1.4047720432281494, - "198": 1.4748094081878662, - "199": 1.4856345653533936, - "200": 1.4025931358337402, - "201": 1.3785326480865479, - "202": 1.4687319993972778, - "203": 1.4839376211166382, - "204": 1.5141972303390503, - "205": 1.5908265113830566, - "206": 1.474566102027893, - "207": 1.4316205978393555, - "208": 1.5760974884033203, - "209": 1.6366726160049438, - "210": 1.7205854654312134, - "211": 1.639279842376709, - "212": 1.4056683778762817, - "213": 1.4341869354248047, - "214": 1.4404417276382446, - "215": 1.4900684356689453 - }, - "loss": { - "162": 2.525996208190918, - "163": 2.519139289855957, - "164": 2.5197999477386475, - "165": 2.5566163063049316, - "166": 2.517716407775879, - "167": 2.5122735500335693, - "168": 2.5209250450134277, - "169": 2.5298314094543457, - "170": 2.5019795894622803, - "171": 2.515258550643921, - "172": 2.482245683670044, - "173": 2.5298373699188232, - "174": 2.502087116241455, - "175": 2.5351507663726807, - "176": 2.502654790878296, - "177": 2.552912473678589, - "178": 2.510620594024658, - "179": 2.5179388523101807, - "180": 2.514465808868408, - "181": 2.525336742401123, - "182": 2.501659870147705, - "183": 2.504927635192871, - "184": 2.503681182861328, - "185": 2.5129146575927734, - "186": 2.4964075088500977, - "187": 2.522034168243408, - "188": 2.5041115283966064, - "189": 2.5342724323272705, - "190": 2.5131492614746094, - "191": 2.489699363708496, - "192": 2.4914677143096924, - "193": 2.487210512161255, - "194": 2.4756979942321777, - "195": 2.499216318130493, - "196": 2.4841837882995605, - "197": 2.4931082725524902, - "198": 2.500441074371338, - "199": 2.513913154602051, - "200": 2.4952261447906494, - "201": 2.4986376762390137, - "202": 2.4906091690063477, - "203": 2.498720407485962, - "204": 2.476853609085083, - "205": 2.5053882598876953, - "206": 2.474944591522217, - "207": 2.5047898292541504, - "208": 2.491452693939209, - "209": 2.503051280975342, - "210": 2.5131893157958984, - "211": 2.5290403366088867, - "212": 2.478907585144043, - "213": 2.4828474521636963, - "214": 2.4856977462768555, - "215": 2.4963371753692627 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 0.095188, - 0.0955868, - 0.0950986, - 0.0946707, - 0.0946557, - 0.0953467, - 0.09618, - 0.096568, - 0.0964274, - 0.0963097, - 0.0962504, - 0.0964699, - 0.09592, - 0.0955524, - 0.0945832, - 0.0949631, - 0.0953837, - 0.0955777, - 0.095719, - 0.096158, - 0.0965101, - 0.0965657, - 0.0961988, - 0.0959668, - 0.0955624, - 0.0947387, - 0.0947579, - 0.0946952, - 0.0955283, - 0.0964306, - 0.0967339, - 0.0968687, - 0.0972234, - 0.0970577, - 0.0964455, - 0.096193, - 0.0958319, - 0.0957948, - 0.0962074, - 0.0963365, - 0.0958491, - 0.0957796, - 0.0955763, - 0.0951922, - 0.0957921, - 0.0960696, - 0.0952515, - 0.0949209, - 0.0944378, - 0.0949553, - 0.0961673, - 0.0960226, - 0.0959936, - 0.0957422 - ], - "train_epoch_time": 4.839991569519043, - "train_loss": 2.497765347673698, - "train_score": 0.25919453903626505, - "val_loss": 2.547653200432025, - "val_score": 0.25313486673801827 - }, - { - "epoch": 4, - "grad_norm": 1.4493815898895264, - "learning_rate": 0.1, - "model_norm": 87.48473358154297, - "step_logs": { - "grad_norm": { - "216": 1.5207141637802124, - "217": 1.4831293821334839, - "218": 1.548401951789856, - "219": 1.4990978240966797, - "220": 1.658570408821106, - "221": 1.7461599111557007, - "222": 1.7208502292633057, - "223": 1.4546067714691162, - "224": 1.3064782619476318, - "225": 1.4333893060684204, - "226": 1.8529034852981567, - "227": 1.876185655593872, - "228": 1.4407918453216553, - "229": 1.469433069229126, - "230": 1.4858814477920532, - "231": 1.4008532762527466, - "232": 1.3361468315124512, - "233": 1.3507158756256104, - "234": 1.3724346160888672, - "235": 1.3959327936172485, - "236": 1.5130374431610107, - "237": 1.386102318763733, - "238": 1.3643090724945068, - "239": 1.2458302974700928, - "240": 1.2737303972244263, - "241": 1.3065357208251953, - "242": 1.3514872789382935, - "243": 1.368172526359558, - "244": 1.4374116659164429, - "245": 1.5546752214431763, - "246": 1.527165174484253, - "247": 1.428321123123169, - "248": 1.4025307893753052, - "249": 1.4584320783615112, - "250": 1.633088231086731, - "251": 1.6576405763626099, - "252": 1.3490997552871704, - "253": 1.2448540925979614, - "254": 1.2974003553390503, - "255": 1.337821125984192, - "256": 1.4290159940719604, - "257": 1.5032222270965576, - "258": 1.4525079727172852, - "259": 1.4253003597259521, - "260": 1.4548730850219727, - "261": 1.4741255044937134, - "262": 1.469092607498169, - "263": 1.3693534135818481, - "264": 1.367107629776001, - "265": 1.367874264717102, - "266": 1.3529359102249146, - "267": 1.3481435775756836, - "268": 1.4297680854797363, - "269": 1.4493815898895264 - }, - "loss": { - "216": 2.5005550384521484, - "217": 2.486264705657959, - "218": 2.504789352416992, - "219": 2.5058765411376953, - "220": 2.49249267578125, - "221": 2.526111125946045, - "222": 2.4997825622558594, - "223": 2.516441822052002, - "224": 2.492928981781006, - "225": 2.4929747581481934, - "226": 2.508449077606201, - "227": 2.526498317718506, - "228": 2.498701810836792, - "229": 2.4934847354888916, - "230": 2.499743938446045, - "231": 2.475876569747925, - "232": 2.487055540084839, - "233": 2.4964940547943115, - "234": 2.4587535858154297, - "235": 2.485304832458496, - "236": 2.4922542572021484, - "237": 2.4945578575134277, - "238": 2.4734184741973877, - "239": 2.496652603149414, - "240": 2.4686832427978516, - "241": 2.47457218170166, - "242": 2.4911718368530273, - "243": 2.4890055656433105, - "244": 2.481781482696533, - "245": 2.5030057430267334, - "246": 2.467012882232666, - "247": 2.4917941093444824, - "248": 2.4658961296081543, - "249": 2.4795992374420166, - "250": 2.4773926734924316, - "251": 2.496230125427246, - "252": 2.4655301570892334, - "253": 2.466240406036377, - "254": 2.4619393348693848, - "255": 2.485466480255127, - "256": 2.4605367183685303, - "257": 2.4908394813537598, - "258": 2.4808998107910156, - "259": 2.4751126766204834, - "260": 2.475172519683838, - "261": 2.4922049045562744, - "262": 2.485206127166748, - "263": 2.4630305767059326, - "264": 2.4606151580810547, - "265": 2.474362373352051, - "266": 2.4694809913635254, - "267": 2.4504642486572266, - "268": 2.4557876586914062, - "269": 2.4724512100219727 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 0.0955803, - 0.0957637, - 0.0954327, - 0.0957084, - 0.0947703, - 0.0943084, - 0.0944081, - 0.0959655, - 0.0966899, - 0.0960423, - 0.0935949, - 0.0934874, - 0.0960118, - 0.0958499, - 0.0957706, - 0.0961881, - 0.0965352, - 0.0964748, - 0.096311, - 0.0962276, - 0.0956089, - 0.0962919, - 0.0963738, - 0.0969854, - 0.0968186, - 0.0966659, - 0.0964637, - 0.0963759, - 0.0960037, - 0.0953942, - 0.0954865, - 0.0960674, - 0.0961644, - 0.0958874, - 0.0948923, - 0.0947833, - 0.0964404, - 0.096954, - 0.0966945, - 0.0965247, - 0.0960157, - 0.0956609, - 0.0959214, - 0.096058, - 0.0958996, - 0.0958224, - 0.0958385, - 0.096333, - 0.0963412, - 0.0963568, - 0.0964263, - 0.0964242, - 0.0960042, - 0.0959249 - ], - "train_epoch_time": 4.839824199676514, - "train_loss": 2.4656357970436127, - "train_score": 0.27237042672664913, - "val_loss": 2.513917387010857, - "val_score": 0.26844144644200735 - }, - { - "epoch": 5, - "grad_norm": 1.6595345735549927, - "learning_rate": 0.1, - "model_norm": 87.49935913085938, - "step_logs": { - "grad_norm": { - "270": 1.3668824434280396, - "271": 1.4833353757858276, - "272": 1.4340763092041016, - "273": 1.3308309316635132, - "274": 1.565377116203308, - "275": 1.5734424591064453, - "276": 1.3325433731079102, - "277": 1.2003980875015259, - "278": 1.1305818557739258, - "279": 1.1756256818771362, - "280": 1.2804739475250244, - "281": 1.681312918663025, - "282": 1.7337186336517334, - "283": 1.660081386566162, - "284": 1.6165975332260132, - "285": 1.638747215270996, - "286": 1.7784677743911743, - "287": 1.625307321548462, - "288": 1.4921928644180298, - "289": 1.213752031326294, - "290": 1.1863501071929932, - "291": 1.3044021129608154, - "292": 1.3800262212753296, - "293": 1.5274262428283691, - "294": 1.5698614120483398, - "295": 1.3262922763824463, - "296": 1.1837708950042725, - "297": 1.2319022417068481, - "298": 1.3980967998504639, - "299": 1.4667826890945435, - "300": 1.5803438425064087, - "301": 1.622593879699707, - "302": 1.4403170347213745, - "303": 1.2268928289413452, - "304": 1.438249111175537, - "305": 1.7772581577301025, - "306": 1.8345683813095093, - "307": 1.652672529220581, - "308": 1.6674407720565796, - "309": 1.690192699432373, - "310": 1.7400174140930176, - "311": 1.5748227834701538, - "312": 1.4441848993301392, - "313": 1.6087982654571533, - "314": 1.8951371908187866, - "315": 1.578356146812439, - "316": 1.3722999095916748, - "317": 1.3869855403900146, - "318": 1.3587955236434937, - "319": 1.3930338621139526, - "320": 1.4942517280578613, - "321": 1.5070290565490723, - "322": 1.7591979503631592, - "323": 1.6595345735549927 - }, - "loss": { - "270": 2.4582738876342773, - "271": 2.4680166244506836, - "272": 2.4740357398986816, - "273": 2.4807915687561035, - "274": 2.469590663909912, - "275": 2.4804065227508545, - "276": 2.463829517364502, - "277": 2.467266082763672, - "278": 2.4413952827453613, - "279": 2.4508750438690186, - "280": 2.450596332550049, - "281": 2.4681344032287598, - "282": 2.5184402465820312, - "283": 2.4834656715393066, - "284": 2.470820188522339, - "285": 2.4697139263153076, - "286": 2.489670753479004, - "287": 2.459014415740967, - "288": 2.46140193939209, - "289": 2.43727970123291, - "290": 2.455320358276367, - "291": 2.4319419860839844, - "292": 2.421353340148926, - "293": 2.4347152709960938, - "294": 2.462702751159668, - "295": 2.456295967102051, - "296": 2.43804931640625, - "297": 2.421156406402588, - "298": 2.468761444091797, - "299": 2.4393293857574463, - "300": 2.4439313411712646, - "301": 2.451199531555176, - "302": 2.4367570877075195, - "303": 2.447511672973633, - "304": 2.4106650352478027, - "305": 2.4494874477386475, - "306": 2.4421024322509766, - "307": 2.475184917449951, - "308": 2.4579808712005615, - "309": 2.439710855484009, - "310": 2.4429256916046143, - "311": 2.4578781127929688, - "312": 2.416517972946167, - "313": 2.4365131855010986, - "314": 2.442086696624756, - "315": 2.453134298324585, - "316": 2.4242966175079346, - "317": 2.428849220275879, - "318": 2.427621841430664, - "319": 2.419334888458252, - "320": 2.4207675457000732, - "321": 2.4236717224121094, - "322": 2.436763286590576, - "323": 2.4410738945007324 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 0.096339, - 0.0957326, - 0.0960095, - 0.0965534, - 0.0952733, - 0.0952467, - 0.0965219, - 0.0971627, - 0.097449, - 0.0972577, - 0.096763, - 0.0945836, - 0.0943685, - 0.0947432, - 0.0949771, - 0.0948435, - 0.0940273, - 0.0949025, - 0.0956726, - 0.0970665, - 0.0972138, - 0.0966201, - 0.0962161, - 0.0954279, - 0.0952348, - 0.0965431, - 0.0972064, - 0.0969612, - 0.0961919, - 0.0957763, - 0.0951388, - 0.0949033, - 0.0959171, - 0.0970166, - 0.0958861, - 0.093943, - 0.0935534, - 0.0947711, - 0.094647, - 0.0944691, - 0.0941648, - 0.0951972, - 0.0958631, - 0.0949565, - 0.0931503, - 0.0951678, - 0.0962612, - 0.0961907, - 0.0963366, - 0.0961442, - 0.0955916, - 0.0955244, - 0.094029, - 0.0946601 - ], - "train_epoch_time": 4.839600563049316, - "train_loss": 2.406728516897478, - "train_score": 0.29554900462008277, - "val_loss": 2.463793483311492, - "val_score": 0.2832188221671962 - }, - { - "epoch": 6, - "grad_norm": 1.5348509550094604, - "learning_rate": 0.1, - "model_norm": 87.51490783691406, - "step_logs": { - "grad_norm": { - "324": 1.5204179286956787, - "325": 1.475035309791565, - "326": 1.524332880973816, - "327": 1.5180416107177734, - "328": 1.5215808153152466, - "329": 1.5502034425735474, - "330": 1.4987207651138306, - "331": 1.413873314857483, - "332": 1.4547117948532104, - "333": 1.5063804388046265, - "334": 1.4725627899169922, - "335": 1.4559494256973267, - "336": 1.4342765808105469, - "337": 1.4491323232650757, - "338": 1.4730716943740845, - "339": 1.505921483039856, - "340": 1.585554838180542, - "341": 1.4796698093414307, - "342": 1.3740936517715454, - "343": 1.4039183855056763, - "344": 1.491923451423645, - "345": 1.477642297744751, - "346": 1.431505560874939, - "347": 1.4288281202316284, - "348": 1.5642822980880737, - "349": 1.5053727626800537, - "350": 1.5076608657836914, - "351": 1.4128873348236084, - "352": 1.454950213432312, - "353": 1.487735390663147, - "354": 1.5694859027862549, - "355": 1.595165491104126, - "356": 1.593092679977417, - "357": 1.603345513343811, - "358": 1.4710255861282349, - "359": 1.508878231048584, - "360": 1.6141520738601685, - "361": 1.6575957536697388, - "362": 1.5069397687911987, - "363": 1.4971407651901245, - "364": 1.5202319622039795, - "365": 1.4801868200302124, - "366": 1.5096116065979004, - "367": 1.4805908203125, - "368": 1.4322978258132935, - "369": 1.4010130167007446, - "370": 1.3681437969207764, - "371": 1.3306944370269775, - "372": 1.3294380903244019, - "373": 1.613341212272644, - "374": 1.9168848991394043, - "375": 2.206291913986206, - "376": 1.8911081552505493, - "377": 1.5348509550094604 - }, - "loss": { - "324": 2.4078660011291504, - "325": 2.4143972396850586, - "326": 2.4101521968841553, - "327": 2.4316511154174805, - "328": 2.3986916542053223, - "329": 2.42291259765625, - "330": 2.409928798675537, - "331": 2.42907452583313, - "332": 2.386866569519043, - "333": 2.410284996032715, - "334": 2.3986454010009766, - "335": 2.3882853984832764, - "336": 2.386298656463623, - "337": 2.3972373008728027, - "338": 2.3928301334381104, - "339": 2.419609546661377, - "340": 2.3975677490234375, - "341": 2.4221301078796387, - "342": 2.3810880184173584, - "343": 2.393470048904419, - "344": 2.398592233657837, - "345": 2.407968282699585, - "346": 2.405282974243164, - "347": 2.38531494140625, - "348": 2.3865158557891846, - "349": 2.4027490615844727, - "350": 2.3786582946777344, - "351": 2.4028944969177246, - "352": 2.3642420768737793, - "353": 2.3801767826080322, - "354": 2.4026432037353516, - "355": 2.3965651988983154, - "356": 2.368370532989502, - "357": 2.4082484245300293, - "358": 2.3832249641418457, - "359": 2.402068614959717, - "360": 2.384875535964966, - "361": 2.4020755290985107, - "362": 2.3848319053649902, - "363": 2.394064426422119, - "364": 2.364562511444092, - "365": 2.3977670669555664, - "366": 2.377725124359131, - "367": 2.3808393478393555, - "368": 2.3722097873687744, - "369": 2.3877806663513184, - "370": 2.3650426864624023, - "371": 2.3636343479156494, - "372": 2.3600821495056152, - "373": 2.3635053634643555, - "374": 2.3816933631896973, - "375": 2.448493480682373, - "376": 2.439532518386841, - "377": 2.3987228870391846 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 0.0954196, - 0.0956885, - 0.0954013, - 0.0954759, - 0.0953962, - 0.0952751, - 0.0955473, - 0.0960478, - 0.0957552, - 0.0955043, - 0.0956753, - 0.0957507, - 0.0958678, - 0.0958038, - 0.0956624, - 0.0955235, - 0.0950184, - 0.0956758, - 0.0961864, - 0.0960454, - 0.0955659, - 0.0956629, - 0.0959142, - 0.0958962, - 0.0951233, - 0.0954966, - 0.0954399, - 0.0960118, - 0.095715, - 0.095557, - 0.0951238, - 0.0949589, - 0.0949145, - 0.0949331, - 0.0956573, - 0.0954754, - 0.0948204, - 0.0945901, - 0.0954553, - 0.0955281, - 0.0953407, - 0.0956309, - 0.0954269, - 0.0955989, - 0.0958552, - 0.0960521, - 0.0961934, - 0.0963894, - 0.0963908, - 0.094781, - 0.0928385, - 0.0909585, - 0.0931707, - 0.0953194 - ], - "train_epoch_time": 4.839699745178223, - "train_loss": 2.3774107679232976, - "train_score": 0.2882577115100631, - "val_loss": 2.4424089657315977, - "val_score": 0.27687284719930316 - }, - { - "epoch": 7, - "grad_norm": 1.401110053062439, - "learning_rate": 0.1, - "model_norm": 87.53044891357422, - "step_logs": { - "grad_norm": { - "378": 1.5238767862319946, - "379": 1.5971482992172241, - "380": 1.5722147226333618, - "381": 1.4197204113006592, - "382": 1.5557610988616943, - "383": 1.5613852739334106, - "384": 1.4575474262237549, - "385": 1.3418397903442383, - "386": 1.3571803569793701, - "387": 1.2810419797897339, - "388": 1.4499485492706299, - "389": 1.4625566005706787, - "390": 1.4669203758239746, - "391": 1.4268333911895752, - "392": 1.4231853485107422, - "393": 1.4888262748718262, - "394": 1.5273696184158325, - "395": 1.4988442659378052, - "396": 1.532185673713684, - "397": 1.6217350959777832, - "398": 1.5982587337493896, - "399": 1.559019923210144, - "400": 1.4788126945495605, - "401": 1.3030979633331299, - "402": 1.3073769807815552, - "403": 1.4971922636032104, - "404": 1.3816274404525757, - "405": 1.3173834085464478, - "406": 1.3925862312316895, - "407": 1.3758513927459717, - "408": 1.3136552572250366, - "409": 1.3630038499832153, - "410": 1.5459775924682617, - "411": 1.6935113668441772, - "412": 1.7326537370681763, - "413": 1.6022096872329712, - "414": 1.4557265043258667, - "415": 1.253517985343933, - "416": 1.0845822095870972, - "417": 1.2073835134506226, - "418": 1.3733117580413818, - "419": 1.694291591644287, - "420": 1.775342583656311, - "421": 1.5067014694213867, - "422": 1.3718053102493286, - "423": 1.4382741451263428, - "424": 1.4876372814178467, - "425": 1.4780538082122803, - "426": 1.5483167171478271, - "427": 1.597402811050415, - "428": 1.4621162414550781, - "429": 1.4470614194869995, - "430": 1.4325398206710815, - "431": 1.401110053062439 - }, - "loss": { - "378": 2.3827290534973145, - "379": 2.374232053756714, - "380": 2.3913118839263916, - "381": 2.3736956119537354, - "382": 2.397104263305664, - "383": 2.3694705963134766, - "384": 2.3797054290771484, - "385": 2.3538684844970703, - "386": 2.3716933727264404, - "387": 2.366917610168457, - "388": 2.360433578491211, - "389": 2.3798165321350098, - "390": 2.3500328063964844, - "391": 2.3685245513916016, - "392": 2.376107931137085, - "393": 2.3573555946350098, - "394": 2.387089252471924, - "395": 2.3546972274780273, - "396": 2.3808541297912598, - "397": 2.355680227279663, - "398": 2.4044454097747803, - "399": 2.378586769104004, - "400": 2.3672568798065186, - "401": 2.3572709560394287, - "402": 2.344491481781006, - "403": 2.3278932571411133, - "404": 2.355198860168457, - "405": 2.3390891551971436, - "406": 2.3366992473602295, - "407": 2.3351902961730957, - "408": 2.3390352725982666, - "409": 2.343677043914795, - "410": 2.36517596244812, - "411": 2.369183301925659, - "412": 2.3717522621154785, - "413": 2.365341901779175, - "414": 2.3643646240234375, - "415": 2.3370747566223145, - "416": 2.320711374282837, - "417": 2.323245048522949, - "418": 2.331423759460449, - "419": 2.347210168838501, - "420": 2.3825855255126953, - "421": 2.3443851470947266, - "422": 2.354055404663086, - "423": 2.3360114097595215, - "424": 2.3482937812805176, - "425": 2.3313653469085693, - "426": 2.365417957305908, - "427": 2.3623032569885254, - "428": 2.347109079360962, - "429": 2.344539165496826, - "430": 2.3299427032470703, - "431": 2.3400559425354004 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 0.0953534, - 0.0949019, - 0.0950856, - 0.0959272, - 0.0951941, - 0.0951072, - 0.0957271, - 0.0963163, - 0.096262, - 0.0966495, - 0.0957366, - 0.0956991, - 0.0956221, - 0.0958794, - 0.0959121, - 0.0955097, - 0.0953412, - 0.0954469, - 0.0953015, - 0.0947128, - 0.094956, - 0.0951391, - 0.0955849, - 0.0965235, - 0.096483, - 0.0954065, - 0.0961053, - 0.0964229, - 0.0960157, - 0.0961048, - 0.0964423, - 0.0961877, - 0.0951904, - 0.0942928, - 0.0940479, - 0.0948529, - 0.0957108, - 0.0967476, - 0.0975283, - 0.0969581, - 0.0961125, - 0.0942374, - 0.093796, - 0.0953819, - 0.0961566, - 0.09576, - 0.0955, - 0.0955244, - 0.095177, - 0.0948759, - 0.0956443, - 0.0957252, - 0.0957819, - 0.0959743 - ], - "train_epoch_time": 4.839696645736694, - "train_loss": 2.341660093953312, - "train_score": 0.30697296437095195, - "val_loss": 2.4016604872439675, - "val_score": 0.2907756896845646 - }, - { - "epoch": 8, - "grad_norm": 1.2714836597442627, - "learning_rate": 0.1, - "model_norm": 87.5464096069336, - "step_logs": { - "grad_norm": { - "432": 1.3507100343704224, - "433": 1.3469970226287842, - "434": 1.6031737327575684, - "435": 1.6274340152740479, - "436": 1.561687707901001, - "437": 1.4427980184555054, - "438": 1.3580224514007568, - "439": 1.2925304174423218, - "440": 1.2868298292160034, - "441": 1.2257243394851685, - "442": 1.2014594078063965, - "443": 1.2329719066619873, - "444": 1.2841089963912964, - "445": 1.3346121311187744, - "446": 1.3809603452682495, - "447": 1.3230992555618286, - "448": 1.4006848335266113, - "449": 1.6567952632904053, - "450": 1.7916064262390137, - "451": 1.8325364589691162, - "452": 2.1758933067321777, - "453": 1.8093030452728271, - "454": 1.6594533920288086, - "455": 1.5646799802780151, - "456": 1.4760572910308838, - "457": 1.4579269886016846, - "458": 1.5229400396347046, - "459": 1.7276986837387085, - "460": 1.8374427556991577, - "461": 1.677546501159668, - "462": 1.6864469051361084, - "463": 1.5372563600540161, - "464": 1.479378581047058, - "465": 1.3358135223388672, - "466": 1.3171627521514893, - "467": 1.2679260969161987, - "468": 1.2107120752334595, - "469": 1.259350061416626, - "470": 1.3164029121398926, - "471": 1.4148625135421753, - "472": 1.5306681394577026, - "473": 1.6661498546600342, - "474": 1.5870965719223022, - "475": 1.4273427724838257, - "476": 1.3611736297607422, - "477": 1.4415041208267212, - "478": 1.5811823606491089, - "479": 1.6015987396240234, - "480": 1.605069875717163, - "481": 1.63101065158844, - "482": 1.6104609966278076, - "483": 1.3696142435073853, - "484": 1.2675796747207642, - "485": 1.2714836597442627 - }, - "loss": { - "432": 2.3457813262939453, - "433": 2.3262274265289307, - "434": 2.3438339233398438, - "435": 2.348609685897827, - "436": 2.3572678565979004, - "437": 2.3277664184570312, - "438": 2.3194456100463867, - "439": 2.3039093017578125, - "440": 2.3299155235290527, - "441": 2.3128645420074463, - "442": 2.313471794128418, - "443": 2.3038887977600098, - "444": 2.3367955684661865, - "445": 2.2974181175231934, - "446": 2.313506603240967, - "447": 2.3145275115966797, - "448": 2.3211581707000732, - "449": 2.34653902053833, - "450": 2.3669865131378174, - "451": 2.356989860534668, - "452": 2.3780863285064697, - "453": 2.3617024421691895, - "454": 2.3487439155578613, - "455": 2.33650803565979, - "456": 2.323024034500122, - "457": 2.3067703247070312, - "458": 2.3273158073425293, - "459": 2.335646152496338, - "460": 2.352998733520508, - "461": 2.3597097396850586, - "462": 2.346374034881592, - "463": 2.331382989883423, - "464": 2.311239719390869, - "465": 2.328648567199707, - "466": 2.3075618743896484, - "467": 2.3089752197265625, - "468": 2.3243298530578613, - "469": 2.312345027923584, - "470": 2.313727617263794, - "471": 2.341294288635254, - "472": 2.337461471557617, - "473": 2.3379034996032715, - "474": 2.3516769409179688, - "475": 2.3145625591278076, - "476": 2.3131296634674072, - "477": 2.333616256713867, - "478": 2.3307414054870605, - "479": 2.3015410900115967, - "480": 2.340886354446411, - "481": 2.316655158996582, - "482": 2.3463101387023926, - "483": 2.342644214630127, - "484": 2.3145699501037598, - "485": 2.322575092315674 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 0.0962568, - 0.0962465, - 0.0948022, - 0.0946624, - 0.0950814, - 0.09572, - 0.0961764, - 0.0965012, - 0.0965683, - 0.0968543, - 0.0969746, - 0.0968061, - 0.096592, - 0.0962682, - 0.0960416, - 0.0963561, - 0.0959452, - 0.0944742, - 0.0936501, - 0.0933499, - 0.0909468, - 0.0935187, - 0.0944624, - 0.0950217, - 0.0955206, - 0.0955957, - 0.0952536, - 0.0939938, - 0.093306, - 0.0943726, - 0.0942857, - 0.0951763, - 0.0954794, - 0.09631, - 0.096377, - 0.0966358, - 0.0969432, - 0.0966844, - 0.0963903, - 0.0959002, - 0.0952275, - 0.0943957, - 0.0949167, - 0.0957845, - 0.0961493, - 0.0957376, - 0.0949096, - 0.0947215, - 0.0947843, - 0.0945703, - 0.0947625, - 0.0961504, - 0.0966455, - 0.0966367 - ], - "train_epoch_time": 4.839443922042847, - "train_loss": 2.3077662786076023, - "train_score": 0.31930259139500183, - "val_loss": 2.3614000239410577, - "val_score": 0.3028666761657811 - }, - { - "epoch": 9, - "grad_norm": 1.468070387840271, - "learning_rate": 0.1, - "model_norm": 87.5635757446289, - "step_logs": { - "grad_norm": { - "486": 1.3421059846878052, - "487": 1.5410593748092651, - "488": 1.6748708486557007, - "489": 1.628523349761963, - "490": 1.585997223854065, - "491": 1.5138816833496094, - "492": 1.4490344524383545, - "493": 1.5533103942871094, - "494": 1.6497552394866943, - "495": 1.6603319644927979, - "496": 1.6849194765090942, - "497": 1.6944397687911987, - "498": 1.6065895557403564, - "499": 1.6698063611984253, - "500": 1.6528459787368774, - "501": 1.3871923685073853, - "502": 1.2377095222473145, - "503": 1.3084832429885864, - "504": 1.5100948810577393, - "505": 1.624717116355896, - "506": 1.595871925354004, - "507": 1.3191120624542236, - "508": 1.2762936353683472, - "509": 1.3916237354278564, - "510": 1.4488248825073242, - "511": 1.4460535049438477, - "512": 1.5222827196121216, - "513": 1.4093161821365356, - "514": 1.328999400138855, - "515": 1.2952208518981934, - "516": 1.382433533668518, - "517": 1.4634521007537842, - "518": 1.6107661724090576, - "519": 1.571621298789978, - "520": 1.5592676401138306, - "521": 1.6456609964370728, - "522": 1.6851128339767456, - "523": 1.666506290435791, - "524": 1.5055006742477417, - "525": 1.4418994188308716, - "526": 1.3877149820327759, - "527": 1.3972231149673462, - "528": 1.399568796157837, - "529": 1.3434568643569946, - "530": 1.3118952512741089, - "531": 1.3026467561721802, - "532": 1.411010980606079, - "533": 1.413327693939209, - "534": 1.3558568954467773, - "535": 1.2408007383346558, - "536": 1.1616082191467285, - "537": 1.165681004524231, - "538": 1.3844928741455078, - "539": 1.468070387840271 - }, - "loss": { - "486": 2.3154544830322266, - "487": 2.3100531101226807, - "488": 2.343207836151123, - "489": 2.333224296569824, - "490": 2.3237388134002686, - "491": 2.3195114135742188, - "492": 2.321809768676758, - "493": 2.326282501220703, - "494": 2.32395601272583, - "495": 2.31506609916687, - "496": 2.330772876739502, - "497": 2.3274450302124023, - "498": 2.316956043243408, - "499": 2.3209829330444336, - "500": 2.3378071784973145, - "501": 2.302727222442627, - "502": 2.2809460163116455, - "503": 2.300558567047119, - "504": 2.3031578063964844, - "505": 2.3129429817199707, - "506": 2.3283753395080566, - "507": 2.2861313819885254, - "508": 2.2917866706848145, - "509": 2.29498028755188, - "510": 2.2883615493774414, - "511": 2.3073911666870117, - "512": 2.297729969024658, - "513": 2.3086354732513428, - "514": 2.3066201210021973, - "515": 2.2947752475738525, - "516": 2.2801079750061035, - "517": 2.2713325023651123, - "518": 2.3072500228881836, - "519": 2.301114082336426, - "520": 2.2939233779907227, - "521": 2.2943079471588135, - "522": 2.2893242835998535, - "523": 2.298323631286621, - "524": 2.3116707801818848, - "525": 2.3140971660614014, - "526": 2.288111448287964, - "527": 2.2836203575134277, - "528": 2.3037538528442383, - "529": 2.293445587158203, - "530": 2.3005857467651367, - "531": 2.2702369689941406, - "532": 2.2708632946014404, - "533": 2.270054340362549, - "534": 2.2814066410064697, - "535": 2.2814159393310547, - "536": 2.276214599609375, - "537": 2.2891175746917725, - "538": 2.2826390266418457, - "539": 2.2869319915771484 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 0.096256, - 0.095111, - 0.0943523, - 0.0946223, - 0.0948655, - 0.0952922, - 0.0956739, - 0.0950698, - 0.0944682, - 0.0943807, - 0.0942595, - 0.0941904, - 0.0947238, - 0.0943337, - 0.0944797, - 0.0959893, - 0.096751, - 0.0964124, - 0.0952829, - 0.0946017, - 0.0948145, - 0.0963338, - 0.0965681, - 0.0959516, - 0.0956147, - 0.0956652, - 0.0951994, - 0.0958758, - 0.0963125, - 0.0964736, - 0.0959777, - 0.0954977, - 0.0946767, - 0.0949064, - 0.0949672, - 0.0944269, - 0.0941603, - 0.0943024, - 0.0953267, - 0.0957009, - 0.0959618, - 0.0959008, - 0.0959221, - 0.0962141, - 0.0963944, - 0.0963974, - 0.0958004, - 0.0957857, - 0.0961271, - 0.0967359, - 0.0971213, - 0.0971176, - 0.0959705, - 0.0955 - ], - "train_epoch_time": 4.840027809143066, - "train_loss": 2.276319026810196, - "train_score": 0.3284500538105602, - "val_loss": 2.3410527150474927, - "val_score": 0.31462130432971996 - }, - { - "epoch": 10, - "grad_norm": 1.7734715938568115, - "learning_rate": 0.1, - "model_norm": 87.58110046386719, - "step_logs": { - "grad_norm": { - "540": 1.3855658769607544, - "541": 1.3699744939804077, - "542": 1.5931072235107422, - "543": 1.757995843887329, - "544": 2.2765657901763916, - "545": 2.043253183364868, - "546": 2.891676425933838, - "547": 2.6693274974823, - "548": 2.9742679595947266, - "549": 2.0444986820220947, - "550": 4.142254829406738, - "551": 2.08813214302063, - "552": 2.854707956314087, - "553": 1.805907130241394, - "554": 2.043489933013916, - "555": 1.8520097732543945, - "556": 1.669492483139038, - "557": 1.502358317375183, - "558": 1.2358567714691162, - "559": 1.2964330911636353, - "560": 1.280662178993225, - "561": 1.2584073543548584, - "562": 1.2414016723632812, - "563": 1.2102011442184448, - "564": 1.237259864807129, - "565": 1.2826976776123047, - "566": 1.3327841758728027, - "567": 1.3804187774658203, - "568": 1.5000768899917603, - "569": 1.5731277465820312, - "570": 1.5431245565414429, - "571": 1.4646320343017578, - "572": 1.3928265571594238, - "573": 1.3860430717468262, - "574": 1.4848979711532593, - "575": 1.4542369842529297, - "576": 1.4103590250015259, - "577": 1.4892785549163818, - "578": 1.4899351596832275, - "579": 1.3885960578918457, - "580": 1.2535943984985352, - "581": 1.1883963346481323, - "582": 1.166892170906067, - "583": 1.1567583084106445, - "584": 1.0995984077453613, - "585": 1.2123874425888062, - "586": 1.283324122428894, - "587": 1.437957525253296, - "588": 1.3781952857971191, - "589": 1.2244057655334473, - "590": 1.1463439464569092, - "591": 1.3378748893737793, - "592": 1.6402194499969482, - "593": 1.7734715938568115 - }, - "loss": { - "540": 2.286694049835205, - "541": 2.2679097652435303, - "542": 2.2743968963623047, - "543": 2.3044919967651367, - "544": 2.3004279136657715, - "545": 2.3538618087768555, - "546": 2.3604750633239746, - "547": 2.371980905532837, - "548": 2.3429999351501465, - "549": 2.3255491256713867, - "550": 2.4214377403259277, - "551": 2.3725318908691406, - "552": 2.339686393737793, - "553": 2.3350212574005127, - "554": 2.3282346725463867, - "555": 2.320429563522339, - "556": 2.3140487670898438, - "557": 2.299532413482666, - "558": 2.24477481842041, - "559": 2.264753818511963, - "560": 2.2702298164367676, - "561": 2.2654409408569336, - "562": 2.268239736557007, - "563": 2.2638630867004395, - "564": 2.2540338039398193, - "565": 2.253985643386841, - "566": 2.2684760093688965, - "567": 2.2714717388153076, - "568": 2.2746026515960693, - "569": 2.276184320449829, - "570": 2.279322624206543, - "571": 2.2680301666259766, - "572": 2.2682130336761475, - "573": 2.246882438659668, - "574": 2.2479441165924072, - "575": 2.264397144317627, - "576": 2.2778656482696533, - "577": 2.2779603004455566, - "578": 2.256025791168213, - "579": 2.281663179397583, - "580": 2.2363791465759277, - "581": 2.2800374031066895, - "582": 2.237774610519409, - "583": 2.2278127670288086, - "584": 2.2422704696655273, - "585": 2.2439115047454834, - "586": 2.2704453468322754, - "587": 2.2174038887023926, - "588": 2.227482318878174, - "589": 2.2529821395874023, - "590": 2.2411389350891113, - "591": 2.2315709590911865, - "592": 2.2589306831359863, - "593": 2.2638437747955322 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 0.0959714, - 0.0960266, - 0.0947154, - 0.0937159, - 0.0898757, - 0.0918542, - 0.084953, - 0.0869416, - 0.0841198, - 0.091754, - 0.0738389, - 0.0915842, - 0.0851676, - 0.0934724, - 0.0917702, - 0.0931179, - 0.0943197, - 0.0953219, - 0.0967099, - 0.0964221, - 0.0965137, - 0.0966229, - 0.0967145, - 0.0968667, - 0.0967158, - 0.0964787, - 0.0962323, - 0.0959743, - 0.0952867, - 0.0948441, - 0.0950358, - 0.0954845, - 0.095899, - 0.0959002, - 0.095325, - 0.0955386, - 0.0958165, - 0.0953577, - 0.0953108, - 0.0959459, - 0.0966058, - 0.096996, - 0.0970474, - 0.0970844, - 0.0973746, - 0.0968286, - 0.0965001, - 0.0955452, - 0.0959108, - 0.0967801, - 0.0971517, - 0.0961442, - 0.0943798, - 0.0935046 - ], - "train_epoch_time": 4.839099884033203, - "train_loss": 2.2669085151666892, - "train_score": 0.32776856176993424, - "val_loss": 2.320276185100306, - "val_score": 0.31862173572473496 - }, - { - "epoch": 11, - "grad_norm": 1.5081864595413208, - "learning_rate": 0.1, - "model_norm": 87.59841918945312, - "step_logs": { - "grad_norm": { - "594": 1.6634496450424194, - "595": 1.619261384010315, - "596": 1.6665533781051636, - "597": 1.7308590412139893, - "598": 1.7479417324066162, - "599": 1.6680026054382324, - "600": 1.5196536779403687, - "601": 1.3531891107559204, - "602": 1.228021264076233, - "603": 1.3028531074523926, - "604": 1.2772009372711182, - "605": 1.1832040548324585, - "606": 1.2076756954193115, - "607": 1.4356175661087036, - "608": 1.580788254737854, - "609": 1.5609453916549683, - "610": 1.387777328491211, - "611": 1.3024975061416626, - "612": 1.23203706741333, - "613": 1.25006902217865, - "614": 1.2798360586166382, - "615": 1.4011482000350952, - "616": 1.327973484992981, - "617": 1.3721123933792114, - "618": 1.467121958732605, - "619": 1.5741314888000488, - "620": 1.461991310119629, - "621": 1.1995444297790527, - "622": 0.9937043786048889, - "623": 1.0829651355743408, - "624": 1.353103518486023, - "625": 1.424338936805725, - "626": 1.445154070854187, - "627": 1.4511809349060059, - "628": 1.35936439037323, - "629": 1.2982182502746582, - "630": 1.1945499181747437, - "631": 1.144911289215088, - "632": 1.2437695264816284, - "633": 1.581872582435608, - "634": 1.6544958353042603, - "635": 1.4697126150131226, - "636": 1.4244686365127563, - "637": 1.4537724256515503, - "638": 1.4634218215942383, - "639": 1.4124324321746826, - "640": 1.3953214883804321, - "641": 1.372344732284546, - "642": 1.384291172027588, - "643": 1.3955241441726685, - "644": 1.4376662969589233, - "645": 1.3689109086990356, - "646": 1.4320026636123657, - "647": 1.5081864595413208 - }, - "loss": { - "594": 2.2812418937683105, - "595": 2.2569174766540527, - "596": 2.2505056858062744, - "597": 2.2952072620391846, - "598": 2.2475779056549072, - "599": 2.2572970390319824, - "600": 2.264566421508789, - "601": 2.246230125427246, - "602": 2.244356155395508, - "603": 2.2344260215759277, - "604": 2.2239279747009277, - "605": 2.225612163543701, - "606": 2.243206262588501, - "607": 2.238051652908325, - "608": 2.250304937362671, - "609": 2.2463605403900146, - "610": 2.2210922241210938, - "611": 2.242798328399658, - "612": 2.217909812927246, - "613": 2.2258362770080566, - "614": 2.2233424186706543, - "615": 2.2090396881103516, - "616": 2.243584632873535, - "617": 2.212984561920166, - "618": 2.2275378704071045, - "619": 2.2615268230438232, - "620": 2.23750638961792, - "621": 2.2293214797973633, - "622": 2.218424081802368, - "623": 2.2181177139282227, - "624": 2.227792263031006, - "625": 2.226933717727661, - "626": 2.2210681438446045, - "627": 2.2508931159973145, - "628": 2.22235107421875, - "629": 2.2165441513061523, - "630": 2.200906991958618, - "631": 2.210453510284424, - "632": 2.208587646484375, - "633": 2.228396415710449, - "634": 2.2476110458374023, - "635": 2.2339937686920166, - "636": 2.2140073776245117, - "637": 2.219712734222412, - "638": 2.227468490600586, - "639": 2.229064464569092, - "640": 2.2381138801574707, - "641": 2.227644443511963, - "642": 2.2352161407470703, - "643": 2.197134494781494, - "644": 2.2155399322509766, - "645": 2.2270984649658203, - "646": 2.2132015228271484, - "647": 2.2277960777282715 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 0.094282, - 0.0945101, - 0.094188, - 0.0938735, - 0.0936357, - 0.094195, - 0.0951485, - 0.0960836, - 0.0967496, - 0.0963406, - 0.0964623, - 0.0969508, - 0.0968515, - 0.0955982, - 0.0947397, - 0.0948557, - 0.0958446, - 0.0963557, - 0.0966913, - 0.0966087, - 0.0964473, - 0.0957455, - 0.0962185, - 0.0959198, - 0.0953912, - 0.0948062, - 0.0954414, - 0.0968737, - 0.0978229, - 0.0974244, - 0.096053, - 0.0956434, - 0.0955096, - 0.0955311, - 0.0960085, - 0.0963374, - 0.0968601, - 0.0971203, - 0.0966164, - 0.0946839, - 0.0942601, - 0.0953884, - 0.0956183, - 0.0954557, - 0.0954132, - 0.0957168, - 0.0958318, - 0.0959443, - 0.0958897, - 0.0957562, - 0.0955434, - 0.0959628, - 0.0955724, - 0.0951429 - ], - "train_epoch_time": 4.839369297027588, - "train_loss": 2.2234227234527064, - "train_score": 0.34212921445646793, - "val_loss": 2.303082422602464, - "val_score": 0.3244564433322298 - }, - { - "epoch": 12, - "grad_norm": 1.0092487335205078, - "learning_rate": 0.1, - "model_norm": 87.61363220214844, - "step_logs": { - "grad_norm": { - "648": 1.5923807621002197, - "649": 1.4241580963134766, - "650": 1.380035400390625, - "651": 1.3389984369277954, - "652": 1.377500295639038, - "653": 1.415576457977295, - "654": 1.3388686180114746, - "655": 1.251926302909851, - "656": 1.3301371335983276, - "657": 1.42147696018219, - "658": 1.340073585510254, - "659": 1.2445037364959717, - "660": 1.1605985164642334, - "661": 1.02606999874115, - "662": 1.0356559753417969, - "663": 1.0613659620285034, - "664": 1.2157117128372192, - "665": 1.2903574705123901, - "666": 1.2684019804000854, - "667": 1.2516309022903442, - "668": 1.2222882509231567, - "669": 1.1940480470657349, - "670": 1.1551145315170288, - "671": 1.160732626914978, - "672": 1.1861381530761719, - "673": 1.07341730594635, - "674": 1.0808777809143066, - "675": 1.1456283330917358, - "676": 1.2443689107894897, - "677": 1.315761685371399, - "678": 1.279556155204773, - "679": 1.284864068031311, - "680": 1.2411867380142212, - "681": 1.1781779527664185, - "682": 1.0981937646865845, - "683": 1.0269218683242798, - "684": 0.9819169640541077, - "685": 0.9329985976219177, - "686": 0.9042189717292786, - "687": 1.0235469341278076, - "688": 1.0084115266799927, - "689": 0.9927877187728882, - "690": 0.995064377784729, - "691": 1.0337671041488647, - "692": 1.0011017322540283, - "693": 0.9842377305030823, - "694": 0.8962352275848389, - "695": 0.8242790699005127, - "696": 0.844769299030304, - "697": 0.8928088545799255, - "698": 0.9202558994293213, - "699": 0.8769364953041077, - "700": 0.9151707887649536, - "701": 1.0092487335205078 - }, - "loss": { - "648": 2.240493059158325, - "649": 2.242720603942871, - "650": 2.2161097526550293, - "651": 2.2214736938476562, - "652": 2.208665370941162, - "653": 2.20377254486084, - "654": 2.2091503143310547, - "655": 2.2058968544006348, - "656": 2.2032339572906494, - "657": 2.2002978324890137, - "658": 2.2271029949188232, - "659": 2.183682441711426, - "660": 2.1819634437561035, - "661": 2.184974431991577, - "662": 2.181095600128174, - "663": 2.1750638484954834, - "664": 2.191298007965088, - "665": 2.210533618927002, - "666": 2.1872763633728027, - "667": 2.208545684814453, - "668": 2.1755294799804688, - "669": 2.187657356262207, - "670": 2.1991405487060547, - "671": 2.1800997257232666, - "672": 2.157829761505127, - "673": 2.1626474857330322, - "674": 2.175178050994873, - "675": 2.174002170562744, - "676": 2.167180299758911, - "677": 2.186819553375244, - "678": 2.1927907466888428, - "679": 2.150287628173828, - "680": 2.161402940750122, - "681": 2.182260274887085, - "682": 2.2066738605499268, - "683": 2.160205364227295, - "684": 2.160770893096924, - "685": 2.163475275039673, - "686": 2.1615588665008545, - "687": 2.171290874481201, - "688": 2.163928985595703, - "689": 2.157331943511963, - "690": 2.13478422164917, - "691": 2.1531004905700684, - "692": 2.163862466812134, - "693": 2.1611220836639404, - "694": 2.160479784011841, - "695": 2.1799941062927246, - "696": 2.155761241912842, - "697": 2.1345863342285156, - "698": 2.1607890129089355, - "699": 2.1383707523345947, - "700": 2.1592798233032227, - "701": 2.1568689346313477 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 0.0946443, - 0.0951086, - 0.0947446, - 0.0944089, - 0.0936091, - 0.0928237, - 0.0926756, - 0.0925337, - 0.0915667, - 0.090519, - 0.0904072, - 0.0902274, - 0.0900198, - 0.0899814, - 0.0893509, - 0.0886575, - 0.0874652, - 0.0865874, - 0.086075, - 0.085592, - 0.0850933, - 0.0846366, - 0.084212, - 0.0835863, - 0.0828834, - 0.0827047, - 0.0820997, - 0.0812886, - 0.0803419, - 0.0795147, - 0.079076, - 0.0784296, - 0.0780158, - 0.0776628, - 0.0773424, - 0.0769231, - 0.0764511, - 0.075981, - 0.075451, - 0.0745602, - 0.0739992, - 0.0734383, - 0.0728231, - 0.0721417, - 0.0716312, - 0.0710718, - 0.0706643, - 0.070211, - 0.0695602, - 0.0688563, - 0.0682117, - 0.0676846, - 0.0670187, - 0.0662317 - ], - "train_epoch_time": 4.839498043060303, - "train_loss": 2.151258555238524, - "train_score": 0.3657482963124854, - "val_loss": 2.2226866170267834, - "val_score": 0.34736473925078093 - }, - { - "epoch": 13, - "grad_norm": 0.7189573049545288, - "learning_rate": 0.06666666666666668, - "model_norm": 87.6231689453125, - "step_logs": { - "grad_norm": { - "702": 1.0570307970046997, - "703": 0.9686579704284668, - "704": 0.9260565638542175, - "705": 0.8788942098617554, - "706": 0.9802384376525879, - "707": 1.0322258472442627, - "708": 0.9872215390205383, - "709": 0.950273871421814, - "710": 0.9797484278678894, - "711": 1.0293323993682861, - "712": 0.957111120223999, - "713": 1.0317163467407227, - "714": 1.1893115043640137, - "715": 1.1397957801818848, - "716": 0.9731723666191101, - "717": 0.9121640920639038, - "718": 0.8991885781288147, - "719": 0.8456013798713684, - "720": 0.8160295486450195, - "721": 0.774200439453125, - "722": 0.7564179301261902, - "723": 0.6807435750961304, - "724": 0.680670976638794, - "725": 0.7307837009429932, - "726": 0.7699684500694275, - "727": 0.7705569267272949, - "728": 0.7109479904174805, - "729": 0.703255295753479, - "730": 0.8134910464286804, - "731": 0.7388712763786316, - "732": 0.7542753219604492, - "733": 0.7013862729072571, - "734": 0.7000980973243713, - "735": 0.7420291900634766, - "736": 0.6798243522644043, - "737": 0.6653239727020264, - "738": 0.6890830993652344, - "739": 0.6895594000816345, - "740": 0.7794739007949829, - "741": 0.869519829750061, - "742": 0.7716234922409058, - "743": 0.6729789972305298, - "744": 0.746738612651825, - "745": 0.720713198184967, - "746": 0.7666577100753784, - "747": 0.7970916032791138, - "748": 0.7638863921165466, - "749": 0.8001917004585266, - "750": 0.7377480268478394, - "751": 0.7141143083572388, - "752": 0.7794457674026489, - "753": 0.797374963760376, - "754": 0.7309591770172119, - "755": 0.7189573049545288 - }, - "loss": { - "702": 2.147902488708496, - "703": 2.143873691558838, - "704": 2.146829128265381, - "705": 2.138852119445801, - "706": 2.1529009342193604, - "707": 2.130876064300537, - "708": 2.13419771194458, - "709": 2.122539520263672, - "710": 2.1355533599853516, - "711": 2.1318490505218506, - "712": 2.115267753601074, - "713": 2.1396265029907227, - "714": 2.1589672565460205, - "715": 2.1399919986724854, - "716": 2.141000986099243, - "717": 2.1422321796417236, - "718": 2.1371140480041504, - "719": 2.152160167694092, - "720": 2.1370935440063477, - "721": 2.123014450073242, - "722": 2.1325559616088867, - "723": 2.1401262283325195, - "724": 2.1458566188812256, - "725": 2.120518207550049, - "726": 2.114623546600342, - "727": 2.1158604621887207, - "728": 2.1445391178131104, - "729": 2.116807460784912, - "730": 2.1336774826049805, - "731": 2.138178825378418, - "732": 2.132021427154541, - "733": 2.121915340423584, - "734": 2.134155750274658, - "735": 2.139400005340576, - "736": 2.110149383544922, - "737": 2.126380205154419, - "738": 2.1152446269989014, - "739": 2.1401095390319824, - "740": 2.112760066986084, - "741": 2.111144542694092, - "742": 2.1270744800567627, - "743": 2.1188018321990967, - "744": 2.125664234161377, - "745": 2.112760066986084, - "746": 2.1231720447540283, - "747": 2.110657215118408, - "748": 2.1248974800109863, - "749": 2.1240930557250977, - "750": 2.1287522315979004, - "751": 2.1012113094329834, - "752": 2.1266796588897705, - "753": 2.1155855655670166, - "754": 2.1015982627868652, - "755": 2.107381582260132 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 0.0655304, - 0.0651083, - 0.064588, - 0.064065, - 0.0632908, - 0.0625854, - 0.0620706, - 0.0615297, - 0.0608837, - 0.060197, - 0.0597117, - 0.0589978, - 0.0581308, - 0.0576164, - 0.0572895, - 0.0567745, - 0.0561865, - 0.0556535, - 0.0550788, - 0.0545155, - 0.053928, - 0.0533933, - 0.0527839, - 0.0521247, - 0.0514777, - 0.0508689, - 0.0503171, - 0.0497096, - 0.0490074, - 0.0484637, - 0.0478408, - 0.0472704, - 0.0466622, - 0.0460221, - 0.0454517, - 0.0448514, - 0.0442238, - 0.0436148, - 0.0429431, - 0.0422702, - 0.0417302, - 0.041176, - 0.0405242, - 0.0399265, - 0.0392913, - 0.0386626, - 0.0380715, - 0.0374418, - 0.0368625, - 0.0362595, - 0.0356203, - 0.0350001, - 0.0344167, - 0.0338098 - ], - "train_epoch_time": 4.840372085571289, - "train_loss": 2.1106098669673314, - "train_score": 0.37544050399679024, - "val_loss": 2.1894215573947, - "val_score": 0.3537510763148079 - }, - { - "epoch": 14, - "grad_norm": 0.6357522010803223, - "learning_rate": 0.03333333333333334, - "model_norm": 87.6263427734375, - "step_logs": { - "grad_norm": { - "756": 0.7133497595787048, - "757": 0.6459547281265259, - "758": 0.7396783828735352, - "759": 0.6765357851982117, - "760": 0.7040878534317017, - "761": 0.7207474708557129, - "762": 0.6932805776596069, - "763": 0.6927089691162109, - "764": 0.7085327506065369, - "765": 0.6976022124290466, - "766": 0.719830334186554, - "767": 0.700122594833374, - "768": 0.6259276270866394, - "769": 0.6756634712219238, - "770": 0.6431193947792053, - "771": 0.6716915369033813, - "772": 0.6533259153366089, - "773": 0.6473256945610046, - "774": 0.6735907196998596, - "775": 0.6981966495513916, - "776": 0.7065365314483643, - "777": 0.635334312915802, - "778": 0.6984379291534424, - "779": 0.6666753888130188, - "780": 0.6803655624389648, - "781": 0.6826092600822449, - "782": 0.662533164024353, - "783": 0.6707337498664856, - "784": 0.6607283353805542, - "785": 0.6483444571495056, - "786": 0.7350357174873352, - "787": 0.6047378182411194, - "788": 0.6693747639656067, - "789": 0.6867666244506836, - "790": 0.6892209649085999, - "791": 0.7210102677345276, - "792": 0.6311251521110535, - "793": 0.632441520690918, - "794": 0.6815193891525269, - "795": 0.6448667645454407, - "796": 0.6682194471359253, - "797": 0.6627563834190369, - "798": 0.6034180521965027, - "799": 0.6212030053138733, - "800": 0.6476743817329407, - "801": 0.6449694633483887, - "802": 0.6639724373817444, - "803": 0.6212056875228882, - "804": 0.6693212389945984, - "805": 0.6246767640113831, - "806": 0.6212588548660278, - "807": 0.6836837530136108, - "808": 0.6258649826049805, - "809": 0.6357522010803223 - }, - "loss": { - "756": 2.0989303588867188, - "757": 2.103926658630371, - "758": 2.120269775390625, - "759": 2.1012096405029297, - "760": 2.079378366470337, - "761": 2.121035099029541, - "762": 2.118846893310547, - "763": 2.1070432662963867, - "764": 2.1176600456237793, - "765": 2.0965466499328613, - "766": 2.115776538848877, - "767": 2.127931833267212, - "768": 2.1014981269836426, - "769": 2.083810806274414, - "770": 2.1252427101135254, - "771": 2.1187243461608887, - "772": 2.120224952697754, - "773": 2.101409435272217, - "774": 2.110621929168701, - "775": 2.10959529876709, - "776": 2.1071276664733887, - "777": 2.112398624420166, - "778": 2.0872693061828613, - "779": 2.108560562133789, - "780": 2.0847537517547607, - "781": 2.0925183296203613, - "782": 2.112980365753174, - "783": 2.1092426776885986, - "784": 2.1252660751342773, - "785": 2.0984835624694824, - "786": 2.101619243621826, - "787": 2.1003973484039307, - "788": 2.1352851390838623, - "789": 2.102938652038574, - "790": 2.100388526916504, - "791": 2.1139585971832275, - "792": 2.109753370285034, - "793": 2.099212884902954, - "794": 2.0989956855773926, - "795": 2.116032361984253, - "796": 2.0871188640594482, - "797": 2.1162281036376953, - "798": 2.102323055267334, - "799": 2.0910918712615967, - "800": 2.1121444702148438, - "801": 2.1059091091156006, - "802": 2.0819010734558105, - "803": 2.0846638679504395, - "804": 2.0995519161224365, - "805": 2.098008155822754, - "806": 2.0978121757507324, - "807": 2.107565402984619, - "808": 2.0821597576141357, - "809": 2.0969314575195312 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 0.0331992, - 0.0326103, - 0.0319664, - 0.0313739, - 0.0307511, - 0.0301353, - 0.0295304, - 0.0289168, - 0.0282998, - 0.0276885, - 0.0270705, - 0.0264623, - 0.0258634, - 0.0252387, - 0.0246322, - 0.0240125, - 0.0234015, - 0.0227876, - 0.0221693, - 0.0215511, - 0.0209356, - 0.0203308, - 0.0197076, - 0.0190973, - 0.0184805, - 0.0178656, - 0.017253, - 0.0166371, - 0.016023, - 0.0154083, - 0.0147867, - 0.01418, - 0.0135609, - 0.0129441, - 0.0123285, - 0.0117115, - 0.0110995, - 0.0104833, - 0.00986576, - 0.00925084, - 0.00863399, - 0.00801801, - 0.00740266, - 0.00678587, - 0.00616906, - 0.00555251, - 0.00493569, - 0.00431926, - 0.00370224, - 0.00308553, - 0.00246858, - 0.00185147, - 0.00123442, - 0.000617247 - ], - "train_epoch_time": 4.842392444610596, - "train_loss": 2.099011701490821, - "train_score": 0.37810930776459245, - "val_loss": 2.1825786824888804, - "val_score": 0.3543071898404 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:23:40.336308", - "final_model_norm": 87.6263427734375, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:21:58.820654", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 2.7014002799987793, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.42987823486328, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 9.138082504272461, - "3": 5.208849906921387, - "4": 3.945364475250244, - "5": 3.708055019378662, - "6": 6.398460865020752, - "7": 16.664430618286133, - "8": 10.699326515197754, - "9": 5.082013130187988, - "10": 4.138754844665527, - "11": 4.742880344390869, - "12": 7.428307056427002, - "13": 6.356147766113281, - "14": 13.46587085723877, - "15": 2.582383871078491, - "16": 3.6345374584198, - "17": 56.4307861328125, - "18": 3.446540117263794, - "19": 5.41510534286499, - "20": 5.19655179977417, - "21": 8.504855155944824, - "22": 3.3510003089904785, - "23": 48.02460479736328, - "24": 4.429149150848389, - "25": 6.106604099273682, - "26": 4.597447395324707, - "27": 3.7286911010742188, - "28": 4.605628967285156, - "29": 3.0380897521972656, - "30": 5.887907028198242, - "31": 2.898404121398926, - "32": 3.8490893840789795, - "33": 2.5686304569244385, - "34": 4.083178520202637, - "35": 4.642446994781494, - "36": 4.855794429779053, - "37": 2.6388304233551025, - "38": 3.178757905960083, - "39": 3.434710741043091, - "40": 3.305979013442993, - "41": 3.1685378551483154, - "42": 2.536409378051758, - "43": 2.8201422691345215, - "44": 2.798776865005493, - "45": 2.7589268684387207, - "46": 2.7870497703552246, - "47": 2.6443395614624023, - "48": 2.780182123184204, - "49": 2.5823721885681152, - "50": 1.9752962589263916, - "51": 2.321368455886841, - "52": 3.3420050144195557, - "53": 2.7014002799987793 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.9599180221557617, - "3": 3.7437262535095215, - "4": 3.5936968326568604, - "5": 3.540834903717041, - "6": 3.4970359802246094, - "7": 3.7328438758850098, - "8": 4.164545059204102, - "9": 3.5597293376922607, - "10": 3.3924622535705566, - "11": 3.439350128173828, - "12": 3.373410701751709, - "13": 3.5318424701690674, - "14": 3.3596181869506836, - "15": 3.2627570629119873, - "16": 3.201747179031372, - "17": 3.860020637512207, - "18": 3.183465003967285, - "19": 3.1514649391174316, - "20": 3.1900830268859863, - "21": 3.2659409046173096, - "22": 2.9773917198181152, - "23": 4.214755535125732, - "24": 2.9944722652435303, - "25": 3.2068495750427246, - "26": 3.121352195739746, - "27": 2.948528289794922, - "28": 2.9719438552856445, - "29": 2.868144989013672, - "30": 2.9442081451416016, - "31": 2.866201877593994, - "32": 2.8892412185668945, - "33": 2.8153820037841797, - "34": 2.853144407272339, - "35": 2.9503626823425293, - "36": 3.1002845764160156, - "37": 2.7691664695739746, - "38": 2.807326316833496, - "39": 2.853483200073242, - "40": 2.817741870880127, - "41": 2.907651901245117, - "42": 2.7306299209594727, - "43": 2.809755563735962, - "44": 2.7774600982666016, - "45": 2.7897768020629883, - "46": 2.745231866836548, - "47": 2.8025074005126953, - "48": 2.712313413619995, - "49": 2.822211503982544, - "50": 2.6848225593566895, - "51": 2.700636863708496, - "52": 2.7530298233032227, - "53": 2.882366418838501 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "step_size_list": [ - 1e-11, - 0.00178359, - 0.00383813, - 0.00587232, - 0.00786375, - 0.00980954, - 0.0112124, - 0.00920591, - 0.0131158, - 0.0168967, - 0.0190387, - 0.0205234, - 0.0200621, - 0.0226342, - 0.0159487, - 0.0291076, - 0.0300184, - 0.00226295, - 0.0337343, - 0.0322913, - 0.0342085, - 0.028667, - 0.0406289, - 0.00338587, - 0.0414784, - 0.0387384, - 0.0442154, - 0.0479015, - 0.0466727, - 0.0530492, - 0.0443379, - 0.0568359, - 0.0549786, - 0.0612622, - 0.0567291, - 0.055747, - 0.0565241, - 0.067701, - 0.0668558, - 0.0671696, - 0.0692549, - 0.0718312, - 0.0764364, - 0.0766684, - 0.0782855, - 0.0801583, - 0.0814046, - 0.0841337, - 0.0844485, - 0.0878307, - 0.0932259, - 0.0909283, - 0.0831359, - 0.0887635 - ], - "train_epoch_time": 4.842854261398315, - "train_loss": 2.6698590013867984, - "train_score": 0.2439685707027293, - "val_loss": 2.700050770346798, - "val_score": 0.2380704654275892 - }, - { - "epoch": 1, - "grad_norm": 1.7721285820007324, - "learning_rate": 0.1, - "model_norm": 87.4465103149414, - "step_logs": { - "grad_norm": { - "54": 1.965592622756958, - "55": 2.045867919921875, - "56": 2.3545002937316895, - "57": 2.237187623977661, - "58": 1.8276700973510742, - "59": 1.9295403957366943, - "60": 2.125190019607544, - "61": 2.9536099433898926, - "62": 2.3407278060913086, - "63": 1.7702795267105103, - "64": 1.8679486513137817, - "65": 2.803359031677246, - "66": 2.3363051414489746, - "67": 1.460381269454956, - "68": 1.6297032833099365, - "69": 2.2212111949920654, - "70": 2.2825076580047607, - "71": 2.4100303649902344, - "72": 2.243866205215454, - "73": 1.718739628791809, - "74": 1.857200026512146, - "75": 2.276668071746826, - "76": 2.097562789916992, - "77": 1.6709188222885132, - "78": 1.8551037311553955, - "79": 2.5150721073150635, - "80": 2.029993772506714, - "81": 1.244290828704834, - "82": 1.4587386846542358, - "83": 2.289640426635742, - "84": 2.1908371448516846, - "85": 1.6843329668045044, - "86": 1.716336727142334, - "87": 2.178058385848999, - "88": 1.9921116828918457, - "89": 1.5284374952316284, - "90": 1.803568959236145, - "91": 2.30283784866333, - "92": 2.015841484069824, - "93": 1.4610092639923096, - "94": 1.5536742210388184, - "95": 1.9504945278167725, - "96": 2.265597343444824, - "97": 2.180148124694824, - "98": 1.6076425313949585, - "99": 1.3786959648132324, - "100": 1.5268253087997437, - "101": 2.073302745819092, - "102": 2.0648410320281982, - "103": 1.7442132234573364, - "104": 1.7094382047653198, - "105": 1.555678129196167, - "106": 1.550230860710144, - "107": 1.7721285820007324 - }, - "loss": { - "54": 2.662569522857666, - "55": 2.6708662509918213, - "56": 2.645061492919922, - "57": 2.718660354614258, - "58": 2.633521318435669, - "59": 2.6217260360717773, - "60": 2.653656482696533, - "61": 2.696911096572876, - "62": 2.7883663177490234, - "63": 2.622990131378174, - "64": 2.6221072673797607, - "65": 2.6512012481689453, - "66": 2.76615834236145, - "67": 2.582188129425049, - "68": 2.585904121398926, - "69": 2.6308398246765137, - "70": 2.7060368061065674, - "71": 2.647770881652832, - "72": 2.708874225616455, - "73": 2.6287479400634766, - "74": 2.6195075511932373, - "75": 2.6110892295837402, - "76": 2.6809592247009277, - "77": 2.590961456298828, - "78": 2.6345272064208984, - "79": 2.626781463623047, - "80": 2.7129602432250977, - "81": 2.5486814975738525, - "82": 2.5531840324401855, - "83": 2.6010518074035645, - "84": 2.6774182319641113, - "85": 2.584240674972534, - "86": 2.594029188156128, - "87": 2.5646281242370605, - "88": 2.6595239639282227, - "89": 2.5524184703826904, - "90": 2.587097406387329, - "91": 2.602904796600342, - "92": 2.6498537063598633, - "93": 2.553509473800659, - "94": 2.564358711242676, - "95": 2.5807104110717773, - "96": 2.6363778114318848, - "97": 2.6329097747802734, - "98": 2.5728330612182617, - "99": 2.562537908554077, - "100": 2.5657973289489746, - "101": 2.579831600189209, - "102": 2.6441688537597656, - "103": 2.5644607543945312, - "104": 2.5824155807495117, - "105": 2.552157402038574, - "106": 2.5535764694213867, - "107": 2.552483081817627 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "step_size_list": [ - 0.0932355, - 0.0927337, - 0.0905147, - 0.091571, - 0.0940362, - 0.0933702, - 0.0921576, - 0.086078, - 0.0910541, - 0.0943629, - 0.0937616, - 0.0870919, - 0.0910198, - 0.0960341, - 0.0951154, - 0.0914271, - 0.091219, - 0.0901159, - 0.0914968, - 0.0946801, - 0.093823, - 0.0909708, - 0.0924167, - 0.0948875, - 0.0938691, - 0.0892534, - 0.0929413, - 0.0970522, - 0.0959995, - 0.090845, - 0.0917739, - 0.0947966, - 0.094627, - 0.0915342, - 0.0930571, - 0.095624, - 0.0940851, - 0.090755, - 0.0928784, - 0.0959881, - 0.0955049, - 0.0931351, - 0.0911288, - 0.0917211, - 0.0952175, - 0.0964238, - 0.0956546, - 0.0923096, - 0.0925393, - 0.0944005, - 0.0946451, - 0.0954733, - 0.0955059, - 0.0942048 - ], - "train_epoch_time": 4.839518308639526, - "train_loss": 2.58684149985676, - "train_score": 0.22548870155681325, - "val_loss": 2.622239226451834, - "val_score": 0.2215843856437335 - }, - { - "epoch": 2, - "grad_norm": 1.5319663286209106, - "learning_rate": 0.1, - "model_norm": 87.46024322509766, - "step_logs": { - "grad_norm": { - "108": 1.7222405672073364, - "109": 1.7508620023727417, - "110": 1.6630184650421143, - "111": 1.5427573919296265, - "112": 1.613377571105957, - "113": 1.6383252143859863, - "114": 1.6512948274612427, - "115": 1.6589441299438477, - "116": 1.666428565979004, - "117": 1.7925798892974854, - "118": 1.8243111371994019, - "119": 1.7605955600738525, - "120": 1.8316779136657715, - "121": 1.771317958831787, - "122": 1.799514651298523, - "123": 1.9349422454833984, - "124": 1.7726531028747559, - "125": 1.4533581733703613, - "126": 1.5049976110458374, - "127": 1.6781474351882935, - "128": 1.6998845338821411, - "129": 1.590766191482544, - "130": 1.7025343179702759, - "131": 1.8275318145751953, - "132": 1.8172345161437988, - "133": 1.591578722000122, - "134": 1.588600516319275, - "135": 1.678156852722168, - "136": 1.7168233394622803, - "137": 1.606050968170166, - "138": 1.5711708068847656, - "139": 1.653882622718811, - "140": 1.618391513824463, - "141": 1.7055188417434692, - "142": 1.6396424770355225, - "143": 1.4647818803787231, - "144": 1.417738914489746, - "145": 1.555832028388977, - "146": 1.672027587890625, - "147": 1.7019164562225342, - "148": 1.7026259899139404, - "149": 1.9225009679794312, - "150": 1.7311534881591797, - "151": 1.4409128427505493, - "152": 1.4107351303100586, - "153": 1.4797552824020386, - "154": 1.5716218948364258, - "155": 1.4705257415771484, - "156": 1.36956787109375, - "157": 1.4272806644439697, - "158": 1.42551589012146, - "159": 1.589177131652832, - "160": 1.6364829540252686, - "161": 1.5319663286209106 - }, - "loss": { - "108": 2.5938098430633545, - "109": 2.5373518466949463, - "110": 2.5869998931884766, - "111": 2.5499298572540283, - "112": 2.5595407485961914, - "113": 2.51430606842041, - "114": 2.590421676635742, - "115": 2.5450260639190674, - "116": 2.580981731414795, - "117": 2.535814046859741, - "118": 2.5899620056152344, - "119": 2.5390784740448, - "120": 2.5932376384735107, - "121": 2.5510153770446777, - "122": 2.5700395107269287, - "123": 2.563209056854248, - "124": 2.615906000137329, - "125": 2.53306245803833, - "126": 2.522599697113037, - "127": 2.527998924255371, - "128": 2.578871250152588, - "129": 2.544523239135742, - "130": 2.5542256832122803, - "131": 2.5489180088043213, - "132": 2.5765740871429443, - "133": 2.5239710807800293, - "134": 2.5494332313537598, - "135": 2.5302553176879883, - "136": 2.556654214859009, - "137": 2.539095878601074, - "138": 2.545562267303467, - "139": 2.538005828857422, - "140": 2.542201042175293, - "141": 2.5334973335266113, - "142": 2.5553481578826904, - "143": 2.524021863937378, - "144": 2.528252601623535, - "145": 2.5096750259399414, - "146": 2.537395477294922, - "147": 2.5060198307037354, - "148": 2.5493154525756836, - "149": 2.52825927734375, - "150": 2.5793089866638184, - "151": 2.5261688232421875, - "152": 2.5134482383728027, - "153": 2.4945778846740723, - "154": 2.523097515106201, - "155": 2.526151657104492, - "156": 2.515076160430908, - "157": 2.529634475708008, - "158": 2.515226125717163, - "159": 2.523855686187744, - "160": 2.5593619346618652, - "161": 2.5114336013793945 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "step_size_list": [ - 0.0945916, - 0.0943033, - 0.094926, - 0.0955411, - 0.0951612, - 0.0949328, - 0.095, - 0.0948705, - 0.0948949, - 0.0940416, - 0.0939629, - 0.0942472, - 0.0939242, - 0.0942066, - 0.0940734, - 0.0931937, - 0.0943342, - 0.0959975, - 0.0957034, - 0.0947239, - 0.0946948, - 0.095263, - 0.0946305, - 0.0938513, - 0.0939775, - 0.0952217, - 0.095284, - 0.0947283, - 0.0945498, - 0.0951662, - 0.0953754, - 0.0948868, - 0.0951009, - 0.094571, - 0.0950025, - 0.095923, - 0.0961769, - 0.0953993, - 0.0947787, - 0.0945366, - 0.0946202, - 0.0931885, - 0.0945095, - 0.0960528, - 0.0961917, - 0.0957957, - 0.0953336, - 0.0958956, - 0.0964051, - 0.0961293, - 0.0961173, - 0.0952352, - 0.0950282, - 0.0955361 - ], - "train_epoch_time": 4.8396055698394775, - "train_loss": 2.531406187771725, - "train_score": 0.24703528509506023, - "val_loss": 2.5778532852124676, - "val_score": 0.24431777441816407 - }, - { - "epoch": 3, - "grad_norm": 1.6726247072219849, - "learning_rate": 0.1, - "model_norm": 87.47252655029297, - "step_logs": { - "grad_norm": { - "162": 1.6153781414031982, - "163": 1.673122525215149, - "164": 1.7028475999832153, - "165": 1.6289361715316772, - "166": 1.5314459800720215, - "167": 1.3829418420791626, - "168": 1.3458621501922607, - "169": 1.4614267349243164, - "170": 1.490400791168213, - "171": 1.575042963027954, - "172": 1.655622959136963, - "173": 1.4333730936050415, - "174": 1.4942058324813843, - "175": 1.755441427230835, - "176": 1.8549357652664185, - "177": 1.8647880554199219, - "178": 1.6314679384231567, - "179": 1.4605293273925781, - "180": 1.4982885122299194, - "181": 1.5453455448150635, - "182": 1.5823681354522705, - "183": 1.4793906211853027, - "184": 1.3944069147109985, - "185": 1.5388894081115723, - "186": 1.4895504713058472, - "187": 1.3919439315795898, - "188": 1.5177021026611328, - "189": 1.5192327499389648, - "190": 1.4143918752670288, - "191": 1.3188138008117676, - "192": 1.3198351860046387, - "193": 1.3574910163879395, - "194": 1.2956126928329468, - "195": 1.2524044513702393, - "196": 1.2873085737228394, - "197": 1.3792039155960083, - "198": 1.3747633695602417, - "199": 1.368151307106018, - "200": 1.3862868547439575, - "201": 1.536097526550293, - "202": 1.4076716899871826, - "203": 1.2324305772781372, - "204": 1.3698409795761108, - "205": 1.6835252046585083, - "206": 1.6933979988098145, - "207": 1.6048285961151123, - "208": 1.4945200681686401, - "209": 1.3977588415145874, - "210": 1.3661211729049683, - "211": 1.3046761751174927, - "212": 1.3673046827316284, - "213": 1.5285789966583252, - "214": 1.8321200609207153, - "215": 1.6726247072219849 - }, - "loss": { - "162": 2.5297117233276367, - "163": 2.5398120880126953, - "164": 2.5312044620513916, - "165": 2.5157523155212402, - "166": 2.53338885307312, - "167": 2.531099796295166, - "168": 2.4975533485412598, - "169": 2.490152597427368, - "170": 2.528290033340454, - "171": 2.500814914703369, - "172": 2.5340723991394043, - "173": 2.482034683227539, - "174": 2.527773857116699, - "175": 2.4900028705596924, - "176": 2.5368003845214844, - "177": 2.526069164276123, - "178": 2.542509078979492, - "179": 2.510859966278076, - "180": 2.500474214553833, - "181": 2.5281267166137695, - "182": 2.512002944946289, - "183": 2.5041253566741943, - "184": 2.505188465118408, - "185": 2.5058794021606445, - "186": 2.519810199737549, - "187": 2.5065155029296875, - "188": 2.5223541259765625, - "189": 2.510152816772461, - "190": 2.5078835487365723, - "191": 2.4967923164367676, - "192": 2.4992456436157227, - "193": 2.4970943927764893, - "194": 2.484459638595581, - "195": 2.4877758026123047, - "196": 2.4717559814453125, - "197": 2.4962782859802246, - "198": 2.4987924098968506, - "199": 2.4934144020080566, - "200": 2.4905261993408203, - "201": 2.486286163330078, - "202": 2.51426362991333, - "203": 2.4798288345336914, - "204": 2.4783501625061035, - "205": 2.5024008750915527, - "206": 2.535660982131958, - "207": 2.4954142570495605, - "208": 2.501339912414551, - "209": 2.503080368041992, - "210": 2.5021119117736816, - "211": 2.4640250205993652, - "212": 2.4925060272216797, - "213": 2.490812301635742, - "214": 2.531944513320923, - "215": 2.521343469619751 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "step_size_list": [ - 0.0950954, - 0.0947769, - 0.0945824, - 0.0949905, - 0.095576, - 0.0963595, - 0.0965007, - 0.0958879, - 0.095792, - 0.0952745, - 0.094869, - 0.0960256, - 0.0957705, - 0.0941727, - 0.093649, - 0.0935602, - 0.095026, - 0.0959252, - 0.095704, - 0.09549, - 0.0952527, - 0.095813, - 0.0962643, - 0.095488, - 0.095783, - 0.0962789, - 0.0956334, - 0.0956046, - 0.0961645, - 0.0966342, - 0.0966324, - 0.0964415, - 0.0967322, - 0.0969439, - 0.0967565, - 0.0963298, - 0.096356, - 0.0963822, - 0.0962851, - 0.0954698, - 0.0962088, - 0.0970285, - 0.0963524, - 0.0946404, - 0.0946481, - 0.0950928, - 0.095726, - 0.0962439, - 0.0964047, - 0.0966613, - 0.0963853, - 0.0955198, - 0.0937834, - 0.0947436 - ], - "train_epoch_time": 4.84104323387146, - "train_loss": 2.494138606480581, - "train_score": 0.26137800383430987, - "val_loss": 2.54036904010926, - "val_score": 0.25223342436189355 - }, - { - "epoch": 4, - "grad_norm": 1.2146599292755127, - "learning_rate": 0.1, - "model_norm": 87.48461151123047, - "step_logs": { - "grad_norm": { - "216": 1.3636150360107422, - "217": 1.4804573059082031, - "218": 1.5302199125289917, - "219": 1.4577096700668335, - "220": 1.3446636199951172, - "221": 1.3021408319473267, - "222": 1.3390588760375977, - "223": 1.4846988916397095, - "224": 1.5785391330718994, - "225": 1.5727964639663696, - "226": 1.4491405487060547, - "227": 1.340945839881897, - "228": 1.2831707000732422, - "229": 1.3626240491867065, - "230": 1.3536001443862915, - "231": 1.2594223022460938, - "232": 1.3504176139831543, - "233": 1.4755704402923584, - "234": 1.4534399509429932, - "235": 1.4111806154251099, - "236": 1.4107003211975098, - "237": 1.3766318559646606, - "238": 1.367026925086975, - "239": 1.4091522693634033, - "240": 1.416739821434021, - "241": 1.3254085779190063, - "242": 1.3123525381088257, - "243": 1.2553538084030151, - "244": 1.2974796295166016, - "245": 1.4017691612243652, - "246": 1.4274452924728394, - "247": 1.4385408163070679, - "248": 1.4232410192489624, - "249": 1.346845269203186, - "250": 1.4480693340301514, - "251": 1.5677980184555054, - "252": 1.4672484397888184, - "253": 1.4176898002624512, - "254": 1.4086984395980835, - "255": 1.716362476348877, - "256": 1.7754286527633667, - "257": 1.4389187097549438, - "258": 1.3786344528198242, - "259": 1.4783419370651245, - "260": 1.510939121246338, - "261": 1.4312331676483154, - "262": 1.4567210674285889, - "263": 1.563134789466858, - "264": 1.646837830543518, - "265": 1.7298420667648315, - "266": 1.6760711669921875, - "267": 1.4475209712982178, - "268": 1.4026678800582886, - "269": 1.2146599292755127 - }, - "loss": { - "216": 2.4945056438446045, - "217": 2.476877212524414, - "218": 2.4778897762298584, - "219": 2.5171587467193604, - "220": 2.506223678588867, - "221": 2.4749577045440674, - "222": 2.499462604522705, - "223": 2.4800853729248047, - "224": 2.521491527557373, - "225": 2.4830408096313477, - "226": 2.501441478729248, - "227": 2.4814603328704834, - "228": 2.4833438396453857, - "229": 2.4706075191497803, - "230": 2.4815661907196045, - "231": 2.476168632507324, - "232": 2.458076000213623, - "233": 2.4829487800598145, - "234": 2.4967284202575684, - "235": 2.47981595993042, - "236": 2.500000476837158, - "237": 2.490534782409668, - "238": 2.477534770965576, - "239": 2.4788246154785156, - "240": 2.491225242614746, - "241": 2.4874119758605957, - "242": 2.4652035236358643, - "243": 2.4688680171966553, - "244": 2.4687812328338623, - "245": 2.467210292816162, - "246": 2.4625253677368164, - "247": 2.4746756553649902, - "248": 2.4987213611602783, - "249": 2.468862295150757, - "250": 2.4748666286468506, - "251": 2.4936366081237793, - "252": 2.50899600982666, - "253": 2.471942901611328, - "254": 2.4647915363311768, - "255": 2.472774028778076, - "256": 2.5127928256988525, - "257": 2.471195697784424, - "258": 2.482217311859131, - "259": 2.4569308757781982, - "260": 2.4877946376800537, - "261": 2.4683563709259033, - "262": 2.462571144104004, - "263": 2.486539840698242, - "264": 2.5109944343566895, - "265": 2.4866538047790527, - "266": 2.493426561355591, - "267": 2.471728563308716, - "268": 2.4728283882141113, - "269": 2.4548778533935547 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "step_size_list": [ - 0.0964068, - 0.095763, - 0.0954882, - 0.0959501, - 0.0965183, - 0.096688, - 0.0965373, - 0.095745, - 0.0952916, - 0.0952552, - 0.0959715, - 0.0965035, - 0.0967912, - 0.0963784, - 0.0964397, - 0.0968966, - 0.0964232, - 0.0957996, - 0.0959412, - 0.0961397, - 0.0961722, - 0.0963348, - 0.0963657, - 0.0961489, - 0.0961276, - 0.0965893, - 0.0966248, - 0.0969071, - 0.0967029, - 0.0961704, - 0.0960272, - 0.0959866, - 0.0961046, - 0.0964564, - 0.0959358, - 0.095303, - 0.0958863, - 0.0960935, - 0.0961302, - 0.0943782, - 0.094098, - 0.0959792, - 0.0963127, - 0.0957418, - 0.095613, - 0.0960159, - 0.0958694, - 0.0953169, - 0.0948763, - 0.0943246, - 0.0946672, - 0.0959338, - 0.096174, - 0.0970826 - ], - "train_epoch_time": 4.841325998306274, - "train_loss": 2.4533854401778625, - "train_score": 0.27739979369910583, - "val_loss": 2.50115905016079, - "val_score": 0.2703923287402612 - }, - { - "epoch": 5, - "grad_norm": 1.604719638824463, - "learning_rate": 0.1, - "model_norm": 87.49969482421875, - "step_logs": { - "grad_norm": { - "270": 1.1493749618530273, - "271": 1.1330543756484985, - "272": 1.237888216972351, - "273": 1.3979982137680054, - "274": 1.4486361742019653, - "275": 1.462369441986084, - "276": 1.534291386604309, - "277": 1.5890707969665527, - "278": 1.4712353944778442, - "279": 1.287907600402832, - "280": 1.2565165758132935, - "281": 1.4038207530975342, - "282": 1.5695432424545288, - "283": 1.639522910118103, - "284": 1.6006356477737427, - "285": 1.3548380136489868, - "286": 1.2830750942230225, - "287": 1.3571367263793945, - "288": 1.2246315479278564, - "289": 1.120367407798767, - "290": 1.1854363679885864, - "291": 1.3918384313583374, - "292": 1.5270098447799683, - "293": 1.581470251083374, - "294": 1.4569016695022583, - "295": 1.372684121131897, - "296": 1.5808931589126587, - "297": 1.7226909399032593, - "298": 1.5298813581466675, - "299": 1.496250033378601, - "300": 1.6258769035339355, - "301": 1.7273105382919312, - "302": 1.6980730295181274, - "303": 1.6322118043899536, - "304": 1.6293002367019653, - "305": 1.4697439670562744, - "306": 1.426142692565918, - "307": 1.7037168741226196, - "308": 1.7753783464431763, - "309": 1.8286687135696411, - "310": 1.5889177322387695, - "311": 1.6861062049865723, - "312": 1.6601293087005615, - "313": 1.6899635791778564, - "314": 1.6301934719085693, - "315": 1.5262844562530518, - "316": 1.5647791624069214, - "317": 1.733574628829956, - "318": 1.7964720726013184, - "319": 1.6857889890670776, - "320": 1.6071444749832153, - "321": 1.5039522647857666, - "322": 1.45350182056427, - "323": 1.604719638824463 - }, - "loss": { - "270": 2.459867000579834, - "271": 2.4469497203826904, - "272": 2.4497859477996826, - "273": 2.4574506282806396, - "274": 2.4623823165893555, - "275": 2.4494693279266357, - "276": 2.4682140350341797, - "277": 2.4670796394348145, - "278": 2.46246075630188, - "279": 2.457047939300537, - "280": 2.436194658279419, - "281": 2.4511539936065674, - "282": 2.471869945526123, - "283": 2.490079879760742, - "284": 2.461785078048706, - "285": 2.449409246444702, - "286": 2.4527413845062256, - "287": 2.4596033096313477, - "288": 2.445344924926758, - "289": 2.424729824066162, - "290": 2.430593967437744, - "291": 2.4237821102142334, - "292": 2.4400696754455566, - "293": 2.438300371170044, - "294": 2.4570446014404297, - "295": 2.4188010692596436, - "296": 2.452712059020996, - "297": 2.454869508743286, - "298": 2.459745407104492, - "299": 2.4303040504455566, - "300": 2.4442968368530273, - "301": 2.4529972076416016, - "302": 2.472748279571533, - "303": 2.4257872104644775, - "304": 2.448838710784912, - "305": 2.4362494945526123, - "306": 2.427426338195801, - "307": 2.4044203758239746, - "308": 2.4801931381225586, - "309": 2.46386456489563, - "310": 2.4611735343933105, - "311": 2.4443514347076416, - "312": 2.441993236541748, - "313": 2.433992624282837, - "314": 2.438014030456543, - "315": 2.411799907684326, - "316": 2.3855862617492676, - "317": 2.4409897327423096, - "318": 2.4333548545837402, - "319": 2.424011707305908, - "320": 2.4327902793884277, - "321": 2.4080169200897217, - "322": 2.3980765342712402, - "323": 2.4144649505615234 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "step_size_list": [ - 0.097385, - 0.0974438, - 0.0969673, - 0.0961756, - 0.095913, - 0.0958173, - 0.0954483, - 0.0951315, - 0.09579, - 0.0967348, - 0.0968613, - 0.0961354, - 0.0952535, - 0.0948789, - 0.0950538, - 0.0963883, - 0.096753, - 0.096391, - 0.0970248, - 0.0974769, - 0.0971904, - 0.0961573, - 0.0954398, - 0.0951215, - 0.0958595, - 0.096251, - 0.0951522, - 0.0943001, - 0.0954584, - 0.0955969, - 0.09487, - 0.0942671, - 0.0944908, - 0.0947946, - 0.0948585, - 0.0957549, - 0.0959791, - 0.0943075, - 0.0940254, - 0.0936451, - 0.0951213, - 0.0945043, - 0.0946584, - 0.0944583, - 0.0948315, - 0.095393, - 0.0951186, - 0.0942011, - 0.093781, - 0.0944626, - 0.0949591, - 0.0955141, - 0.0957809, - 0.0949373 - ], - "train_epoch_time": 4.8408050537109375, - "train_loss": 2.421827687697226, - "train_score": 0.2904624732016149, - "val_loss": 2.4764338429294415, - "val_score": 0.279689294051905 - }, - { - "epoch": 6, - "grad_norm": 1.850106954574585, - "learning_rate": 0.1, - "model_norm": 87.51580810546875, - "step_logs": { - "grad_norm": { - "324": 1.70496666431427, - "325": 1.5352458953857422, - "326": 1.5272716283798218, - "327": 1.486746907234192, - "328": 1.3978716135025024, - "329": 1.3832483291625977, - "330": 1.4204920530319214, - "331": 1.5262407064437866, - "332": 1.687807321548462, - "333": 1.6875107288360596, - "334": 1.487512469291687, - "335": 1.3805464506149292, - "336": 1.3373854160308838, - "337": 1.392936110496521, - "338": 1.544201135635376, - "339": 1.7049697637557983, - "340": 1.7199921607971191, - "341": 1.566656470298767, - "342": 1.4393539428710938, - "343": 1.4776968955993652, - "344": 1.5886560678482056, - "345": 1.5943487882614136, - "346": 1.5563284158706665, - "347": 1.4807718992233276, - "348": 1.4561889171600342, - "349": 1.3084001541137695, - "350": 1.102138638496399, - "351": 1.1093521118164062, - "352": 1.2013018131256104, - "353": 1.3223005533218384, - "354": 1.4350271224975586, - "355": 1.4879282712936401, - "356": 1.5674432516098022, - "357": 1.6384798288345337, - "358": 1.6519635915756226, - "359": 1.6509078741073608, - "360": 1.581932783126831, - "361": 1.5987820625305176, - "362": 1.4949012994766235, - "363": 1.448889136314392, - "364": 1.5567195415496826, - "365": 1.5549774169921875, - "366": 1.3681429624557495, - "367": 1.3798903226852417, - "368": 1.302626609802246, - "369": 1.2896926403045654, - "370": 1.2533292770385742, - "371": 1.1450278759002686, - "372": 1.140235424041748, - "373": 1.2935467958450317, - "374": 1.5056853294372559, - "375": 1.6462469100952148, - "376": 1.8939093351364136, - "377": 1.850106954574585 - }, - "loss": { - "324": 2.4293105602264404, - "325": 2.4545586109161377, - "326": 2.39117431640625, - "327": 2.425450325012207, - "328": 2.395688533782959, - "329": 2.403061866760254, - "330": 2.3921380043029785, - "331": 2.4047253131866455, - "332": 2.391915798187256, - "333": 2.4117398262023926, - "334": 2.3873023986816406, - "335": 2.418776273727417, - "336": 2.3815364837646484, - "337": 2.3877670764923096, - "338": 2.431896686553955, - "339": 2.41985821723938, - "340": 2.4127912521362305, - "341": 2.4208478927612305, - "342": 2.369502544403076, - "343": 2.383650779724121, - "344": 2.3845064640045166, - "345": 2.4073538780212402, - "346": 2.3645572662353516, - "347": 2.3936514854431152, - "348": 2.38271164894104, - "349": 2.3873305320739746, - "350": 2.3465943336486816, - "351": 2.379585027694702, - "352": 2.3605422973632812, - "353": 2.3752424716949463, - "354": 2.375164031982422, - "355": 2.3937907218933105, - "356": 2.3961539268493652, - "357": 2.405888557434082, - "358": 2.401010751724243, - "359": 2.384521007537842, - "360": 2.3716607093811035, - "361": 2.3785009384155273, - "362": 2.375952959060669, - "363": 2.3757760524749756, - "364": 2.3742928504943848, - "365": 2.3953442573547363, - "366": 2.347576141357422, - "367": 2.382486343383789, - "368": 2.3730404376983643, - "369": 2.368624210357666, - "370": 2.371485710144043, - "371": 2.3564565181732178, - "372": 2.3519186973571777, - "373": 2.3728907108306885, - "374": 2.36344838142395, - "375": 2.369312286376953, - "376": 2.397822380065918, - "377": 2.4184350967407227 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "step_size_list": [ - 0.0943548, - 0.0954187, - 0.0953494, - 0.0956419, - 0.0960815, - 0.0961713, - 0.0959531, - 0.0953804, - 0.0943798, - 0.0944253, - 0.095571, - 0.0962095, - 0.0963808, - 0.0960957, - 0.0953265, - 0.0943339, - 0.0942235, - 0.0951753, - 0.0958114, - 0.0956203, - 0.0949739, - 0.0949852, - 0.0951277, - 0.0956204, - 0.0957398, - 0.0965387, - 0.0974771, - 0.0974793, - 0.0970339, - 0.09645, - 0.0958451, - 0.0955801, - 0.0951233, - 0.0947156, - 0.0946226, - 0.094594, - 0.0949885, - 0.0949007, - 0.0955084, - 0.0957689, - 0.0951444, - 0.0951953, - 0.0961662, - 0.0961575, - 0.0965482, - 0.096608, - 0.0967943, - 0.0972934, - 0.0973104, - 0.0965943, - 0.0954234, - 0.0945902, - 0.093041, - 0.093391 - ], - "train_epoch_time": 4.840641260147095, - "train_loss": 2.380497599674263, - "train_score": 0.29963795729996995, - "val_loss": 2.4356343497091264, - "val_score": 0.2938567378422423 - }, - { - "epoch": 7, - "grad_norm": 1.6480035781860352, - "learning_rate": 0.1, - "model_norm": 87.53150939941406, - "step_logs": { - "grad_norm": { - "378": 1.5266987085342407, - "379": 1.3259601593017578, - "380": 1.3328115940093994, - "381": 1.485754370689392, - "382": 1.7592358589172363, - "383": 1.7902156114578247, - "384": 1.8240782022476196, - "385": 1.6953977346420288, - "386": 1.4446179866790771, - "387": 1.3356860876083374, - "388": 1.3338754177093506, - "389": 1.4443869590759277, - "390": 1.5102128982543945, - "391": 1.5769864320755005, - "392": 1.609325647354126, - "393": 1.524599313735962, - "394": 1.1873465776443481, - "395": 0.9014818072319031, - "396": 0.9501115679740906, - "397": 1.0860326290130615, - "398": 1.4255868196487427, - "399": 1.672377586364746, - "400": 1.8315880298614502, - "401": 1.6081513166427612, - "402": 1.409303069114685, - "403": 1.4617589712142944, - "404": 1.4337639808654785, - "405": 1.4396882057189941, - "406": 1.564761996269226, - "407": 1.8757449388504028, - "408": 1.6897828578948975, - "409": 1.6864510774612427, - "410": 1.6438335180282593, - "411": 1.4547008275985718, - "412": 1.3749847412109375, - "413": 1.5135363340377808, - "414": 1.5014058351516724, - "415": 1.7297778129577637, - "416": 1.6596649885177612, - "417": 1.5333251953125, - "418": 1.5313479900360107, - "419": 1.69602370262146, - "420": 1.6985406875610352, - "421": 1.5260009765625, - "422": 1.3236879110336304, - "423": 1.3171796798706055, - "424": 1.4673880338668823, - "425": 1.4992319345474243, - "426": 1.3739311695098877, - "427": 1.3212213516235352, - "428": 1.5277010202407837, - "429": 1.633017659187317, - "430": 1.7155961990356445, - "431": 1.6480035781860352 - }, - "loss": { - "378": 2.3986642360687256, - "379": 2.366818904876709, - "380": 2.35683536529541, - "381": 2.360246181488037, - "382": 2.3710055351257324, - "383": 2.4133899211883545, - "384": 2.3755006790161133, - "385": 2.3968124389648438, - "386": 2.363870143890381, - "387": 2.344353199005127, - "388": 2.320923089981079, - "389": 2.361589193344116, - "390": 2.3946704864501953, - "391": 2.3677263259887695, - "392": 2.3542699813842773, - "393": 2.3507258892059326, - "394": 2.3582303524017334, - "395": 2.3268086910247803, - "396": 2.3252711296081543, - "397": 2.3379464149475098, - "398": 2.3369557857513428, - "399": 2.3873050212860107, - "400": 2.3689322471618652, - "401": 2.396152973175049, - "402": 2.3575778007507324, - "403": 2.369710683822632, - "404": 2.3685595989227295, - "405": 2.3393683433532715, - "406": 2.346273422241211, - "407": 2.3763089179992676, - "408": 2.387648105621338, - "409": 2.3750905990600586, - "410": 2.3492965698242188, - "411": 2.3642311096191406, - "412": 2.353775978088379, - "413": 2.347517728805542, - "414": 2.3600897789001465, - "415": 2.359027862548828, - "416": 2.3703038692474365, - "417": 2.368494987487793, - "418": 2.337583541870117, - "419": 2.3678970336914062, - "420": 2.3678038120269775, - "421": 2.342033863067627, - "422": 2.3472423553466797, - "423": 2.3356282711029053, - "424": 2.3358168601989746, - "425": 2.347956657409668, - "426": 2.3190560340881348, - "427": 2.3381059169769287, - "428": 2.327958583831787, - "429": 2.365657329559326, - "430": 2.3531432151794434, - "431": 2.365602493286133 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "step_size_list": [ - 0.0953666, - 0.0964188, - 0.0963683, - 0.0955326, - 0.0938733, - 0.0937737, - 0.0934551, - 0.094343, - 0.0957724, - 0.0963345, - 0.0963085, - 0.0957698, - 0.0954544, - 0.0950104, - 0.0947863, - 0.0952889, - 0.0970977, - 0.0982837, - 0.0980959, - 0.0975396, - 0.095833, - 0.0944664, - 0.0933876, - 0.0948799, - 0.095958, - 0.0956861, - 0.095841, - 0.0957579, - 0.095041, - 0.0931072, - 0.0943579, - 0.0943509, - 0.0945617, - 0.0957163, - 0.096139, - 0.0953478, - 0.095442, - 0.0940363, - 0.0945087, - 0.0952714, - 0.0952237, - 0.0942739, - 0.0942576, - 0.095264, - 0.0964019, - 0.0964189, - 0.0955939, - 0.0954322, - 0.0960892, - 0.0964013, - 0.0952266, - 0.0946644, - 0.0941142, - 0.0945712 - ], - "train_epoch_time": 4.840502023696899, - "train_loss": 2.3419843219445116, - "train_score": 0.3116133877673484, - "val_loss": 2.4106282238024113, - "val_score": 0.29856127986404296 - }, - { - "epoch": 8, - "grad_norm": 1.6200577020645142, - "learning_rate": 0.1, - "model_norm": 87.547607421875, - "step_logs": { - "grad_norm": { - "432": 1.634557843208313, - "433": 1.5858564376831055, - "434": 1.498273491859436, - "435": 1.468005657196045, - "436": 1.3487775325775146, - "437": 1.3570528030395508, - "438": 1.4803147315979004, - "439": 1.5396385192871094, - "440": 1.586411714553833, - "441": 1.6063412427902222, - "442": 1.609969139099121, - "443": 1.8239301443099976, - "444": 1.7798283100128174, - "445": 1.507086992263794, - "446": 1.2801793813705444, - "447": 1.3306251764297485, - "448": 1.4774353504180908, - "449": 1.4359245300292969, - "450": 1.3928784132003784, - "451": 1.375860333442688, - "452": 1.405258297920227, - "453": 1.4549062252044678, - "454": 1.5258451700210571, - "455": 1.6319407224655151, - "456": 1.6003764867782593, - "457": 1.464892864227295, - "458": 1.3905911445617676, - "459": 1.3548095226287842, - "460": 1.6180012226104736, - "461": 1.834257960319519, - "462": 1.824002981185913, - "463": 1.7079906463623047, - "464": 1.428998351097107, - "465": 1.329432725906372, - "466": 1.1733323335647583, - "467": 1.2698811292648315, - "468": 1.3429077863693237, - "469": 1.3825491666793823, - "470": 1.3863537311553955, - "471": 1.4176111221313477, - "472": 1.4104974269866943, - "473": 1.4849673509597778, - "474": 1.586929202079773, - "475": 1.4831091165542603, - "476": 1.286625862121582, - "477": 1.276072382926941, - "478": 1.2926793098449707, - "479": 1.3325908184051514, - "480": 1.3457083702087402, - "481": 1.3737918138504028, - "482": 1.4687963724136353, - "483": 1.6570770740509033, - "484": 1.761545181274414, - "485": 1.6200577020645142 - }, - "loss": { - "432": 2.3617753982543945, - "433": 2.34804105758667, - "434": 2.3420891761779785, - "435": 2.359283924102783, - "436": 2.334181785583496, - "437": 2.342970848083496, - "438": 2.295233726501465, - "439": 2.3428447246551514, - "440": 2.3305728435516357, - "441": 2.3393940925598145, - "442": 2.3540444374084473, - "443": 2.349869728088379, - "444": 2.3877902030944824, - "445": 2.346569538116455, - "446": 2.3178114891052246, - "447": 2.326875925064087, - "448": 2.3456501960754395, - "449": 2.344538688659668, - "450": 2.3177645206451416, - "451": 2.318577289581299, - "452": 2.3197152614593506, - "453": 2.3167388439178467, - "454": 2.3275227546691895, - "455": 2.3517160415649414, - "456": 2.3250317573547363, - "457": 2.3436105251312256, - "458": 2.307124614715576, - "459": 2.3200998306274414, - "460": 2.305172920227051, - "461": 2.372474193572998, - "462": 2.34440541267395, - "463": 2.350816249847412, - "464": 2.313886880874634, - "465": 2.3120365142822266, - "466": 2.285431385040283, - "467": 2.3201370239257812, - "468": 2.321953296661377, - "469": 2.3189384937286377, - "470": 2.337470054626465, - "471": 2.3227221965789795, - "472": 2.3120028972625732, - "473": 2.322312116622925, - "474": 2.3122975826263428, - "475": 2.3390138149261475, - "476": 2.2942404747009277, - "477": 2.2792513370513916, - "478": 2.3139185905456543, - "479": 2.31300950050354, - "480": 2.3121891021728516, - "481": 2.2937021255493164, - "482": 2.319324493408203, - "483": 2.3062589168548584, - "484": 2.3141398429870605, - "485": 2.3350830078125 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "step_size_list": [ - 0.0946465, - 0.0949168, - 0.0954268, - 0.0956323, - 0.0962493, - 0.0962186, - 0.0954438, - 0.0951846, - 0.0948773, - 0.0947733, - 0.0947819, - 0.0933894, - 0.0937793, - 0.0953838, - 0.0965854, - 0.0963349, - 0.095554, - 0.095788, - 0.0959828, - 0.0960779, - 0.0959173, - 0.0956312, - 0.0952368, - 0.0946411, - 0.0947796, - 0.0956222, - 0.0959778, - 0.0961949, - 0.0946267, - 0.0933788, - 0.0933745, - 0.0941578, - 0.0957739, - 0.0963186, - 0.0970761, - 0.0966415, - 0.0962618, - 0.0960418, - 0.0960511, - 0.0958534, - 0.0958749, - 0.0954675, - 0.0948357, - 0.0955092, - 0.0965179, - 0.0965511, - 0.096515, - 0.0963032, - 0.0962315, - 0.0960485, - 0.0955558, - 0.0943813, - 0.0937167, - 0.0946791 - ], - "train_epoch_time": 4.84143590927124, - "train_loss": 2.3093996868927134, - "train_score": 0.3223569763784258, - "val_loss": 2.369595475092821, - "val_score": 0.3103069389318079 - }, - { - "epoch": 9, - "grad_norm": 1.0912246704101562, - "learning_rate": 0.1, - "model_norm": 87.5638198852539, - "step_logs": { - "grad_norm": { - "486": 1.5025008916854858, - "487": 1.5095657110214233, - "488": 1.592063069343567, - "489": 1.7205984592437744, - "490": 1.773987054824829, - "491": 1.6198124885559082, - "492": 1.5632734298706055, - "493": 1.4860070943832397, - "494": 1.2874897718429565, - "495": 1.2284979820251465, - "496": 1.2067540884017944, - "497": 1.3678255081176758, - "498": 1.597486972808838, - "499": 1.5526199340820312, - "500": 1.5520646572113037, - "501": 1.4706977605819702, - "502": 1.467108964920044, - "503": 1.6719712018966675, - "504": 1.5829285383224487, - "505": 1.4955989122390747, - "506": 1.5018434524536133, - "507": 1.4837757349014282, - "508": 1.3472092151641846, - "509": 1.361122488975525, - "510": 1.3829684257507324, - "511": 1.4686342477798462, - "512": 1.5566428899765015, - "513": 1.5263723134994507, - "514": 1.7206555604934692, - "515": 1.8564311265945435, - "516": 1.7699187994003296, - "517": 1.5642356872558594, - "518": 1.6314109563827515, - "519": 1.4676313400268555, - "520": 1.3388612270355225, - "521": 1.3338922262191772, - "522": 1.3107889890670776, - "523": 1.3294310569763184, - "524": 1.4511823654174805, - "525": 1.467866063117981, - "526": 1.6884526014328003, - "527": 1.5701409578323364, - "528": 1.762635350227356, - "529": 1.6352455615997314, - "530": 1.7891312837600708, - "531": 1.690053105354309, - "532": 1.4078140258789062, - "533": 1.2745277881622314, - "534": 1.269148349761963, - "535": 1.3134315013885498, - "536": 1.3920936584472656, - "537": 1.3074783086776733, - "538": 1.129028081893921, - "539": 1.0912246704101562 - }, - "loss": { - "486": 2.3220713138580322, - "487": 2.330090045928955, - "488": 2.311925172805786, - "489": 2.3156092166900635, - "490": 2.306424379348755, - "491": 2.332205295562744, - "492": 2.317397117614746, - "493": 2.331104278564453, - "494": 2.2727203369140625, - "495": 2.284027099609375, - "496": 2.294236183166504, - "497": 2.2855887413024902, - "498": 2.3141837120056152, - "499": 2.3078932762145996, - "500": 2.3057961463928223, - "501": 2.317169666290283, - "502": 2.289058208465576, - "503": 2.305058240890503, - "504": 2.2939486503601074, - "505": 2.3050730228424072, - "506": 2.30854868888855, - "507": 2.2708868980407715, - "508": 2.3110127449035645, - "509": 2.302642345428467, - "510": 2.2753477096557617, - "511": 2.3006060123443604, - "512": 2.311389923095703, - "513": 2.308623790740967, - "514": 2.305128574371338, - "515": 2.311640739440918, - "516": 2.3273863792419434, - "517": 2.3004872798919678, - "518": 2.325859308242798, - "519": 2.3006064891815186, - "520": 2.276432991027832, - "521": 2.3090970516204834, - "522": 2.2938272953033447, - "523": 2.3008387088775635, - "524": 2.3139688968658447, - "525": 2.293001174926758, - "526": 2.3016910552978516, - "527": 2.290680408477783, - "528": 2.2834925651550293, - "529": 2.2939915657043457, - "530": 2.3023834228515625, - "531": 2.298981189727783, - "532": 2.291498899459839, - "533": 2.288029670715332, - "534": 2.285649061203003, - "535": 2.2905266284942627, - "536": 2.293748140335083, - "537": 2.273296356201172, - "538": 2.2550418376922607, - "539": 2.2850148677825928 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "step_size_list": [ - 0.0953644, - 0.095338, - 0.0948032, - 0.0939917, - 0.0936134, - 0.0946744, - 0.0949913, - 0.0954778, - 0.0964815, - 0.0968018, - 0.0969239, - 0.096068, - 0.0947744, - 0.0950366, - 0.0950357, - 0.0955409, - 0.0955096, - 0.0942829, - 0.0948214, - 0.0953726, - 0.0953424, - 0.0953767, - 0.0962216, - 0.0961327, - 0.0959666, - 0.0955223, - 0.0950193, - 0.0951965, - 0.0939656, - 0.0930628, - 0.0936945, - 0.0949505, - 0.0945881, - 0.0955281, - 0.096212, - 0.0962902, - 0.09639, - 0.0963013, - 0.0956476, - 0.0955126, - 0.0941682, - 0.0948935, - 0.0936304, - 0.0944927, - 0.0935003, - 0.0941513, - 0.0958547, - 0.0965719, - 0.0965963, - 0.0963709, - 0.0959469, - 0.0963763, - 0.0972513, - 0.0974606 - ], - "train_epoch_time": 4.840567588806152, - "train_loss": 2.2588146516890233, - "train_score": 0.32843772405531346, - "val_loss": 2.3237856173487947, - "val_score": 0.30992573229371473 - }, - { - "epoch": 10, - "grad_norm": 1.4195648431777954, - "learning_rate": 0.1, - "model_norm": 87.58116912841797, - "step_logs": { - "grad_norm": { - "540": 1.100197672843933, - "541": 1.1823339462280273, - "542": 1.2725900411605835, - "543": 1.331581950187683, - "544": 1.5299253463745117, - "545": 1.8025225400924683, - "546": 1.7377359867095947, - "547": 1.8973256349563599, - "548": 1.8250856399536133, - "549": 1.4262754917144775, - "550": 1.443953037261963, - "551": 1.2793861627578735, - "552": 1.1450295448303223, - "553": 1.0606615543365479, - "554": 1.1801726818084717, - "555": 1.3393418788909912, - "556": 1.5357460975646973, - "557": 1.671959638595581, - "558": 1.5620275735855103, - "559": 1.5698570013046265, - "560": 1.47999107837677, - "561": 1.5179015398025513, - "562": 1.4943252801895142, - "563": 1.5487949848175049, - "564": 1.7194210290908813, - "565": 1.6637580394744873, - "566": 1.537824273109436, - "567": 1.4476512670516968, - "568": 1.4622572660446167, - "569": 1.6627964973449707, - "570": 1.7058537006378174, - "571": 1.620262622833252, - "572": 1.5235130786895752, - "573": 1.474424123764038, - "574": 1.5082924365997314, - "575": 1.4345836639404297, - "576": 1.3322948217391968, - "577": 1.2427113056182861, - "578": 1.255328893661499, - "579": 1.3651137351989746, - "580": 1.4349088668823242, - "581": 1.398942470550537, - "582": 1.4133671522140503, - "583": 1.484761118888855, - "584": 1.5328387022018433, - "585": 1.5369879007339478, - "586": 1.5777214765548706, - "587": 1.6712647676467896, - "588": 1.747390627861023, - "589": 1.7286624908447266, - "590": 1.9535447359085083, - "591": 1.67344069480896, - "592": 1.2704472541809082, - "593": 1.4195648431777954 - }, - "loss": { - "540": 2.2657480239868164, - "541": 2.260096549987793, - "542": 2.2562482357025146, - "543": 2.2681894302368164, - "544": 2.2761921882629395, - "545": 2.306246280670166, - "546": 2.3245182037353516, - "547": 2.2958405017852783, - "548": 2.304215431213379, - "549": 2.2735280990600586, - "550": 2.2743730545043945, - "551": 2.2598328590393066, - "552": 2.2704267501831055, - "553": 2.247743606567383, - "554": 2.2535901069641113, - "555": 2.2711734771728516, - "556": 2.2813498973846436, - "557": 2.2679498195648193, - "558": 2.259808301925659, - "559": 2.277531147003174, - "560": 2.272543430328369, - "561": 2.2580676078796387, - "562": 2.2870192527770996, - "563": 2.2986292839050293, - "564": 2.279205799102783, - "565": 2.2739334106445312, - "566": 2.2868971824645996, - "567": 2.2592694759368896, - "568": 2.254410743713379, - "569": 2.283161163330078, - "570": 2.304507255554199, - "571": 2.2747268676757812, - "572": 2.2719836235046387, - "573": 2.240689754486084, - "574": 2.250385284423828, - "575": 2.2764248847961426, - "576": 2.22999906539917, - "577": 2.2403783798217773, - "578": 2.2409114837646484, - "579": 2.246426582336426, - "580": 2.2490358352661133, - "581": 2.24831485748291, - "582": 2.22908353805542, - "583": 2.2384352684020996, - "584": 2.262144088745117, - "585": 2.2450196743011475, - "586": 2.2659473419189453, - "587": 2.293720245361328, - "588": 2.271733522415161, - "589": 2.2749931812286377, - "590": 2.272538185119629, - "591": 2.2866933345794678, - "592": 2.227475643157959, - "593": 2.2312941551208496 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "step_size_list": [ - 0.0973983, - 0.0970002, - 0.0965355, - 0.0962384, - 0.0951098, - 0.0934194, - 0.0939008, - 0.09273, - 0.0932593, - 0.0957178, - 0.0956172, - 0.096505, - 0.0971937, - 0.0975586, - 0.0970024, - 0.0962009, - 0.0950849, - 0.0941948, - 0.094878, - 0.0948673, - 0.0954024, - 0.0951459, - 0.0953453, - 0.0950409, - 0.0939094, - 0.0942626, - 0.0950837, - 0.0955676, - 0.0954725, - 0.0942907, - 0.0940614, - 0.0945443, - 0.0951402, - 0.0953734, - 0.0951886, - 0.0956752, - 0.0961725, - 0.0966682, - 0.0966033, - 0.0960174, - 0.0956229, - 0.0958293, - 0.0957114, - 0.0953069, - 0.0950631, - 0.0950017, - 0.0947933, - 0.0942608, - 0.0937028, - 0.0938371, - 0.0922538, - 0.09423, - 0.0965037, - 0.0956794 - ], - "train_epoch_time": 4.840311050415039, - "train_loss": 2.2552605436043214, - "train_score": 0.337925932508288, - "val_loss": 2.31916275254347, - "val_score": 0.31855446384229563 - }, - { - "epoch": 11, - "grad_norm": 1.5625512599945068, - "learning_rate": 0.1, - "model_norm": 87.59830474853516, - "step_logs": { - "grad_norm": { - "594": 1.6534924507141113, - "595": 1.6314411163330078, - "596": 1.4768848419189453, - "597": 1.3829001188278198, - "598": 1.5773581266403198, - "599": 1.7535510063171387, - "600": 1.5041229724884033, - "601": 1.2253459692001343, - "602": 1.2250187397003174, - "603": 1.1967824697494507, - "604": 1.0975279808044434, - "605": 1.055688738822937, - "606": 1.156018853187561, - "607": 1.3279361724853516, - "608": 1.4560978412628174, - "609": 1.496138095855713, - "610": 1.680279016494751, - "611": 1.7630523443222046, - "612": 1.6128991842269897, - "613": 1.5570846796035767, - "614": 1.6538764238357544, - "615": 1.7344026565551758, - "616": 1.7097742557525635, - "617": 1.745121717453003, - "618": 1.4628164768218994, - "619": 1.534752607345581, - "620": 1.5265697240829468, - "621": 1.495509147644043, - "622": 1.4786078929901123, - "623": 1.4330041408538818, - "624": 1.4551668167114258, - "625": 1.5479933023452759, - "626": 1.3500335216522217, - "627": 1.3044919967651367, - "628": 1.5800038576126099, - "629": 1.800116777420044, - "630": 1.5918378829956055, - "631": 1.3650813102722168, - "632": 1.2443476915359497, - "633": 1.3246335983276367, - "634": 1.3975545167922974, - "635": 1.4696154594421387, - "636": 1.3224786520004272, - "637": 1.3797218799591064, - "638": 1.4980833530426025, - "639": 1.4243566989898682, - "640": 1.2857396602630615, - "641": 1.2422525882720947, - "642": 1.2802238464355469, - "643": 1.255600929260254, - "644": 1.2912657260894775, - "645": 1.3614187240600586, - "646": 1.4797443151474, - "647": 1.5625512599945068 - }, - "loss": { - "594": 2.256520986557007, - "595": 2.2483062744140625, - "596": 2.2642526626586914, - "597": 2.229402542114258, - "598": 2.2512664794921875, - "599": 2.227321147918701, - "600": 2.256075859069824, - "601": 2.2356483936309814, - "602": 2.2189698219299316, - "603": 2.227083683013916, - "604": 2.2117767333984375, - "605": 2.224368095397949, - "606": 2.2249794006347656, - "607": 2.2397990226745605, - "608": 2.237287998199463, - "609": 2.2222719192504883, - "610": 2.223987102508545, - "611": 2.274860382080078, - "612": 2.2311863899230957, - "613": 2.2601726055145264, - "614": 2.2343757152557373, - "615": 2.2645065784454346, - "616": 2.2727839946746826, - "617": 2.2672462463378906, - "618": 2.244750499725342, - "619": 2.236546039581299, - "620": 2.237711191177368, - "621": 2.2292706966400146, - "622": 2.2331385612487793, - "623": 2.216628074645996, - "624": 2.249546527862549, - "625": 2.2301745414733887, - "626": 2.2357091903686523, - "627": 2.1932413578033447, - "628": 2.230203151702881, - "629": 2.2156214714050293, - "630": 2.2423171997070312, - "631": 2.205599069595337, - "632": 2.1958603858947754, - "633": 2.2244582176208496, - "634": 2.226994752883911, - "635": 2.2127346992492676, - "636": 2.2161855697631836, - "637": 2.225216865539551, - "638": 2.2458338737487793, - "639": 2.208857536315918, - "640": 2.2217001914978027, - "641": 2.1997947692871094, - "642": 2.206165313720703, - "643": 2.2320661544799805, - "644": 2.2041714191436768, - "645": 2.232840061187744, - "646": 2.2346062660217285, - "647": 2.2509753704071045 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "step_size_list": [ - 0.094288, - 0.0944117, - 0.0954048, - 0.0958873, - 0.0947635, - 0.0935429, - 0.0952254, - 0.0967511, - 0.0967292, - 0.0968846, - 0.0973491, - 0.0975561, - 0.0970844, - 0.0962126, - 0.095476, - 0.0952051, - 0.0940314, - 0.0936049, - 0.0944914, - 0.0949095, - 0.0942321, - 0.0937717, - 0.0939574, - 0.0937065, - 0.0954505, - 0.0949976, - 0.0950506, - 0.0952233, - 0.0953333, - 0.095573, - 0.095505, - 0.0949015, - 0.0960836, - 0.0962655, - 0.0946998, - 0.0931856, - 0.0946519, - 0.0959469, - 0.0965943, - 0.0962056, - 0.095799, - 0.0953468, - 0.0962039, - 0.0958981, - 0.0952413, - 0.0956092, - 0.096413, - 0.0966113, - 0.0964185, - 0.0965889, - 0.0963555, - 0.0960149, - 0.0953294, - 0.0948556 - ], - "train_epoch_time": 4.840200424194336, - "train_loss": 2.2180573551008314, - "train_score": 0.3428084647210804, - "val_loss": 2.2931347347701867, - "val_score": 0.3267975034519397 - }, - { - "epoch": 12, - "grad_norm": 0.8762531280517578, - "learning_rate": 0.1, - "model_norm": 87.6137924194336, - "step_logs": { - "grad_norm": { - "648": 1.5413535833358765, - "649": 1.4510596990585327, - "650": 1.323213815689087, - "651": 1.405971884727478, - "652": 1.4707849025726318, - "653": 1.6178920269012451, - "654": 1.691859245300293, - "655": 1.710860013961792, - "656": 1.5524107217788696, - "657": 1.3920801877975464, - "658": 1.4672938585281372, - "659": 1.3626227378845215, - "660": 1.3100682497024536, - "661": 1.188665509223938, - "662": 1.116929292678833, - "663": 1.1059703826904297, - "664": 1.1361353397369385, - "665": 1.1766940355300903, - "666": 1.1772736310958862, - "667": 1.2098441123962402, - "668": 1.2716014385223389, - "669": 1.1813251972198486, - "670": 1.1426494121551514, - "671": 1.1829032897949219, - "672": 1.2042458057403564, - "673": 1.176770806312561, - "674": 1.0168719291687012, - "675": 0.957220733165741, - "676": 0.9578326940536499, - "677": 0.9323024153709412, - "678": 0.9654185175895691, - "679": 0.9408234357833862, - "680": 0.9756593704223633, - "681": 0.9996417760848999, - "682": 1.0176331996917725, - "683": 1.1016645431518555, - "684": 1.1992191076278687, - "685": 1.2792115211486816, - "686": 1.2094345092773438, - "687": 1.0129423141479492, - "688": 0.9981645941734314, - "689": 0.9855990409851074, - "690": 0.9641879200935364, - "691": 0.9150952100753784, - "692": 0.9417808651924133, - "693": 0.8396671414375305, - "694": 0.7987462878227234, - "695": 0.8581586480140686, - "696": 0.8286956548690796, - "697": 0.7874364256858826, - "698": 0.7451260685920715, - "699": 0.7946550846099854, - "700": 0.8350131511688232, - "701": 0.8762531280517578 - }, - "loss": { - "648": 2.2028181552886963, - "649": 2.228684186935425, - "650": 2.2092976570129395, - "651": 2.204345941543579, - "652": 2.230433940887451, - "653": 2.217052698135376, - "654": 2.21187162399292, - "655": 2.2144064903259277, - "656": 2.2300021648406982, - "657": 2.206143379211426, - "658": 2.2129125595092773, - "659": 2.190964698791504, - "660": 2.1846811771392822, - "661": 2.2048850059509277, - "662": 2.2016615867614746, - "663": 2.19024395942688, - "664": 2.187074661254883, - "665": 2.196682929992676, - "666": 2.1928462982177734, - "667": 2.1666507720947266, - "668": 2.173252820968628, - "669": 2.200779914855957, - "670": 2.180562973022461, - "671": 2.171079635620117, - "672": 2.16996431350708, - "673": 2.2069549560546875, - "674": 2.1732025146484375, - "675": 2.162417411804199, - "676": 2.1946511268615723, - "677": 2.1678366661071777, - "678": 2.1641478538513184, - "679": 2.183716297149658, - "680": 2.1696090698242188, - "681": 2.1490530967712402, - "682": 2.175893783569336, - "683": 2.182178497314453, - "684": 2.1715240478515625, - "685": 2.178544044494629, - "686": 2.1701955795288086, - "687": 2.1578786373138428, - "688": 2.1630373001098633, - "689": 2.1605517864227295, - "690": 2.1448769569396973, - "691": 2.164898157119751, - "692": 2.1333160400390625, - "693": 2.130385398864746, - "694": 2.1412806510925293, - "695": 2.1399881839752197, - "696": 2.1563222408294678, - "697": 2.1380600929260254, - "698": 2.1288375854492188, - "699": 2.135341167449951, - "700": 2.149369716644287, - "701": 2.149366855621338 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "step_size_list": [ - 0.0948833, - 0.0949263, - 0.0950457, - 0.094011, - 0.0931264, - 0.0916691, - 0.0906481, - 0.0899886, - 0.0904173, - 0.0906829, - 0.0897316, - 0.0896684, - 0.0893432, - 0.0893424, - 0.089053, - 0.0884984, - 0.0877887, - 0.0870506, - 0.0864602, - 0.0857158, - 0.0848863, - 0.0846997, - 0.0842402, - 0.0834939, - 0.0828275, - 0.0823822, - 0.0823068, - 0.0818876, - 0.0813103, - 0.0807694, - 0.0800765, - 0.0795603, - 0.0788587, - 0.0781822, - 0.0775542, - 0.0767225, - 0.0758249, - 0.0749874, - 0.0746184, - 0.0745797, - 0.0740247, - 0.0734579, - 0.0729037, - 0.0724278, - 0.071753, - 0.0713693, - 0.0708492, - 0.070131, - 0.0695906, - 0.0690546, - 0.0685181, - 0.0678313, - 0.0671616, - 0.0664849 - ], - "train_epoch_time": 4.840845823287964, - "train_loss": 2.1405552824395286, - "train_score": 0.3687858680484969, - "val_loss": 2.2164132720977925, - "val_score": 0.3456560345591141 - }, - { - "epoch": 13, - "grad_norm": 0.7531309723854065, - "learning_rate": 0.06666666666666668, - "model_norm": 87.62348175048828, - "step_logs": { - "grad_norm": { - "702": 0.8920872211456299, - "703": 0.9194406867027283, - "704": 0.988314688205719, - "705": 1.1157653331756592, - "706": 1.0968096256256104, - "707": 1.1118769645690918, - "708": 1.1954256296157837, - "709": 1.3032866716384888, - "710": 1.1688765287399292, - "711": 0.9599566459655762, - "712": 1.010603666305542, - "713": 1.0050030946731567, - "714": 0.9598475098609924, - "715": 1.0469393730163574, - "716": 1.0152608156204224, - "717": 0.9659439921379089, - "718": 0.8634149432182312, - "719": 0.8878005743026733, - "720": 0.896177351474762, - "721": 0.8501298427581787, - "722": 0.8294501304626465, - "723": 0.7876549363136292, - "724": 0.7377925515174866, - "725": 0.7456555366516113, - "726": 0.7465358972549438, - "727": 0.7019779086112976, - "728": 0.7784258127212524, - "729": 0.8708042502403259, - "730": 0.8745377659797668, - "731": 0.8901929259300232, - "732": 0.8764516115188599, - "733": 0.8969185948371887, - "734": 0.8551320433616638, - "735": 0.7365669012069702, - "736": 0.7328318357467651, - "737": 0.8150904774665833, - "738": 0.8248913288116455, - "739": 0.6946381330490112, - "740": 0.708453357219696, - "741": 0.7869342565536499, - "742": 0.7736213207244873, - "743": 0.8139951825141907, - "744": 0.796597957611084, - "745": 0.7518201470375061, - "746": 0.7371087670326233, - "747": 0.6931148767471313, - "748": 0.7689252495765686, - "749": 0.7809926271438599, - "750": 0.7097293734550476, - "751": 0.7736760377883911, - "752": 0.7193594574928284, - "753": 0.706220269203186, - "754": 0.6613119840621948, - "755": 0.7531309723854065 - }, - "loss": { - "702": 2.1390342712402344, - "703": 2.134037494659424, - "704": 2.140979290008545, - "705": 2.1441988945007324, - "706": 2.1380772590637207, - "707": 2.1422739028930664, - "708": 2.142864942550659, - "709": 2.1670923233032227, - "710": 2.1580002307891846, - "711": 2.1448566913604736, - "712": 2.1190733909606934, - "713": 2.14148211479187, - "714": 2.1272506713867188, - "715": 2.1676583290100098, - "716": 2.123312473297119, - "717": 2.1017513275146484, - "718": 2.1215970516204834, - "719": 2.1481404304504395, - "720": 2.110671043395996, - "721": 2.116016387939453, - "722": 2.13571834564209, - "723": 2.1065866947174072, - "724": 2.1273536682128906, - "725": 2.1519713401794434, - "726": 2.113983392715454, - "727": 2.1189074516296387, - "728": 2.1163251399993896, - "729": 2.1233696937561035, - "730": 2.1128978729248047, - "731": 2.1425318717956543, - "732": 2.1122403144836426, - "733": 2.127007007598877, - "734": 2.115316390991211, - "735": 2.142245292663574, - "736": 2.103114604949951, - "737": 2.1076889038085938, - "738": 2.12778377532959, - "739": 2.1301040649414062, - "740": 2.1092724800109863, - "741": 2.151620626449585, - "742": 2.104706287384033, - "743": 2.105891227722168, - "744": 2.1168055534362793, - "745": 2.098604679107666, - "746": 2.1319808959960938, - "747": 2.116122245788574, - "748": 2.103595733642578, - "749": 2.0936508178710938, - "750": 2.124605655670166, - "751": 2.097956657409668, - "752": 2.1295769214630127, - "753": 2.121328353881836, - "754": 2.1142678260803223, - "755": 2.1180806159973145 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "step_size_list": [ - 0.06585, - 0.0651965, - 0.0644698, - 0.0636178, - 0.0630587, - 0.0624348, - 0.0616683, - 0.0608587, - 0.0605453, - 0.0603192, - 0.0596246, - 0.0590428, - 0.0585084, - 0.0577852, - 0.0572188, - 0.0566851, - 0.0562291, - 0.0555999, - 0.0549745, - 0.0544276, - 0.0538498, - 0.0532823, - 0.0527283, - 0.0521159, - 0.0514998, - 0.0509311, - 0.0502531, - 0.0495575, - 0.0489453, - 0.0483296, - 0.0477303, - 0.0471074, - 0.0465362, - 0.0460265, - 0.0454141, - 0.044744, - 0.0441308, - 0.0436107, - 0.0429889, - 0.0423331, - 0.0417263, - 0.0410907, - 0.0404935, - 0.0399078, - 0.0393083, - 0.038718, - 0.0380669, - 0.0374489, - 0.0368751, - 0.0362315, - 0.0356474, - 0.0350403, - 0.0344448, - 0.033797 - ], - "train_epoch_time": 4.841166734695435, - "train_loss": 2.105520798315106, - "train_score": 0.37542593249973644, - "val_loss": 2.187971991868572, - "val_score": 0.35329362842437184 - }, - { - "epoch": 14, - "grad_norm": 0.6560465693473816, - "learning_rate": 0.03333333333333334, - "model_norm": 87.62673950195312, - "step_logs": { - "grad_norm": { - "756": 0.6808722019195557, - "757": 0.7502784729003906, - "758": 0.7747045159339905, - "759": 0.7402014136314392, - "760": 0.7361641526222229, - "761": 0.7099589705467224, - "762": 0.6706918478012085, - "763": 0.6561278104782104, - "764": 0.7447983026504517, - "765": 0.6444520950317383, - "766": 0.6793810725212097, - "767": 0.6898874044418335, - "768": 0.7020453810691833, - "769": 0.7227084636688232, - "770": 0.7597751617431641, - "771": 0.656038224697113, - "772": 0.6833887100219727, - "773": 0.7233046889305115, - "774": 0.7019231915473938, - "775": 0.7187379598617554, - "776": 0.6745233535766602, - "777": 0.6481652855873108, - "778": 0.6588069796562195, - "779": 0.7660980820655823, - "780": 0.6767035126686096, - "781": 0.7113878130912781, - "782": 0.6418550610542297, - "783": 0.6614627242088318, - "784": 0.6701388359069824, - "785": 0.7062239050865173, - "786": 0.7151431441307068, - "787": 0.6770057678222656, - "788": 0.6858407258987427, - "789": 0.6519733667373657, - "790": 0.6951921582221985, - "791": 0.6831929087638855, - "792": 0.5835621356964111, - "793": 0.6608464121818542, - "794": 0.6295688152313232, - "795": 0.602670431137085, - "796": 0.6260209679603577, - "797": 0.6543610692024231, - "798": 0.6884217262268066, - "799": 0.6450269818305969, - "800": 0.6900261640548706, - "801": 0.6984003186225891, - "802": 0.6354591846466064, - "803": 0.6883190274238586, - "804": 0.6483860611915588, - "805": 0.6386538147926331, - "806": 0.640887975692749, - "807": 0.679754912853241, - "808": 0.6633153557777405, - "809": 0.6560465693473816 - }, - "loss": { - "756": 2.1119837760925293, - "757": 2.1168980598449707, - "758": 2.1193339824676514, - "759": 2.119785785675049, - "760": 2.1012823581695557, - "761": 2.110630989074707, - "762": 2.10760235786438, - "763": 2.113950729370117, - "764": 2.067664623260498, - "765": 2.088411808013916, - "766": 2.1109251976013184, - "767": 2.107616424560547, - "768": 2.121735095977783, - "769": 2.0850253105163574, - "770": 2.132988929748535, - "771": 2.0806896686553955, - "772": 2.1015868186950684, - "773": 2.0957279205322266, - "774": 2.0994162559509277, - "775": 2.083505153656006, - "776": 2.1091468334198, - "777": 2.096006393432617, - "778": 2.097630739212036, - "779": 2.112154483795166, - "780": 2.0990288257598877, - "781": 2.064929962158203, - "782": 2.115248203277588, - "783": 2.0665059089660645, - "784": 2.1107029914855957, - "785": 2.1103851795196533, - "786": 2.0840463638305664, - "787": 2.112826347351074, - "788": 2.12038254737854, - "789": 2.060861825942993, - "790": 2.079716205596924, - "791": 2.112527370452881, - "792": 2.109745502471924, - "793": 2.083125352859497, - "794": 2.101740837097168, - "795": 2.0904805660247803, - "796": 2.1020026206970215, - "797": 2.0670418739318848, - "798": 2.090491533279419, - "799": 2.1115198135375977, - "800": 2.0831446647644043, - "801": 2.072781562805176, - "802": 2.107959747314453, - "803": 2.097409725189209, - "804": 2.1161949634552, - "805": 2.1058473587036133, - "806": 2.1048529148101807, - "807": 2.0991337299346924, - "808": 2.091625213623047, - "809": 2.08298397064209 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "step_size_list": [ - 0.0332118, - 0.0325744, - 0.0319535, - 0.0313539, - 0.0307418, - 0.0301381, - 0.0295362, - 0.0289269, - 0.0282873, - 0.0277013, - 0.0270801, - 0.0264639, - 0.0258481, - 0.0252287, - 0.0246091, - 0.0240143, - 0.0233958, - 0.0227746, - 0.0221644, - 0.0215472, - 0.0209403, - 0.0203289, - 0.0197128, - 0.0190851, - 0.0184812, - 0.0178621, - 0.0172549, - 0.0166373, - 0.016022, - 0.015404, - 0.0147879, - 0.0141757, - 0.0135598, - 0.0129457, - 0.012328, - 0.0117132, - 0.0111012, - 0.0104823, - 0.00986736, - 0.00925182, - 0.00863502, - 0.00801803, - 0.00740119, - 0.00678558, - 0.00616849, - 0.00555193, - 0.00493594, - 0.00431888, - 0.00370234, - 0.0030855, - 0.00246854, - 0.00185147, - 0.00123441, - 0.000617245 - ], - "train_epoch_time": 4.840723752975464, - "train_loss": 2.093103907200664, - "train_score": 0.3785397239275266, - "val_loss": 2.1785143122470476, - "val_score": 0.35478706283218964 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:25:22.001760", - "final_model_norm": 87.62673950195312, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:23:40.473868", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 2.8237144947052, - "learning_rate": 2.15e-11, - "model_norm": 87.43746185302734, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.703997611999512, - "3": 7.326879501342773, - "4": 21.96558952331543, - "5": 7.369707107543945, - "6": 5.422325611114502, - "7": 4.3268046379089355, - "8": 3.9849159717559814, - "9": 8.04224967956543, - "10": 6.365480899810791, - "11": 4.813935279846191, - "12": 34.062808990478516, - "13": 4.699474811553955, - "14": 6.995782375335693, - "15": 8.23671817779541, - "16": 3.417386293411255, - "17": 9.862605094909668, - "18": 4.489058017730713, - "19": 13.414119720458984, - "20": 5.0882062911987305, - "21": 37.26375961303711, - "22": 7.999494552612305, - "23": 9.745227813720703, - "24": 4.009833812713623, - "25": 36.2741584777832, - "26": 3.2594213485717773, - "27": 4.331626892089844, - "28": 2.405197858810425, - "29": 3.363208055496216, - "30": 4.6236419677734375, - "31": 3.833583354949951, - "32": 4.102850437164307, - "33": 5.206770896911621, - "34": 27.12824058532715, - "35": 3.4224886894226074, - "36": 6.442839622497559, - "37": 5.323864936828613, - "38": 8.582418441772461, - "39": 4.82737922668457, - "40": 4.733067512512207, - "41": 27.60544776916504, - "42": 3.88569712638855, - "43": 5.079615116119385, - "44": 7.171720027923584, - "45": 6.145301818847656, - "46": 2.9072279930114746, - "47": 6.455597877502441, - "48": 3.6128594875335693, - "49": 7.043355464935303, - "50": 2.3525805473327637, - "51": 2.960170269012451, - "52": 6.720886707305908, - "53": 2.8237144947052 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.860989570617676, - "3": 3.6825003623962402, - "4": 4.148896217346191, - "5": 4.060670375823975, - "6": 3.558018684387207, - "7": 3.6036229133605957, - "8": 3.4577369689941406, - "9": 3.5245635509490967, - "10": 3.764491558074951, - "11": 3.3649075031280518, - "12": 3.5065202713012695, - "13": 3.3472676277160645, - "14": 3.5612375736236572, - "15": 3.181992769241333, - "16": 3.2212250232696533, - "17": 3.297783851623535, - "18": 3.2949283123016357, - "19": 3.5093657970428467, - "20": 3.236755609512329, - "21": 4.153029441833496, - "22": 3.4655709266662598, - "23": 3.8188352584838867, - "24": 3.2795932292938232, - "25": 4.070119857788086, - "26": 3.0209813117980957, - "27": 3.2271833419799805, - "28": 2.8818459510803223, - "29": 2.9359586238861084, - "30": 3.0909037590026855, - "31": 3.120154619216919, - "32": 2.940892219543457, - "33": 3.044102430343628, - "34": 4.243189811706543, - "35": 3.1709108352661133, - "36": 3.156486988067627, - "37": 3.0236644744873047, - "38": 2.9539918899536133, - "39": 3.2043702602386475, - "40": 3.2703495025634766, - "41": 4.97122859954834, - "42": 3.2410924434661865, - "43": 3.352753162384033, - "44": 3.334832191467285, - "45": 3.126955032348633, - "46": 2.9199960231781006, - "47": 3.1369423866271973, - "48": 3.067199945449829, - "49": 3.5366017818450928, - "50": 2.9732704162597656, - "51": 2.9412715435028076, - "52": 3.3356733322143555, - "53": 3.2908201217651367 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 2.15e-11, - 0.00343811, - 0.00819006, - 0.0117913, - 0.00859949, - 0.0187973, - 0.0233147, - 0.0279172, - 0.0318817, - 0.0285591, - 0.0349191, - 0.040675, - 0.00541052, - 0.0471964, - 0.0425847, - 0.0382198, - 0.0611709, - 0.0351768, - 0.0625865, - 0.0264014, - 0.0639908, - 0.00561004, - 0.0504965, - 0.0443546, - 0.0823638, - 0.00584982, - 0.0934328, - 0.0868034, - 0.107419, - 0.100547, - 0.0892048, - 0.101451, - 0.0987226, - 0.0869553, - 0.0106883, - 0.117764, - 0.0767146, - 0.0911385, - 0.0537997, - 0.104175, - 0.108237, - 0.0121478, - 0.127124, - 0.108035, - 0.076941, - 0.0892336, - 0.153778, - 0.0862767, - 0.143415, - 0.085036, - 0.179151, - 0.162846, - 0.0875507, - 0.170572 - ], - "train_epoch_time": 4.843285083770752, - "train_loss": 2.936614261774969, - "train_score": 0.18691714488536418, - "val_loss": 2.96571738985458, - "val_score": 0.18380005022962112 - }, - { - "epoch": 1, - "grad_norm": 1.3842122554779053, - "learning_rate": 0.215, - "model_norm": 87.45478820800781, - "step_logs": { - "grad_norm": { - "54": 2.8213584423065186, - "55": 2.51896071434021, - "56": 2.485569477081299, - "57": 3.002544403076172, - "58": 2.7876532077789307, - "59": 2.2174365520477295, - "60": 1.964108943939209, - "61": 3.3254904747009277, - "62": 2.0521535873413086, - "63": 1.5978906154632568, - "64": 1.8865280151367188, - "65": 1.7981300354003906, - "66": 1.7488449811935425, - "67": 2.379397392272949, - "68": 2.1411471366882324, - "69": 1.9745234251022339, - "70": 2.0030853748321533, - "71": 3.452584981918335, - "72": 1.9897698163986206, - "73": 1.8727123737335205, - "74": 1.5652499198913574, - "75": 2.163674831390381, - "76": 1.918753981590271, - "77": 2.1521198749542236, - "78": 1.8960561752319336, - "79": 2.127223253250122, - "80": 1.8088271617889404, - "81": 1.5839332342147827, - "82": 1.7778187990188599, - "83": 2.045609712600708, - "84": 1.6922314167022705, - "85": 1.2461128234863281, - "86": 1.4176477193832397, - "87": 2.2051236629486084, - "88": 1.6134084463119507, - "89": 1.1456162929534912, - "90": 1.2071748971939087, - "91": 1.4080860614776611, - "92": 1.487453579902649, - "93": 1.8650773763656616, - "94": 1.7297559976577759, - "95": 1.4833413362503052, - "96": 1.4754977226257324, - "97": 1.8308939933776855, - "98": 1.6010785102844238, - "99": 1.493167757987976, - "100": 1.4946473836898804, - "101": 1.7533742189407349, - "102": 1.4729269742965698, - "103": 1.3775416612625122, - "104": 1.589133858680725, - "105": 1.4126205444335938, - "106": 1.304403305053711, - "107": 1.3842122554779053 - }, - "loss": { - "54": 2.9311022758483887, - "55": 2.976961851119995, - "56": 2.9425265789031982, - "57": 2.9114437103271484, - "58": 3.0787158012390137, - "59": 2.944338083267212, - "60": 2.780221939086914, - "61": 2.8786559104919434, - "62": 3.070720911026001, - "63": 2.746140718460083, - "64": 2.731119155883789, - "65": 2.7641401290893555, - "66": 2.752035617828369, - "67": 2.7536611557006836, - "68": 2.956040382385254, - "69": 2.7385635375976562, - "70": 2.791156053543091, - "71": 2.824984073638916, - "72": 2.9984707832336426, - "73": 2.7185893058776855, - "74": 2.694605588912964, - "75": 2.708252429962158, - "76": 2.836428642272949, - "77": 2.730621099472046, - "78": 2.8064048290252686, - "79": 2.716933250427246, - "80": 2.816277265548706, - "81": 2.6442980766296387, - "82": 2.71547794342041, - "83": 2.7009973526000977, - "84": 2.7831273078918457, - "85": 2.622030258178711, - "86": 2.6283698081970215, - "87": 2.7053375244140625, - "88": 2.77567195892334, - "89": 2.625410556793213, - "90": 2.6169495582580566, - "91": 2.613706350326538, - "92": 2.6670570373535156, - "93": 2.637639284133911, - "94": 2.7454357147216797, - "95": 2.6323599815368652, - "96": 2.655250072479248, - "97": 2.6189985275268555, - "98": 2.702467918395996, - "99": 2.60925555229187, - "100": 2.6488986015319824, - "101": 2.6088175773620605, - "102": 2.7031571865081787, - "103": 2.5998706817626953, - "104": 2.6384899616241455, - "105": 2.614164352416992, - "106": 2.5784783363342285, - "107": 2.626443386077881 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.166416, - 0.174921, - 0.175409, - 0.161306, - 0.169113, - 0.182277, - 0.187093, - 0.152161, - 0.187375, - 0.195464, - 0.188582, - 0.190985, - 0.192055, - 0.176082, - 0.184277, - 0.186463, - 0.186222, - 0.147908, - 0.188276, - 0.188815, - 0.195857, - 0.181308, - 0.188674, - 0.181843, - 0.188976, - 0.182351, - 0.19113, - 0.195101, - 0.19109, - 0.184305, - 0.193587, - 0.202132, - 0.19867, - 0.180185, - 0.19531, - 0.204035, - 0.202857, - 0.198789, - 0.197396, - 0.188304, - 0.192453, - 0.197274, - 0.197585, - 0.188995, - 0.195105, - 0.196912, - 0.197128, - 0.190826, - 0.197924, - 0.199358, - 0.194942, - 0.198695, - 0.200759, - 0.199365 - ], - "train_epoch_time": 4.8404860496521, - "train_loss": 2.6236208142645903, - "train_score": 0.2179799587646208, - "val_loss": 2.649655413271776, - "val_score": 0.21359249436212052 - }, - { - "epoch": 2, - "grad_norm": 1.34837806224823, - "learning_rate": 0.215, - "model_norm": 87.4744644165039, - "step_logs": { - "grad_norm": { - "108": 1.5592724084854126, - "109": 2.103461742401123, - "110": 1.5801812410354614, - "111": 1.093946099281311, - "112": 1.136074423789978, - "113": 1.3073256015777588, - "114": 1.2383995056152344, - "115": 1.588004469871521, - "116": 1.532800316810608, - "117": 1.181828260421753, - "118": 1.3396342992782593, - "119": 1.5502949953079224, - "120": 1.3899825811386108, - "121": 1.2337206602096558, - "122": 1.3734662532806396, - "123": 1.4737595319747925, - "124": 1.4598513841629028, - "125": 1.4499573707580566, - "126": 1.4866669178009033, - "127": 1.3141696453094482, - "128": 1.296535849571228, - "129": 1.2824946641921997, - "130": 1.5136902332305908, - "131": 1.574975848197937, - "132": 1.4911938905715942, - "133": 1.5007022619247437, - "134": 1.4506276845932007, - "135": 1.5785022974014282, - "136": 1.3659920692443848, - "137": 1.0307453870773315, - "138": 1.0795910358428955, - "139": 1.2393927574157715, - "140": 1.3061350584030151, - "141": 1.3156977891921997, - "142": 1.4069629907608032, - "143": 1.5293782949447632, - "144": 1.4213391542434692, - "145": 1.1267948150634766, - "146": 1.1314029693603516, - "147": 1.36870276927948, - "148": 1.3218140602111816, - "149": 1.237614393234253, - "150": 1.2959716320037842, - "151": 1.545208215713501, - "152": 1.490453839302063, - "153": 1.357922911643982, - "154": 1.3239798545837402, - "155": 1.262686848640442, - "156": 1.19283127784729, - "157": 1.1629854440689087, - "158": 1.2180469036102295, - "159": 1.1661994457244873, - "160": 1.271503210067749, - "161": 1.34837806224823 - }, - "loss": { - "108": 2.6187286376953125, - "109": 2.640376329421997, - "110": 2.7371749877929688, - "111": 2.5788116455078125, - "112": 2.5667243003845215, - "113": 2.5690135955810547, - "114": 2.593691349029541, - "115": 2.5491881370544434, - "116": 2.6932168006896973, - "117": 2.562211513519287, - "118": 2.5980257987976074, - "119": 2.603912115097046, - "120": 2.6333229541778564, - "121": 2.5534958839416504, - "122": 2.6163206100463867, - "123": 2.5902304649353027, - "124": 2.63192081451416, - "125": 2.581106185913086, - "126": 2.5988032817840576, - "127": 2.581291675567627, - "128": 2.590378522872925, - "129": 2.5595955848693848, - "130": 2.581890821456909, - "131": 2.6275458335876465, - "132": 2.6293866634368896, - "133": 2.591433048248291, - "134": 2.6372129917144775, - "135": 2.601038932800293, - "136": 2.647150993347168, - "137": 2.539316177368164, - "138": 2.5527286529541016, - "139": 2.5431435108184814, - "140": 2.5849978923797607, - "141": 2.550452947616577, - "142": 2.581634998321533, - "143": 2.590792655944824, - "144": 2.6380398273468018, - "145": 2.548248291015625, - "146": 2.5381650924682617, - "147": 2.5606985092163086, - "148": 2.59602689743042, - "149": 2.5511741638183594, - "150": 2.580410957336426, - "151": 2.5843911170959473, - "152": 2.638113498687744, - "153": 2.5573649406433105, - "154": 2.595184326171875, - "155": 2.539991855621338, - "156": 2.563715934753418, - "157": 2.5298314094543457, - "158": 2.5551815032958984, - "159": 2.5379412174224854, - "160": 2.5451836585998535, - "161": 2.5429296493530273 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 0.195489, - 0.182182, - 0.195799, - 0.204784, - 0.203974, - 0.20065, - 0.20215, - 0.194334, - 0.196566, - 0.203098, - 0.200138, - 0.195593, - 0.199282, - 0.202053, - 0.199534, - 0.197222, - 0.197784, - 0.19769, - 0.19699, - 0.200574, - 0.200979, - 0.201108, - 0.196276, - 0.195191, - 0.197083, - 0.19663, - 0.198015, - 0.194927, - 0.199856, - 0.205746, - 0.204941, - 0.201891, - 0.200757, - 0.20038, - 0.198627, - 0.19598, - 0.198647, - 0.20407, - 0.203943, - 0.199324, - 0.200494, - 0.201965, - 0.20094, - 0.195576, - 0.197153, - 0.199534, - 0.200445, - 0.201409, - 0.202895, - 0.203315, - 0.202368, - 0.203289, - 0.201257, - 0.199655 - ], - "train_epoch_time": 4.839672327041626, - "train_loss": 2.562408241411535, - "train_score": 0.24528896164688865, - "val_loss": 2.6070068703726976, - "val_score": 0.24454201331916003 - }, - { - "epoch": 3, - "grad_norm": 1.2930474281311035, - "learning_rate": 0.215, - "model_norm": 87.4908447265625, - "step_logs": { - "grad_norm": { - "162": 1.361615538597107, - "163": 1.392086148262024, - "164": 1.619390606880188, - "165": 1.4804311990737915, - "166": 1.2816448211669922, - "167": 1.4333467483520508, - "168": 1.5159316062927246, - "169": 1.311583399772644, - "170": 1.1395535469055176, - "171": 1.2622100114822388, - "172": 1.2925114631652832, - "173": 1.2221670150756836, - "174": 1.094551920890808, - "175": 1.0472376346588135, - "176": 1.0356019735336304, - "177": 1.0175933837890625, - "178": 1.0302155017852783, - "179": 1.2447725534439087, - "180": 1.3009377717971802, - "181": 1.3946105241775513, - "182": 1.45509672164917, - "183": 1.340830683708191, - "184": 1.3039110898971558, - "185": 1.366570234298706, - "186": 1.3199069499969482, - "187": 1.1985795497894287, - "188": 1.3332396745681763, - "189": 1.301342487335205, - "190": 1.2098512649536133, - "191": 1.1354190111160278, - "192": 1.1484336853027344, - "193": 1.3417631387710571, - "194": 1.3261157274246216, - "195": 1.3717952966690063, - "196": 1.304904818534851, - "197": 1.1362359523773193, - "198": 1.2044957876205444, - "199": 1.3503167629241943, - "200": 1.2129201889038086, - "201": 1.0190742015838623, - "202": 1.0551466941833496, - "203": 1.1930627822875977, - "204": 1.1593202352523804, - "205": 1.0762124061584473, - "206": 1.098315715789795, - "207": 1.3510856628417969, - "208": 1.2960174083709717, - "209": 1.1468430757522583, - "210": 1.2015275955200195, - "211": 1.298621416091919, - "212": 1.2985262870788574, - "213": 1.2118626832962036, - "214": 1.205458641052246, - "215": 1.2930474281311035 - }, - "loss": { - "162": 2.5700790882110596, - "163": 2.5708465576171875, - "164": 2.5671658515930176, - "165": 2.6177597045898438, - "166": 2.5604801177978516, - "167": 2.57741641998291, - "168": 2.568589687347412, - "169": 2.5828664302825928, - "170": 2.5363388061523438, - "171": 2.5402991771698, - "172": 2.5580549240112305, - "173": 2.5598230361938477, - "174": 2.514066219329834, - "175": 2.500812292098999, - "176": 2.520977258682251, - "177": 2.502847909927368, - "178": 2.489912271499634, - "179": 2.5389232635498047, - "180": 2.561051845550537, - "181": 2.5480899810791016, - "182": 2.5768723487854004, - "183": 2.558711528778076, - "184": 2.581841230392456, - "185": 2.5424580574035645, - "186": 2.585146903991699, - "187": 2.529067039489746, - "188": 2.5568032264709473, - "189": 2.563478946685791, - "190": 2.553945302963257, - "191": 2.5235276222229004, - "192": 2.5243968963623047, - "193": 2.530046224594116, - "194": 2.5652194023132324, - "195": 2.529226541519165, - "196": 2.5821762084960938, - "197": 2.530539035797119, - "198": 2.5176146030426025, - "199": 2.532806396484375, - "200": 2.5715689659118652, - "201": 2.503427267074585, - "202": 2.520176649093628, - "203": 2.515115261077881, - "204": 2.5395665168762207, - "205": 2.5176780223846436, - "206": 2.5222558975219727, - "207": 2.5116360187530518, - "208": 2.559041976928711, - "209": 2.504093647003174, - "210": 2.521381378173828, - "211": 2.5288586616516113, - "212": 2.530637264251709, - "213": 2.531001567840576, - "214": 2.5445199012756348, - "215": 2.527702808380127 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 0.199527, - 0.198884, - 0.193726, - 0.197247, - 0.201129, - 0.198031, - 0.196136, - 0.200635, - 0.203784, - 0.20142, - 0.200896, - 0.20231, - 0.204523, - 0.205321, - 0.205598, - 0.205845, - 0.20558, - 0.201763, - 0.200739, - 0.198696, - 0.197551, - 0.199901, - 0.200786, - 0.199266, - 0.200476, - 0.202627, - 0.200049, - 0.200744, - 0.202522, - 0.203807, - 0.203567, - 0.199722, - 0.200243, - 0.199077, - 0.200768, - 0.203822, - 0.202458, - 0.199557, - 0.202544, - 0.205821, - 0.205253, - 0.20267, - 0.203427, - 0.204868, - 0.204487, - 0.199419, - 0.20083, - 0.203509, - 0.202534, - 0.200618, - 0.200629, - 0.202376, - 0.202564, - 0.200727 - ], - "train_epoch_time": 4.840532064437866, - "train_loss": 2.545092095466734, - "train_score": 0.2581678174404024, - "val_loss": 2.6027835313269247, - "val_score": 0.25135440628304134 - }, - { - "epoch": 4, - "grad_norm": 1.3145029544830322, - "learning_rate": 0.215, - "model_norm": 87.51119232177734, - "step_logs": { - "grad_norm": { - "216": 1.2182166576385498, - "217": 1.0294996500015259, - "218": 1.1008670330047607, - "219": 1.19731605052948, - "220": 1.2013248205184937, - "221": 1.154158115386963, - "222": 1.2316288948059082, - "223": 1.135221004486084, - "224": 1.057712435722351, - "225": 1.0639936923980713, - "226": 1.0720773935317993, - "227": 1.3009594678878784, - "228": 1.5026884078979492, - "229": 1.3627347946166992, - "230": 1.1201125383377075, - "231": 1.1280312538146973, - "232": 1.2300243377685547, - "233": 1.3506304025650024, - "234": 1.4924287796020508, - "235": 1.6115299463272095, - "236": 1.462262511253357, - "237": 1.0871405601501465, - "238": 1.1100214719772339, - "239": 1.2975413799285889, - "240": 1.3630799055099487, - "241": 1.2814949750900269, - "242": 1.246268391609192, - "243": 1.2167056798934937, - "244": 1.1162651777267456, - "245": 1.1490232944488525, - "246": 1.1480011940002441, - "247": 1.1182979345321655, - "248": 1.065187692642212, - "249": 1.080504059791565, - "250": 1.117148995399475, - "251": 1.1543786525726318, - "252": 1.3190550804138184, - "253": 1.281561017036438, - "254": 1.2185546159744263, - "255": 1.1256446838378906, - "256": 1.0067986249923706, - "257": 1.0031418800354004, - "258": 1.0006635189056396, - "259": 1.0217490196228027, - "260": 1.0725486278533936, - "261": 1.2708911895751953, - "262": 1.2438842058181763, - "263": 1.1742539405822754, - "264": 1.2642422914505005, - "265": 1.2814592123031616, - "266": 1.2299879789352417, - "267": 1.1052236557006836, - "268": 1.0351731777191162, - "269": 1.3145029544830322 - }, - "loss": { - "216": 2.563236713409424, - "217": 2.5135276317596436, - "218": 2.5074362754821777, - "219": 2.4941115379333496, - "220": 2.531029224395752, - "221": 2.5122103691101074, - "222": 2.513491153717041, - "223": 2.5352628231048584, - "224": 2.4982762336730957, - "225": 2.4997172355651855, - "226": 2.5034897327423096, - "227": 2.5220346450805664, - "228": 2.5605366230010986, - "229": 2.5451760292053223, - "230": 2.509514093399048, - "231": 2.5111324787139893, - "232": 2.5351409912109375, - "233": 2.506951332092285, - "234": 2.586801528930664, - "235": 2.5398645401000977, - "236": 2.5718321800231934, - "237": 2.520191192626953, - "238": 2.502455234527588, - "239": 2.500775098800659, - "240": 2.5360636711120605, - "241": 2.5270955562591553, - "242": 2.528837203979492, - "243": 2.504053831100464, - "244": 2.5205416679382324, - "245": 2.496408462524414, - "246": 2.5048253536224365, - "247": 2.4868712425231934, - "248": 2.5096635818481445, - "249": 2.478222370147705, - "250": 2.517609119415283, - "251": 2.4875168800354004, - "252": 2.5157854557037354, - "253": 2.521237373352051, - "254": 2.51275634765625, - "255": 2.5100231170654297, - "256": 2.4955272674560547, - "257": 2.489665985107422, - "258": 2.464855194091797, - "259": 2.4700636863708496, - "260": 2.4838998317718506, - "261": 2.502572774887085, - "262": 2.5432636737823486, - "263": 2.4720115661621094, - "264": 2.5000240802764893, - "265": 2.4900641441345215, - "266": 2.4900217056274414, - "267": 2.456181526184082, - "268": 2.463703155517578, - "269": 2.468128204345703 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 0.202403, - 0.205677, - 0.204381, - 0.202488, - 0.202583, - 0.203406, - 0.201901, - 0.20386, - 0.205125, - 0.205019, - 0.204888, - 0.200533, - 0.196383, - 0.199363, - 0.204034, - 0.203893, - 0.202038, - 0.199402, - 0.196785, - 0.193708, - 0.197361, - 0.204681, - 0.204192, - 0.20049, - 0.199303, - 0.200961, - 0.201684, - 0.202153, - 0.204151, - 0.203434, - 0.20349, - 0.203973, - 0.205035, - 0.204637, - 0.204122, - 0.203293, - 0.200122, - 0.200929, - 0.202158, - 0.203933, - 0.206005, - 0.206047, - 0.206004, - 0.205656, - 0.204804, - 0.201051, - 0.201802, - 0.202837, - 0.201174, - 0.200767, - 0.201818, - 0.204089, - 0.205396, - 0.199952 - ], - "train_epoch_time": 4.839815616607666, - "train_loss": 2.5160131506461494, - "train_score": 0.2526228478683461, - "val_loss": 2.559002738595693, - "val_score": 0.24101248576843642 - }, - { - "epoch": 5, - "grad_norm": 1.2344266176223755, - "learning_rate": 0.215, - "model_norm": 87.53753662109375, - "step_logs": { - "grad_norm": { - "270": 1.2850974798202515, - "271": 1.2239179611206055, - "272": 1.24366295337677, - "273": 1.286130666732788, - "274": 1.2779815196990967, - "275": 1.2388229370117188, - "276": 1.2091007232666016, - "277": 1.1750178337097168, - "278": 1.080449104309082, - "279": 1.0785633325576782, - "280": 1.1624637842178345, - "281": 1.357628345489502, - "282": 1.378400444984436, - "283": 1.4578369855880737, - "284": 1.3952049016952515, - "285": 1.2184643745422363, - "286": 1.1723947525024414, - "287": 1.4340606927871704, - "288": 1.367334246635437, - "289": 1.3118513822555542, - "290": 1.3222535848617554, - "291": 1.277795672416687, - "292": 1.1154917478561401, - "293": 1.4842160940170288, - "294": 1.3879642486572266, - "295": 1.2027218341827393, - "296": 1.1791794300079346, - "297": 1.2830088138580322, - "298": 1.4632295370101929, - "299": 1.297553539276123, - "300": 1.2972139120101929, - "301": 1.2117164134979248, - "302": 1.1019034385681152, - "303": 1.1867973804473877, - "304": 1.2630165815353394, - "305": 1.3319343328475952, - "306": 1.463961124420166, - "307": 1.420211672782898, - "308": 1.1998400688171387, - "309": 1.186724305152893, - "310": 1.2939151525497437, - "311": 1.3201467990875244, - "312": 1.2927229404449463, - "313": 1.0932945013046265, - "314": 0.9508772492408752, - "315": 1.0865265130996704, - "316": 1.1584097146987915, - "317": 1.266713261604309, - "318": 1.3500679731369019, - "319": 1.4456214904785156, - "320": 1.3497941493988037, - "321": 1.1739401817321777, - "322": 1.196366548538208, - "323": 1.2344266176223755 - }, - "loss": { - "270": 2.4963231086730957, - "271": 2.457305431365967, - "272": 2.504878044128418, - "273": 2.470299243927002, - "274": 2.4994258880615234, - "275": 2.4559359550476074, - "276": 2.4859466552734375, - "277": 2.4565062522888184, - "278": 2.4518582820892334, - "279": 2.449338436126709, - "280": 2.458657741546631, - "281": 2.4458084106445312, - "282": 2.5018844604492188, - "283": 2.4499363899230957, - "284": 2.503763198852539, - "285": 2.4574086666107178, - "286": 2.454789161682129, - "287": 2.4405882358551025, - "288": 2.5001232624053955, - "289": 2.434464931488037, - "290": 2.4658236503601074, - "291": 2.454690456390381, - "292": 2.4201343059539795, - "293": 2.466909885406494, - "294": 2.4778316020965576, - "295": 2.4609146118164062, - "296": 2.412415027618408, - "297": 2.442077398300171, - "298": 2.4399757385253906, - "299": 2.464625120162964, - "300": 2.43546462059021, - "301": 2.495152473449707, - "302": 2.419623374938965, - "303": 2.4138479232788086, - "304": 2.4415388107299805, - "305": 2.4420111179351807, - "306": 2.4415626525878906, - "307": 2.4777493476867676, - "308": 2.436062812805176, - "309": 2.4265921115875244, - "310": 2.425304651260376, - "311": 2.4613921642303467, - "312": 2.412060260772705, - "313": 2.4266796112060547, - "314": 2.389078140258789, - "315": 2.381617546081543, - "316": 2.4306089878082275, - "317": 2.392611503601074, - "318": 2.4400267601013184, - "319": 2.4236679077148438, - "320": 2.433302164077759, - "321": 2.413728713989258, - "322": 2.413306713104248, - "323": 2.3973727226257324 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 0.200725, - 0.201777, - 0.201617, - 0.200563, - 0.200889, - 0.201466, - 0.202216, - 0.20275, - 0.204532, - 0.204556, - 0.203006, - 0.198888, - 0.198773, - 0.19666, - 0.198417, - 0.201888, - 0.202793, - 0.197142, - 0.199002, - 0.199815, - 0.199773, - 0.200652, - 0.203739, - 0.196169, - 0.198417, - 0.202222, - 0.202456, - 0.200473, - 0.196467, - 0.200291, - 0.200135, - 0.202209, - 0.203996, - 0.20231, - 0.20089, - 0.199426, - 0.196461, - 0.197699, - 0.202157, - 0.202374, - 0.200147, - 0.199793, - 0.200097, - 0.204188, - 0.206595, - 0.204123, - 0.202955, - 0.200542, - 0.199018, - 0.196762, - 0.198984, - 0.202567, - 0.202114, - 0.201249 - ], - "train_epoch_time": 4.840233087539673, - "train_loss": 2.4109062772228183, - "train_score": 0.28348166238799843, - "val_loss": 2.4727224501621845, - "val_score": 0.27163910068545905 - }, - { - "epoch": 6, - "grad_norm": 1.0698927640914917, - "learning_rate": 0.215, - "model_norm": 87.56552124023438, - "step_logs": { - "grad_norm": { - "324": 1.309128999710083, - "325": 1.2631114721298218, - "326": 1.2900135517120361, - "327": 1.2306216955184937, - "328": 1.1494269371032715, - "329": 1.2550078630447388, - "330": 1.5193229913711548, - "331": 1.3595046997070312, - "332": 1.072856068611145, - "333": 1.0630807876586914, - "334": 1.389190435409546, - "335": 1.4626983404159546, - "336": 1.2771999835968018, - "337": 1.2225881814956665, - "338": 1.3771287202835083, - "339": 1.3456461429595947, - "340": 1.0806690454483032, - "341": 1.149245262145996, - "342": 1.0304591655731201, - "343": 1.0370887517929077, - "344": 1.1288073062896729, - "345": 1.2489933967590332, - "346": 1.1935603618621826, - "347": 1.1135035753250122, - "348": 1.317034125328064, - "349": 1.181177020072937, - "350": 1.167004108428955, - "351": 1.1846635341644287, - "352": 1.1871981620788574, - "353": 1.3269562721252441, - "354": 1.3967430591583252, - "355": 1.3563923835754395, - "356": 1.1799196004867554, - "357": 1.1373260021209717, - "358": 1.121716856956482, - "359": 1.0838626623153687, - "360": 1.1106144189834595, - "361": 1.2185466289520264, - "362": 1.4820932149887085, - "363": 1.3594046831130981, - "364": 1.3651905059814453, - "365": 1.270385980606079, - "366": 1.1773992776870728, - "367": 1.304679274559021, - "368": 1.2258681058883667, - "369": 1.167794942855835, - "370": 1.2519911527633667, - "371": 1.1871514320373535, - "372": 1.2129698991775513, - "373": 1.272553563117981, - "374": 1.3957493305206299, - "375": 1.2517162561416626, - "376": 1.0713804960250854, - "377": 1.0698927640914917 - }, - "loss": { - "324": 2.386399269104004, - "325": 2.445016860961914, - "326": 2.396030902862549, - "327": 2.4223196506500244, - "328": 2.4171853065490723, - "329": 2.3736605644226074, - "330": 2.4139795303344727, - "331": 2.4522480964660645, - "332": 2.3881003856658936, - "333": 2.3730225563049316, - "334": 2.3800950050354004, - "335": 2.431530475616455, - "336": 2.421450138092041, - "337": 2.4071640968322754, - "338": 2.3899314403533936, - "339": 2.4413974285125732, - "340": 2.3730709552764893, - "341": 2.3817226886749268, - "342": 2.366791248321533, - "343": 2.3388569355010986, - "344": 2.3523006439208984, - "345": 2.3732428550720215, - "346": 2.3952674865722656, - "347": 2.381603956222534, - "348": 2.39683198928833, - "349": 2.4112133979797363, - "350": 2.375216484069824, - "351": 2.3985180854797363, - "352": 2.3709568977355957, - "353": 2.396017551422119, - "354": 2.400608539581299, - "355": 2.3932509422302246, - "356": 2.3683276176452637, - "357": 2.367392063140869, - "358": 2.38385009765625, - "359": 2.3542637825012207, - "360": 2.32932710647583, - "361": 2.3663556575775146, - "362": 2.3727309703826904, - "363": 2.4313533306121826, - "364": 2.352915048599243, - "365": 2.4125397205352783, - "366": 2.368180274963379, - "367": 2.3804922103881836, - "368": 2.3812994956970215, - "369": 2.351163625717163, - "370": 2.355973482131958, - "371": 2.3940610885620117, - "372": 2.355034351348877, - "373": 2.3667378425598145, - "374": 2.373485565185547, - "375": 2.3836302757263184, - "376": 2.3319592475891113, - "377": 2.3355607986450195 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 0.199591, - 0.200907, - 0.200063, - 0.20146, - 0.203068, - 0.200685, - 0.194959, - 0.198886, - 0.204409, - 0.204529, - 0.197762, - 0.196421, - 0.200481, - 0.201546, - 0.198101, - 0.199124, - 0.204197, - 0.202904, - 0.205108, - 0.204872, - 0.203169, - 0.20081, - 0.20208, - 0.203605, - 0.199481, - 0.20241, - 0.202517, - 0.202277, - 0.202086, - 0.199258, - 0.197726, - 0.198589, - 0.202221, - 0.203072, - 0.203456, - 0.204054, - 0.20342, - 0.201414, - 0.19554, - 0.19876, - 0.198129, - 0.200576, - 0.202272, - 0.199653, - 0.201341, - 0.202381, - 0.200649, - 0.202204, - 0.201469, - 0.200269, - 0.197568, - 0.20081, - 0.204195, - 0.204239 - ], - "train_epoch_time": 4.840440273284912, - "train_loss": 2.3239737161775915, - "train_score": 0.3152371771195356, - "val_loss": 2.37487924112655, - "val_score": 0.30604639064293915 - }, - { - "epoch": 7, - "grad_norm": 1.1673446893692017, - "learning_rate": 0.215, - "model_norm": 87.59405517578125, - "step_logs": { - "grad_norm": { - "378": 1.0856293439865112, - "379": 1.1353424787521362, - "380": 1.2636758089065552, - "381": 1.34560227394104, - "382": 1.5504242181777954, - "383": 1.4265116453170776, - "384": 1.2201482057571411, - "385": 1.1396905183792114, - "386": 1.0633373260498047, - "387": 1.0913819074630737, - "388": 1.0919487476348877, - "389": 1.1365587711334229, - "390": 1.4007723331451416, - "391": 1.4686057567596436, - "392": 1.5081781148910522, - "393": 1.3079806566238403, - "394": 1.214347004890442, - "395": 1.1966195106506348, - "396": 1.063156247138977, - "397": 1.1483653783798218, - "398": 1.2561752796173096, - "399": 1.2642120122909546, - "400": 1.1772046089172363, - "401": 1.1589879989624023, - "402": 1.135513424873352, - "403": 1.152271032333374, - "404": 1.1834492683410645, - "405": 1.256049633026123, - "406": 1.1937779188156128, - "407": 1.1257883310317993, - "408": 1.1758086681365967, - "409": 1.1744511127471924, - "410": 1.2455041408538818, - "411": 1.415810227394104, - "412": 1.293781042098999, - "413": 1.3222863674163818, - "414": 1.2363009452819824, - "415": 1.3613141775131226, - "416": 1.3382188081741333, - "417": 1.1950047016143799, - "418": 1.121564269065857, - "419": 1.1536364555358887, - "420": 1.1189732551574707, - "421": 1.1400444507598877, - "422": 1.1774303913116455, - "423": 1.3642103672027588, - "424": 1.2602027654647827, - "425": 1.2266814708709717, - "426": 1.2392609119415283, - "427": 1.1484966278076172, - "428": 1.1638505458831787, - "429": 1.224527359008789, - "430": 1.1911342144012451, - "431": 1.1673446893692017 - }, - "loss": { - "378": 2.328446865081787, - "379": 2.3256967067718506, - "380": 2.3429102897644043, - "381": 2.358628511428833, - "382": 2.345167875289917, - "383": 2.405836582183838, - "384": 2.3169164657592773, - "385": 2.3362081050872803, - "386": 2.3163704872131348, - "387": 2.324340581893921, - "388": 2.3124208450317383, - "389": 2.331120729446411, - "390": 2.339414358139038, - "391": 2.39945650100708, - "392": 2.326516628265381, - "393": 2.3788561820983887, - "394": 2.302523136138916, - "395": 2.321241855621338, - "396": 2.3199801445007324, - "397": 2.310746669769287, - "398": 2.338313102722168, - "399": 2.3144783973693848, - "400": 2.3162448406219482, - "401": 2.3223297595977783, - "402": 2.3196043968200684, - "403": 2.3204143047332764, - "404": 2.31657075881958, - "405": 2.3323097229003906, - "406": 2.3416872024536133, - "407": 2.3012123107910156, - "408": 2.300736904144287, - "409": 2.329308032989502, - "410": 2.3181538581848145, - "411": 2.356184959411621, - "412": 2.3543615341186523, - "413": 2.3153626918792725, - "414": 2.3357255458831787, - "415": 2.320425271987915, - "416": 2.3382961750030518, - "417": 2.308790683746338, - "418": 2.291630744934082, - "419": 2.3231401443481445, - "420": 2.3026514053344727, - "421": 2.285475254058838, - "422": 2.3193094730377197, - "423": 2.309246063232422, - "424": 2.347034454345703, - "425": 2.297950029373169, - "426": 2.3215527534484863, - "427": 2.2853357791900635, - "428": 2.2994134426116943, - "429": 2.30588698387146, - "430": 2.3058009147644043, - "431": 2.2755813598632812 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 0.203905, - 0.20291, - 0.200322, - 0.19861, - 0.193661, - 0.19708, - 0.201108, - 0.202875, - 0.204281, - 0.203774, - 0.203708, - 0.202913, - 0.197218, - 0.196055, - 0.194552, - 0.199571, - 0.201151, - 0.201629, - 0.2043, - 0.202572, - 0.200458, - 0.200143, - 0.202007, - 0.202414, - 0.202877, - 0.202541, - 0.201879, - 0.200426, - 0.201798, - 0.202982, - 0.201954, - 0.202133, - 0.200571, - 0.196985, - 0.199735, - 0.198857, - 0.20087, - 0.198001, - 0.198645, - 0.201596, - 0.20302, - 0.202527, - 0.203126, - 0.202614, - 0.202019, - 0.197858, - 0.200421, - 0.200861, - 0.200726, - 0.202439, - 0.202196, - 0.200952, - 0.201661, - 0.201997 - ], - "train_epoch_time": 4.840257883071899, - "train_loss": 2.3028510326292455, - "train_score": 0.30849735465761235, - "val_loss": 2.353059915669613, - "val_score": 0.2998887769595627 - }, - { - "epoch": 8, - "grad_norm": 1.1082972288131714, - "learning_rate": 0.215, - "model_norm": 87.62403106689453, - "step_logs": { - "grad_norm": { - "432": 1.1565858125686646, - "433": 1.108580470085144, - "434": 1.0306254625320435, - "435": 0.9733507037162781, - "436": 0.987404465675354, - "437": 1.2669869661331177, - "438": 1.2660613059997559, - "439": 1.2554049491882324, - "440": 1.1647225618362427, - "441": 1.0433356761932373, - "442": 1.162516474723816, - "443": 1.2768657207489014, - "444": 1.3018276691436768, - "445": 1.2005199193954468, - "446": 1.149218201637268, - "447": 0.9853675961494446, - "448": 1.0335965156555176, - "449": 1.2494053840637207, - "450": 1.3953478336334229, - "451": 1.3595314025878906, - "452": 1.358251929283142, - "453": 1.4063332080841064, - "454": 1.296646237373352, - "455": 1.2485666275024414, - "456": 1.2314960956573486, - "457": 1.2004880905151367, - "458": 1.1638100147247314, - "459": 1.0473333597183228, - "460": 1.0785902738571167, - "461": 1.17708158493042, - "462": 1.2843329906463623, - "463": 1.3547714948654175, - "464": 1.364400029182434, - "465": 1.191654086112976, - "466": 1.0111395120620728, - "467": 1.068028211593628, - "468": 1.358184814453125, - "469": 1.7749218940734863, - "470": 1.2150791883468628, - "471": 0.9120576977729797, - "472": 0.8075768947601318, - "473": 0.8076027035713196, - "474": 0.9236871600151062, - "475": 1.017232060432434, - "476": 1.153769850730896, - "477": 1.2207270860671997, - "478": 1.1661633253097534, - "479": 1.1541210412979126, - "480": 1.129042148590088, - "481": 1.1720428466796875, - "482": 1.1354085206985474, - "483": 1.135421633720398, - "484": 1.0487339496612549, - "485": 1.1082972288131714 - }, - "loss": { - "432": 2.3171162605285645, - "433": 2.2820565700531006, - "434": 2.2835030555725098, - "435": 2.2673423290252686, - "436": 2.2827863693237305, - "437": 2.2763357162475586, - "438": 2.3395254611968994, - "439": 2.2768778800964355, - "440": 2.3267083168029785, - "441": 2.275376796722412, - "442": 2.2747557163238525, - "443": 2.285367012023926, - "444": 2.318378210067749, - "445": 2.3075637817382812, - "446": 2.2737903594970703, - "447": 2.2553725242614746, - "448": 2.244853973388672, - "449": 2.2789177894592285, - "450": 2.3110175132751465, - "451": 2.3002848625183105, - "452": 2.2897796630859375, - "453": 2.296938419342041, - "454": 2.325416326522827, - "455": 2.2813148498535156, - "456": 2.2858972549438477, - "457": 2.292266368865967, - "458": 2.2870535850524902, - "459": 2.267334461212158, - "460": 2.252924919128418, - "461": 2.2743101119995117, - "462": 2.2688026428222656, - "463": 2.2813546657562256, - "464": 2.2884106636047363, - "465": 2.306873083114624, - "466": 2.2575159072875977, - "467": 2.2513270378112793, - "468": 2.2696642875671387, - "469": 2.3163137435913086, - "470": 2.3101627826690674, - "471": 2.2393722534179688, - "472": 2.231386661529541, - "473": 2.2225685119628906, - "474": 2.213020086288452, - "475": 2.2348809242248535, - "476": 2.224827289581299, - "477": 2.266059160232544, - "478": 2.2679929733276367, - "479": 2.260349750518799, - "480": 2.2663614749908447, - "481": 2.2581393718719482, - "482": 2.235718011856079, - "483": 2.2321369647979736, - "484": 2.250542163848877, - "485": 2.262209177017212 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 0.202437, - 0.203234, - 0.204761, - 0.205758, - 0.205562, - 0.19985, - 0.200251, - 0.20011, - 0.202319, - 0.204484, - 0.202093, - 0.199686, - 0.199336, - 0.201473, - 0.202364, - 0.20549, - 0.204536, - 0.200254, - 0.197145, - 0.197905, - 0.197863, - 0.196785, - 0.199495, - 0.200287, - 0.200687, - 0.201389, - 0.202131, - 0.204371, - 0.203693, - 0.201785, - 0.199414, - 0.197886, - 0.19771, - 0.201656, - 0.205019, - 0.203894, - 0.197725, - 0.187575, - 0.201178, - 0.206744, - 0.208451, - 0.208425, - 0.206444, - 0.204806, - 0.202007, - 0.200805, - 0.20198, - 0.202191, - 0.202741, - 0.201803, - 0.202451, - 0.202432, - 0.204269, - 0.203143 - ], - "train_epoch_time": 4.8408043384552, - "train_loss": 2.2564530308311603, - "train_score": 0.33053151899293987, - "val_loss": 2.3155812620432314, - "val_score": 0.3140651910778576 - }, - { - "epoch": 9, - "grad_norm": 1.1109791994094849, - "learning_rate": 0.215, - "model_norm": 87.653564453125, - "step_logs": { - "grad_norm": { - "486": 1.274440884590149, - "487": 1.5721691846847534, - "488": 1.6626176834106445, - "489": 1.3811311721801758, - "490": 1.2371716499328613, - "491": 1.2855167388916016, - "492": 1.1732789278030396, - "493": 1.2300169467926025, - "494": 1.3019015789031982, - "495": 1.2852145433425903, - "496": 1.131698727607727, - "497": 1.0360064506530762, - "498": 1.1023600101470947, - "499": 1.217097282409668, - "500": 1.267598271369934, - "501": 1.1315768957138062, - "502": 1.0207968950271606, - "503": 0.9872194528579712, - "504": 0.8996853828430176, - "505": 0.9872053861618042, - "506": 1.1195306777954102, - "507": 1.2206776142120361, - "508": 1.2348135709762573, - "509": 1.0950978994369507, - "510": 1.0299195051193237, - "511": 0.9195706248283386, - "512": 0.9519891738891602, - "513": 1.1784167289733887, - "514": 1.181997299194336, - "515": 1.0261567831039429, - "516": 1.0205830335617065, - "517": 1.1702600717544556, - "518": 1.313822865486145, - "519": 1.4962973594665527, - "520": 1.5798176527023315, - "521": 1.3525158166885376, - "522": 1.2542475461959839, - "523": 1.3560701608657837, - "524": 1.3302637338638306, - "525": 1.183883547782898, - "526": 1.1254867315292358, - "527": 1.0960978269577026, - "528": 1.0458887815475464, - "529": 0.9787492752075195, - "530": 1.0804812908172607, - "531": 1.3128334283828735, - "532": 1.3985402584075928, - "533": 1.4587278366088867, - "534": 1.537503957748413, - "535": 1.4038366079330444, - "536": 1.2967747449874878, - "537": 1.1811518669128418, - "538": 1.1067805290222168, - "539": 1.1109791994094849 - }, - "loss": { - "486": 2.2600908279418945, - "487": 2.292318344116211, - "488": 2.319120168685913, - "489": 2.2957048416137695, - "490": 2.2406320571899414, - "491": 2.269606351852417, - "492": 2.263916015625, - "493": 2.237008571624756, - "494": 2.28615403175354, - "495": 2.264836311340332, - "496": 2.2612414360046387, - "497": 2.2186059951782227, - "498": 2.25152850151062, - "499": 2.1991467475891113, - "500": 2.2622876167297363, - "501": 2.2540721893310547, - "502": 2.2395267486572266, - "503": 2.232537031173706, - "504": 2.2400033473968506, - "505": 2.196216583251953, - "506": 2.228848934173584, - "507": 2.2368764877319336, - "508": 2.2414612770080566, - "509": 2.239764928817749, - "510": 2.2245795726776123, - "511": 2.192011833190918, - "512": 2.211658000946045, - "513": 2.2534379959106445, - "514": 2.246367931365967, - "515": 2.220266103744507, - "516": 2.1989667415618896, - "517": 2.2220592498779297, - "518": 2.246497631072998, - "519": 2.263000726699829, - "520": 2.2741599082946777, - "521": 2.2593367099761963, - "522": 2.2507152557373047, - "523": 2.260756492614746, - "524": 2.2669968605041504, - "525": 2.241084098815918, - "526": 2.2304186820983887, - "527": 2.1926121711730957, - "528": 2.201482057571411, - "529": 2.1710610389709473, - "530": 2.2309653759002686, - "531": 2.208761215209961, - "532": 2.2589619159698486, - "533": 2.2592267990112305, - "534": 2.260641098022461, - "535": 2.2182810306549072, - "536": 2.244582176208496, - "537": 2.210331439971924, - "538": 2.199983835220337, - "539": 2.19126033782959 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 0.199582, - 0.192667, - 0.19058, - 0.19737, - 0.200292, - 0.199393, - 0.201809, - 0.200428, - 0.199129, - 0.199369, - 0.202661, - 0.204371, - 0.20321, - 0.200483, - 0.199749, - 0.202626, - 0.204758, - 0.205363, - 0.206961, - 0.205211, - 0.202744, - 0.200633, - 0.200349, - 0.203298, - 0.204517, - 0.206439, - 0.205929, - 0.201642, - 0.201526, - 0.20457, - 0.204583, - 0.20164, - 0.198596, - 0.194332, - 0.192311, - 0.197785, - 0.199974, - 0.197712, - 0.198355, - 0.201456, - 0.202629, - 0.20304, - 0.204098, - 0.205264, - 0.20355, - 0.198361, - 0.196692, - 0.195233, - 0.193274, - 0.196257, - 0.198975, - 0.201339, - 0.202858, - 0.202725 - ], - "train_epoch_time": 4.840954065322876, - "train_loss": 2.2157062061889956, - "train_score": 0.3290026452654232, - "val_loss": 2.2771258649815103, - "val_score": 0.3184557977623288 - }, - { - "epoch": 10, - "grad_norm": 1.2484924793243408, - "learning_rate": 0.215, - "model_norm": 87.6837387084961, - "step_logs": { - "grad_norm": { - "540": 1.0730115175247192, - "541": 1.0840245485305786, - "542": 1.153657078742981, - "543": 1.2076923847198486, - "544": 1.3433432579040527, - "545": 1.5389057397842407, - "546": 1.326480746269226, - "547": 1.233006238937378, - "548": 1.295018196105957, - "549": 1.4531220197677612, - "550": 1.2425044775009155, - "551": 1.1023786067962646, - "552": 1.0534733533859253, - "553": 1.0195304155349731, - "554": 1.0069540739059448, - "555": 1.0201994180679321, - "556": 1.0369800329208374, - "557": 1.1429098844528198, - "558": 1.2822461128234863, - "559": 1.3738548755645752, - "560": 1.3105756044387817, - "561": 1.264311671257019, - "562": 1.191485047340393, - "563": 1.1162223815917969, - "564": 1.241092562675476, - "565": 1.3615766763687134, - "566": 1.4249286651611328, - "567": 1.2953282594680786, - "568": 1.240553617477417, - "569": 1.2388930320739746, - "570": 1.287211298942566, - "571": 1.2208629846572876, - "572": 1.1403542757034302, - "573": 1.0927703380584717, - "574": 1.0186707973480225, - "575": 1.097156047821045, - "576": 1.2424495220184326, - "577": 1.2379029989242554, - "578": 1.2404221296310425, - "579": 1.3435301780700684, - "580": 1.2760157585144043, - "581": 1.2772034406661987, - "582": 1.449062466621399, - "583": 1.4790936708450317, - "584": 1.2563692331314087, - "585": 1.3491219282150269, - "586": 1.3592236042022705, - "587": 1.6502348184585571, - "588": 1.714043378829956, - "589": 1.5939899682998657, - "590": 1.428162693977356, - "591": 1.483228087425232, - "592": 1.2308692932128906, - "593": 1.2484924793243408 - }, - "loss": { - "540": 2.224256753921509, - "541": 2.1956377029418945, - "542": 2.2281646728515625, - "543": 2.2300729751586914, - "544": 2.21303653717041, - "545": 2.217317581176758, - "546": 2.2570157051086426, - "547": 2.2219507694244385, - "548": 2.2281265258789062, - "549": 2.2299113273620605, - "550": 2.247556209564209, - "551": 2.2148499488830566, - "552": 2.188930034637451, - "553": 2.1814873218536377, - "554": 2.1891088485717773, - "555": 2.2002689838409424, - "556": 2.1906023025512695, - "557": 2.180880069732666, - "558": 2.1901936531066895, - "559": 2.2383034229278564, - "560": 2.192107677459717, - "561": 2.226154327392578, - "562": 2.2302803993225098, - "563": 2.1916139125823975, - "564": 2.2360498905181885, - "565": 2.1873154640197754, - "566": 2.2202963829040527, - "567": 2.2145023345947266, - "568": 2.2246246337890625, - "569": 2.217089891433716, - "570": 2.2217774391174316, - "571": 2.2137060165405273, - "572": 2.2099876403808594, - "573": 2.2021801471710205, - "574": 2.1595754623413086, - "575": 2.167999029159546, - "576": 2.1851797103881836, - "577": 2.1996870040893555, - "578": 2.2081122398376465, - "579": 2.218878984451294, - "580": 2.21826171875, - "581": 2.1950340270996094, - "582": 2.242854118347168, - "583": 2.1975955963134766, - "584": 2.2296929359436035, - "585": 2.1791646480560303, - "586": 2.203380584716797, - "587": 2.185145139694214, - "588": 2.2737479209899902, - "589": 2.2485265731811523, - "590": 2.2436585426330566, - "591": 2.2177751064300537, - "592": 2.20528507232666, - "593": 2.1836795806884766 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 0.203667, - 0.203303, - 0.202027, - 0.200877, - 0.197672, - 0.192857, - 0.198375, - 0.200269, - 0.198906, - 0.195136, - 0.200216, - 0.203025, - 0.203887, - 0.204524, - 0.204802, - 0.204596, - 0.204223, - 0.201994, - 0.198945, - 0.19713, - 0.198297, - 0.199593, - 0.20123, - 0.202617, - 0.200177, - 0.197046, - 0.195756, - 0.198807, - 0.200118, - 0.200108, - 0.199043, - 0.200489, - 0.202209, - 0.203157, - 0.20444, - 0.20289, - 0.199825, - 0.200021, - 0.200017, - 0.19771, - 0.199276, - 0.199095, - 0.19534, - 0.194216, - 0.199795, - 0.197286, - 0.197223, - 0.189599, - 0.188778, - 0.191712, - 0.19586, - 0.194282, - 0.200214, - 0.199678 - ], - "train_epoch_time": 4.840428113937378, - "train_loss": 2.2075779918960725, - "train_score": 0.34894749815529014, - "val_loss": 2.2799201676820915, - "val_score": 0.3267840485074627 - }, - { - "epoch": 11, - "grad_norm": 1.0301048755645752, - "learning_rate": 0.215, - "model_norm": 87.71573638916016, - "step_logs": { - "grad_norm": { - "594": 1.1950290203094482, - "595": 1.1960158348083496, - "596": 1.2385780811309814, - "597": 1.150543212890625, - "598": 1.0552761554718018, - "599": 1.118019700050354, - "600": 1.1891049146652222, - "601": 1.1616593599319458, - "602": 1.1191298961639404, - "603": 1.1218911409378052, - "604": 1.1111873388290405, - "605": 1.0850526094436646, - "606": 1.0855337381362915, - "607": 1.144742727279663, - "608": 1.219439148902893, - "609": 1.2101095914840698, - "610": 1.1669107675552368, - "611": 1.288949966430664, - "612": 1.4554662704467773, - "613": 1.5926470756530762, - "614": 1.5956976413726807, - "615": 1.4946579933166504, - "616": 1.100767731666565, - "617": 1.048630952835083, - "618": 1.1599191427230835, - "619": 1.1526316404342651, - "620": 1.1674768924713135, - "621": 1.1618609428405762, - "622": 1.0434201955795288, - "623": 1.0317045450210571, - "624": 1.1041157245635986, - "625": 1.245929479598999, - "626": 1.3019353151321411, - "627": 1.2685775756835938, - "628": 1.1965506076812744, - "629": 1.2156870365142822, - "630": 1.2426406145095825, - "631": 1.120285153388977, - "632": 1.0547648668289185, - "633": 1.0630483627319336, - "634": 1.0560157299041748, - "635": 1.1546316146850586, - "636": 1.1806838512420654, - "637": 1.0786075592041016, - "638": 0.9861094355583191, - "639": 1.0212830305099487, - "640": 1.0212528705596924, - "641": 1.0391095876693726, - "642": 1.1018105745315552, - "643": 1.253267765045166, - "644": 1.4574978351593018, - "645": 1.5392931699752808, - "646": 1.2028312683105469, - "647": 1.0301048755645752 - }, - "loss": { - "594": 2.202592611312866, - "595": 2.21405291557312, - "596": 2.1930947303771973, - "597": 2.1749229431152344, - "598": 2.1603639125823975, - "599": 2.1472935676574707, - "600": 2.159951686859131, - "601": 2.1965529918670654, - "602": 2.1713905334472656, - "603": 2.187828540802002, - "604": 2.150160789489746, - "605": 2.175309658050537, - "606": 2.1545028686523438, - "607": 2.1715760231018066, - "608": 2.1555254459381104, - "609": 2.2049925327301025, - "610": 2.1803390979766846, - "611": 2.1594817638397217, - "612": 2.2130517959594727, - "613": 2.2050609588623047, - "614": 2.2264647483825684, - "615": 2.2138705253601074, - "616": 2.1906564235687256, - "617": 2.174870014190674, - "618": 2.2114453315734863, - "619": 2.192110776901245, - "620": 2.173429012298584, - "621": 2.189335346221924, - "622": 2.1438052654266357, - "623": 2.148369312286377, - "624": 2.156013011932373, - "625": 2.1522488594055176, - "626": 2.18947172164917, - "627": 2.207270622253418, - "628": 2.1691670417785645, - "629": 2.1718530654907227, - "630": 2.148488998413086, - "631": 2.157902717590332, - "632": 2.1440021991729736, - "633": 2.1581244468688965, - "634": 2.121485948562622, - "635": 2.1491992473602295, - "636": 2.16536021232605, - "637": 2.173429250717163, - "638": 2.156740188598633, - "639": 2.1143622398376465, - "640": 2.1299097537994385, - "641": 2.1474719047546387, - "642": 2.1455202102661133, - "643": 2.1713802814483643, - "644": 2.16792631149292, - "645": 2.193194627761841, - "646": 2.1946706771850586, - "647": 2.1413674354553223 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 0.200991, - 0.201037, - 0.199963, - 0.201797, - 0.203712, - 0.202338, - 0.200865, - 0.20168, - 0.202447, - 0.202478, - 0.202499, - 0.203179, - 0.203061, - 0.201902, - 0.200156, - 0.200673, - 0.201474, - 0.198577, - 0.19494, - 0.191339, - 0.191462, - 0.19396, - 0.202934, - 0.203917, - 0.201802, - 0.201849, - 0.201421, - 0.201635, - 0.20387, - 0.204128, - 0.20268, - 0.199529, - 0.198482, - 0.199374, - 0.200756, - 0.200345, - 0.19958, - 0.202349, - 0.203641, - 0.203542, - 0.203501, - 0.201559, - 0.201084, - 0.203301, - 0.205061, - 0.204173, - 0.204248, - 0.203975, - 0.202672, - 0.199488, - 0.194511, - 0.192629, - 0.200772, - 0.204126 - ], - "train_epoch_time": 4.839927434921265, - "train_loss": 2.1419006591889915, - "train_score": 0.36307388820299286, - "val_loss": 2.2275397684489273, - "val_score": 0.3394446045878834 - }, - { - "epoch": 12, - "grad_norm": 0.8815606832504272, - "learning_rate": 0.215, - "model_norm": 87.74436950683594, - "step_logs": { - "grad_norm": { - "648": 1.1736186742782593, - "649": 1.1927375793457031, - "650": 1.192915678024292, - "651": 1.415801763534546, - "652": 1.352135419845581, - "653": 1.1469707489013672, - "654": 1.0822917222976685, - "655": 1.0758342742919922, - "656": 1.109924077987671, - "657": 1.0968343019485474, - "658": 1.1693978309631348, - "659": 1.1531552076339722, - "660": 1.1513526439666748, - "661": 1.1378060579299927, - "662": 1.0363950729370117, - "663": 1.019266963005066, - "664": 1.0714060068130493, - "665": 1.1098425388336182, - "666": 1.0873985290527344, - "667": 1.0098145008087158, - "668": 0.9995170831680298, - "669": 1.0532211065292358, - "670": 1.0094290971755981, - "671": 1.0100833177566528, - "672": 1.0575320720672607, - "673": 1.041836142539978, - "674": 0.9846584796905518, - "675": 0.9802210330963135, - "676": 1.006662130355835, - "677": 1.0164037942886353, - "678": 1.035056710243225, - "679": 0.9977219104766846, - "680": 0.9377416372299194, - "681": 0.9368072152137756, - "682": 1.042328119277954, - "683": 1.0932453870773315, - "684": 1.1079999208450317, - "685": 1.1131280660629272, - "686": 0.9918408989906311, - "687": 0.8160966038703918, - "688": 0.792847216129303, - "689": 0.8231911659240723, - "690": 0.8266491889953613, - "691": 0.8108084797859192, - "692": 0.8159105777740479, - "693": 0.812720775604248, - "694": 0.8996869325637817, - "695": 0.9295957088470459, - "696": 0.8622173070907593, - "697": 0.7823448777198792, - "698": 0.8126835823059082, - "699": 0.9474884271621704, - "700": 0.9538929462432861, - "701": 0.8815606832504272 - }, - "loss": { - "648": 2.140043020248413, - "649": 2.166339874267578, - "650": 2.1256985664367676, - "651": 2.16790509223938, - "652": 2.194870948791504, - "653": 2.138918399810791, - "654": 2.144619941711426, - "655": 2.1440577507019043, - "656": 2.148167610168457, - "657": 2.1287899017333984, - "658": 2.128760814666748, - "659": 2.150738477706909, - "660": 2.136873245239258, - "661": 2.110874891281128, - "662": 2.1498284339904785, - "663": 2.1460490226745605, - "664": 2.1357290744781494, - "665": 2.144239902496338, - "666": 2.107335090637207, - "667": 2.0888638496398926, - "668": 2.1202988624572754, - "669": 2.118359088897705, - "670": 2.1162853240966797, - "671": 2.123889446258545, - "672": 2.127480983734131, - "673": 2.094712257385254, - "674": 2.116825819015503, - "675": 2.101317882537842, - "676": 2.076444387435913, - "677": 2.095736503601074, - "678": 2.091125726699829, - "679": 2.1160941123962402, - "680": 2.101346969604492, - "681": 2.1070613861083984, - "682": 2.0930233001708984, - "683": 2.1106696128845215, - "684": 2.1121487617492676, - "685": 2.101912498474121, - "686": 2.1231493949890137, - "687": 2.1286134719848633, - "688": 2.078831434249878, - "689": 2.073833465576172, - "690": 2.080270290374756, - "691": 2.0773372650146484, - "692": 2.052178382873535, - "693": 2.049058675765991, - "694": 2.0780670642852783, - "695": 2.0962839126586914, - "696": 2.0633857250213623, - "697": 2.0995097160339355, - "698": 2.065528392791748, - "699": 2.1045262813568115, - "700": 2.093109369277954, - "701": 2.065265417098999 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 0.201087, - 0.199665, - 0.198254, - 0.192262, - 0.192849, - 0.195817, - 0.195958, - 0.194889, - 0.193068, - 0.192037, - 0.189453, - 0.18871, - 0.187497, - 0.186441, - 0.187233, - 0.186295, - 0.184175, - 0.182359, - 0.181386, - 0.181382, - 0.180445, - 0.178389, - 0.177847, - 0.176648, - 0.174737, - 0.173641, - 0.173329, - 0.172116, - 0.170443, - 0.169153, - 0.167661, - 0.167028, - 0.16652, - 0.16531, - 0.162703, - 0.160872, - 0.159472, - 0.158161, - 0.158524, - 0.159176, - 0.158045, - 0.156481, - 0.1552, - 0.154082, - 0.152726, - 0.151487, - 0.149469, - 0.147969, - 0.147282, - 0.146761, - 0.145192, - 0.142827, - 0.141498, - 0.140828 - ], - "train_epoch_time": 4.840087652206421, - "train_loss": 2.0689332987348865, - "train_score": 0.3863309272136031, - "val_loss": 2.1656318819481797, - "val_score": 0.35967548073501454 - }, - { - "epoch": 13, - "grad_norm": 0.5793240070343018, - "learning_rate": 0.14333333333333334, - "model_norm": 87.76300048828125, - "step_logs": { - "grad_norm": { - "702": 0.8874034285545349, - "703": 0.905583381652832, - "704": 0.8495563268661499, - "705": 0.7218671441078186, - "706": 0.6585786938667297, - "707": 0.6682102680206299, - "708": 0.7008153796195984, - "709": 0.7035977840423584, - "710": 0.6829692125320435, - "711": 0.638738214969635, - "712": 0.6198923587799072, - "713": 0.6988686323165894, - "714": 0.7239283323287964, - "715": 0.7918804287910461, - "716": 0.7934989929199219, - "717": 0.7834325432777405, - "718": 0.807168185710907, - "719": 0.7808082699775696, - "720": 0.6788971424102783, - "721": 0.6064842939376831, - "722": 0.6180140376091003, - "723": 0.6385231614112854, - "724": 0.6860033869743347, - "725": 0.7703426480293274, - "726": 0.8105504512786865, - "727": 0.8015021681785583, - "728": 0.8084497451782227, - "729": 0.8108120560646057, - "730": 0.6805449724197388, - "731": 0.6301560401916504, - "732": 0.6809912323951721, - "733": 0.647938072681427, - "734": 0.575066089630127, - "735": 0.5573794841766357, - "736": 0.6263303160667419, - "737": 0.6469056010246277, - "738": 0.6373700499534607, - "739": 0.6472172737121582, - "740": 0.5829676389694214, - "741": 0.6078576445579529, - "742": 0.5903823375701904, - "743": 0.6300957798957825, - "744": 0.6502521634101868, - "745": 0.6128755211830139, - "746": 0.5980471968650818, - "747": 0.5541452169418335, - "748": 0.5953189730644226, - "749": 0.5821541547775269, - "750": 0.598314106464386, - "751": 0.5398235321044922, - "752": 0.58330237865448, - "753": 0.5274991989135742, - "754": 0.5851951241493225, - "755": 0.5793240070343018 - }, - "loss": { - "702": 2.05574369430542, - "703": 2.0688095092773438, - "704": 2.0423386096954346, - "705": 2.0397229194641113, - "706": 2.0588929653167725, - "707": 2.0642752647399902, - "708": 2.0728094577789307, - "709": 2.0478193759918213, - "710": 2.0581283569335938, - "711": 2.05910062789917, - "712": 2.06070876121521, - "713": 2.0621001720428467, - "714": 2.043877601623535, - "715": 2.0738160610198975, - "716": 2.04972505569458, - "717": 2.0713717937469482, - "718": 2.0618996620178223, - "719": 2.0546112060546875, - "720": 2.0492255687713623, - "721": 2.0458288192749023, - "722": 2.041354179382324, - "723": 2.023507595062256, - "724": 2.042503595352173, - "725": 2.0306506156921387, - "726": 2.048412322998047, - "727": 2.0250191688537598, - "728": 2.0446135997772217, - "729": 2.0537240505218506, - "730": 2.0677785873413086, - "731": 2.021470546722412, - "732": 2.054034471511841, - "733": 2.035071611404419, - "734": 2.0356285572052, - "735": 2.0282442569732666, - "736": 2.0338058471679688, - "737": 2.0151429176330566, - "738": 2.027527332305908, - "739": 2.0506112575531006, - "740": 2.0555806159973145, - "741": 2.0270466804504395, - "742": 2.0533108711242676, - "743": 2.055283546447754, - "744": 2.03786039352417, - "745": 2.0498759746551514, - "746": 2.025909662246704, - "747": 2.0287299156188965, - "748": 2.026238441467285, - "749": 1.9891866445541382, - "750": 2.0195558071136475, - "751": 2.0114665031433105, - "752": 2.0062484741210938, - "753": 2.015991687774658, - "754": 2.008403778076172, - "755": 2.008389711380005 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 0.139504, - 0.138119, - 0.137267, - 0.136915, - 0.136047, - 0.134706, - 0.133234, - 0.131906, - 0.13075, - 0.129701, - 0.128503, - 0.126801, - 0.12536, - 0.123722, - 0.122408, - 0.121209, - 0.119788, - 0.118646, - 0.117861, - 0.116876, - 0.115528, - 0.114135, - 0.112655, - 0.110979, - 0.109523, - 0.108263, - 0.106967, - 0.105682, - 0.104925, - 0.103777, - 0.102323, - 0.101125, - 0.100045, - 0.098784, - 0.0972884, - 0.0959178, - 0.0946495, - 0.0933302, - 0.0921932, - 0.0908161, - 0.0895609, - 0.0881626, - 0.0868038, - 0.085589, - 0.0843062, - 0.0830854, - 0.0816961, - 0.0804023, - 0.0790716, - 0.0778608, - 0.0764761, - 0.0752553, - 0.073853, - 0.0725513 - ], - "train_epoch_time": 4.840941429138184, - "train_loss": 2.019192037691858, - "train_score": 0.40129461072951855, - "val_loss": 2.1238986813515663, - "val_score": 0.3741613454030384 - }, - { - "epoch": 14, - "grad_norm": 0.48611950874328613, - "learning_rate": 0.07166666666666667, - "model_norm": 87.7691650390625, - "step_logs": { - "grad_norm": { - "756": 0.6146167516708374, - "757": 0.5796166658401489, - "758": 0.5638355016708374, - "759": 0.6140334010124207, - "760": 0.5936705470085144, - "761": 0.5168383717536926, - "762": 0.5016950964927673, - "763": 0.5049790143966675, - "764": 0.5676010251045227, - "765": 0.5727619528770447, - "766": 0.5642577409744263, - "767": 0.5229026675224304, - "768": 0.5591195225715637, - "769": 0.5427458882331848, - "770": 0.5678499341011047, - "771": 0.5436962842941284, - "772": 0.5067341327667236, - "773": 0.5637845993041992, - "774": 0.5042881369590759, - "775": 0.531964123249054, - "776": 0.5424506664276123, - "777": 0.5415850877761841, - "778": 0.5109631419181824, - "779": 0.5317161083221436, - "780": 0.5138946771621704, - "781": 0.5129923820495605, - "782": 0.5475530028343201, - "783": 0.5945153832435608, - "784": 0.5471187233924866, - "785": 0.5442525744438171, - "786": 0.5080510973930359, - "787": 0.5497950911521912, - "788": 0.5333343744277954, - "789": 0.5105999112129211, - "790": 0.5084962248802185, - "791": 0.5214089751243591, - "792": 0.48692893981933594, - "793": 0.47998690605163574, - "794": 0.5257403254508972, - "795": 0.49471163749694824, - "796": 0.5057258009910583, - "797": 0.505268394947052, - "798": 0.4844566881656647, - "799": 0.5054225325584412, - "800": 0.47709977626800537, - "801": 0.481972336769104, - "802": 0.5098870992660522, - "803": 0.48065146803855896, - "804": 0.49018529057502747, - "805": 0.5300552248954773, - "806": 0.4903004765510559, - "807": 0.5177924036979675, - "808": 0.5555498003959656, - "809": 0.48611950874328613 - }, - "loss": { - "756": 2.026135206222534, - "757": 2.0169997215270996, - "758": 2.0174005031585693, - "759": 2.0081300735473633, - "760": 1.9965903759002686, - "761": 2.020857334136963, - "762": 2.0221657752990723, - "763": 2.0069189071655273, - "764": 2.016416072845459, - "765": 2.008983612060547, - "766": 2.039663314819336, - "767": 2.0164713859558105, - "768": 2.0417206287384033, - "769": 1.9965007305145264, - "770": 2.0104260444641113, - "771": 1.990578293800354, - "772": 2.0226285457611084, - "773": 2.006784200668335, - "774": 2.0326991081237793, - "775": 2.0209670066833496, - "776": 2.0039942264556885, - "777": 2.0059654712677, - "778": 2.0240859985351562, - "779": 2.0161609649658203, - "780": 2.0291223526000977, - "781": 2.000490665435791, - "782": 1.9832587242126465, - "783": 2.021136999130249, - "784": 2.014061689376831, - "785": 2.0211124420166016, - "786": 1.9889352321624756, - "787": 2.0232739448547363, - "788": 2.015451431274414, - "789": 2.0262560844421387, - "790": 1.9943532943725586, - "791": 2.020418643951416, - "792": 2.0105700492858887, - "793": 2.015556573867798, - "794": 2.0287084579467773, - "795": 2.0081353187561035, - "796": 1.9942378997802734, - "797": 2.015990972518921, - "798": 2.0090925693511963, - "799": 1.9954313039779663, - "800": 1.9980233907699585, - "801": 2.00476336479187, - "802": 1.9976757764816284, - "803": 1.9871928691864014, - "804": 1.9835243225097656, - "805": 2.0080370903015137, - "806": 2.0102531909942627, - "807": 1.9849202632904053, - "808": 2.0381040573120117, - "809": 2.0150413513183594 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 0.0711911, - 0.0699299, - 0.0686391, - 0.0672578, - 0.0659716, - 0.0647526, - 0.0634521, - 0.0621303, - 0.0607531, - 0.0594324, - 0.0581301, - 0.056848, - 0.0555039, - 0.054196, - 0.0528614, - 0.0515611, - 0.0502712, - 0.0489147, - 0.0476354, - 0.0463, - 0.0449745, - 0.0436565, - 0.0423531, - 0.0410236, - 0.0397119, - 0.0383905, - 0.0370564, - 0.0357214, - 0.0344179, - 0.0330985, - 0.0317862, - 0.0304552, - 0.0291375, - 0.0278205, - 0.0264976, - 0.0251733, - 0.0238553, - 0.0225327, - 0.0212039, - 0.0198833, - 0.0185581, - 0.0172343, - 0.0159111, - 0.0145851, - 0.0132616, - 0.0119362, - 0.01061, - 0.00928511, - 0.00795912, - 0.00663272, - 0.00530696, - 0.00398041, - 0.00265379, - 0.00132706 - ], - "train_epoch_time": 4.8407135009765625, - "train_loss": 2.0041962477876947, - "train_score": 0.40498004843035934, - "val_loss": 2.112407008761243, - "val_score": 0.3762467705081169 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:27:03.667579", - "final_model_norm": 87.7691650390625, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:25:22.141603", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 8.009259223937988, - "learning_rate": 2.15e-11, - "model_norm": 87.43960571289062, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.895608901977539, - "3": 8.547172546386719, - "4": 20.509349822998047, - "5": 8.080146789550781, - "6": 5.149311542510986, - "7": 4.696757793426514, - "8": 4.1927337646484375, - "9": 6.789407253265381, - "10": 4.837518215179443, - "11": 10.126346588134766, - "12": 1.9325710535049438, - "13": 4.5322651863098145, - "14": 7.024761199951172, - "15": 25.329877853393555, - "16": 7.749931812286377, - "17": 25.037580490112305, - "18": 4.084115505218506, - "19": 5.071617603302002, - "20": 5.837102890014648, - "21": 6.439196586608887, - "22": 4.204246997833252, - "23": 5.8123674392700195, - "24": 3.6390187740325928, - "25": 24.066499710083008, - "26": 3.986567974090576, - "27": 13.763236045837402, - "28": 5.870517253875732, - "29": 27.57763671875, - "30": 3.9071195125579834, - "31": 3.903594732284546, - "32": 5.579819202423096, - "33": 3.242835760116577, - "34": 8.32712459564209, - "35": 3.9553658962249756, - "36": 2.4306905269622803, - "37": 6.125897407531738, - "38": 3.652567148208618, - "39": 4.9440388679504395, - "40": 4.961220741271973, - "41": 5.906675815582275, - "42": 4.929117679595947, - "43": 4.015328884124756, - "44": 3.1502163410186768, - "45": 3.341864585876465, - "46": 3.961202383041382, - "47": 3.1543006896972656, - "48": 5.882992744445801, - "49": 4.062252998352051, - "50": 10.771195411682129, - "51": 5.780614852905273, - "52": 2.9459245204925537, - "53": 8.009259223937988 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.855247974395752, - "3": 3.6929891109466553, - "4": 4.129785537719727, - "5": 4.1043596267700195, - "6": 3.5702595710754395, - "7": 3.5731899738311768, - "8": 3.539045810699463, - "9": 3.4656357765197754, - "10": 3.7069530487060547, - "11": 3.3295164108276367, - "12": 3.3303349018096924, - "13": 3.270383358001709, - "14": 3.419666290283203, - "15": 5.847019672393799, - "16": 3.521577835083008, - "17": 4.828075408935547, - "18": 3.4318320751190186, - "19": 3.0727391242980957, - "20": 3.274855613708496, - "21": 3.561856269836426, - "22": 3.0707850456237793, - "23": 3.165107011795044, - "24": 2.922550678253174, - "25": 5.4841156005859375, - "26": 3.095470428466797, - "27": 3.479790210723877, - "28": 3.0608644485473633, - "29": 4.4806013107299805, - "30": 3.3425827026367188, - "31": 3.0492196083068848, - "32": 3.2768490314483643, - "33": 3.048379421234131, - "34": 3.403010845184326, - "35": 3.2578158378601074, - "36": 2.8687338829040527, - "37": 3.1075387001037598, - "38": 2.9389617443084717, - "39": 3.2360987663269043, - "40": 3.3155179023742676, - "41": 3.0283236503601074, - "42": 3.1334774494171143, - "43": 3.3440723419189453, - "44": 2.9469733238220215, - "45": 2.946594715118408, - "46": 3.0986151695251465, - "47": 2.9633421897888184, - "48": 3.1910109519958496, - "49": 3.0943214893341064, - "50": 3.6288628578186035, - "51": 3.363668441772461, - "52": 3.04264235496521, - "53": 3.458778142929077 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 2.15e-11, - 0.00345153, - 0.00816687, - 0.0114403, - 0.00916873, - 0.0183603, - 0.0235443, - 0.0275411, - 0.0316924, - 0.0307785, - 0.0378612, - 0.0273667, - 0.050149, - 0.047552, - 0.0419701, - 0.0142107, - 0.0433604, - 0.0127226, - 0.0651462, - 0.0608817, - 0.0594179, - 0.0591903, - 0.0743557, - 0.0647328, - 0.0836436, - 0.0161007, - 0.0868686, - 0.0279085, - 0.0717604, - 0.0107657, - 0.0996469, - 0.0999944, - 0.0832078, - 0.113998, - 0.0587264, - 0.11055, - 0.133516, - 0.0811468, - 0.119194, - 0.102672, - 0.104978, - 0.0874694, - 0.106225, - 0.127894, - 0.14349, - 0.141582, - 0.131795, - 0.150902, - 0.0973905, - 0.134906, - 0.0484574, - 0.103969, - 0.164547, - 0.0718163 - ], - "train_epoch_time": 4.842504978179932, - "train_loss": 2.7792877371034117, - "train_score": 0.22943754480080078, - "val_loss": 2.8057689620213995, - "val_score": 0.2243514995418925 - }, - { - "epoch": 1, - "grad_norm": 1.44816255569458, - "learning_rate": 0.215, - "model_norm": 87.4570083618164, - "step_logs": { - "grad_norm": { - "54": 1.894862413406372, - "55": 4.422108173370361, - "56": 2.442340850830078, - "57": 1.8581420183181763, - "58": 2.252784252166748, - "59": 2.876319646835327, - "60": 2.35105562210083, - "61": 2.063795328140259, - "62": 2.091115713119507, - "63": 1.8670570850372314, - "64": 1.9457156658172607, - "65": 3.7123773097991943, - "66": 2.3150274753570557, - "67": 1.7374805212020874, - "68": 1.8530274629592896, - "69": 2.2747626304626465, - "70": 2.5020837783813477, - "71": 1.7543612718582153, - "72": 1.455753207206726, - "73": 1.6155232191085815, - "74": 1.8848108053207397, - "75": 1.824452519416809, - "76": 2.532111644744873, - "77": 1.7775555849075317, - "78": 1.2957557439804077, - "79": 1.6579508781433105, - "80": 2.500919818878174, - "81": 1.7985024452209473, - "82": 1.5269691944122314, - "83": 1.5915164947509766, - "84": 2.01928448677063, - "85": 1.7772475481033325, - "86": 1.4115211963653564, - "87": 1.5029006004333496, - "88": 2.4845919609069824, - "89": 1.6661263704299927, - "90": 1.1188994646072388, - "91": 0.9967756867408752, - "92": 1.0742534399032593, - "93": 1.3105775117874146, - "94": 1.556591272354126, - "95": 1.9750018119812012, - "96": 1.867174744606018, - "97": 1.6376919746398926, - "98": 1.5593616962432861, - "99": 1.676844835281372, - "100": 1.7076938152313232, - "101": 1.5882302522659302, - "102": 1.5186654329299927, - "103": 1.5854926109313965, - "104": 1.5250271558761597, - "105": 1.397335171699524, - "106": 1.3582278490066528, - "107": 1.44816255569458 - }, - "loss": { - "54": 2.7846832275390625, - "55": 3.0264081954956055, - "56": 3.1928818225860596, - "57": 2.8545303344726562, - "58": 2.768885612487793, - "59": 3.0259578227996826, - "60": 2.9479548931121826, - "61": 2.8161587715148926, - "62": 2.7697196006774902, - "63": 2.816161632537842, - "64": 2.7519495487213135, - "65": 2.9004955291748047, - "66": 3.116636037826538, - "67": 2.734819173812866, - "68": 2.7392799854278564, - "69": 2.8012518882751465, - "70": 2.82958984375, - "71": 2.8844799995422363, - "72": 2.680906057357788, - "73": 2.6877853870391846, - "74": 2.7061243057250977, - "75": 2.78086519241333, - "76": 2.7352066040039062, - "77": 2.876542568206787, - "78": 2.6281580924987793, - "79": 2.6675047874450684, - "80": 2.7303972244262695, - "81": 2.8466410636901855, - "82": 2.658008575439453, - "83": 2.649508476257324, - "84": 2.6930394172668457, - "85": 2.755890369415283, - "86": 2.6223273277282715, - "87": 2.6280159950256348, - "88": 2.692025661468506, - "89": 2.847379207611084, - "90": 2.6395182609558105, - "91": 2.556950092315674, - "92": 2.6009671688079834, - "93": 2.5640203952789307, - "94": 2.634103298187256, - "95": 2.6527695655822754, - "96": 2.7670319080352783, - "97": 2.6478259563446045, - "98": 2.6880149841308594, - "99": 2.621617317199707, - "100": 2.731639862060547, - "101": 2.6194186210632324, - "102": 2.682933807373047, - "103": 2.592667818069458, - "104": 2.6661670207977295, - "105": 2.589637041091919, - "106": 2.6064839363098145, - "107": 2.599349021911621 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.188827, - 0.126873, - 0.179042, - 0.190261, - 0.179611, - 0.166163, - 0.178933, - 0.184932, - 0.183805, - 0.189751, - 0.187301, - 0.14231, - 0.181457, - 0.192194, - 0.189469, - 0.179379, - 0.173689, - 0.192876, - 0.198161, - 0.194678, - 0.188411, - 0.190489, - 0.171727, - 0.192294, - 0.201184, - 0.193558, - 0.172517, - 0.191596, - 0.196473, - 0.194964, - 0.184904, - 0.191416, - 0.198766, - 0.196816, - 0.172481, - 0.194605, - 0.204569, - 0.206379, - 0.205212, - 0.200557, - 0.195653, - 0.185654, - 0.189353, - 0.193888, - 0.195945, - 0.192773, - 0.192866, - 0.194831, - 0.196812, - 0.194706, - 0.196567, - 0.19888, - 0.199798, - 0.197841 - ], - "train_epoch_time": 4.840651988983154, - "train_loss": 2.6558046503764827, - "train_score": 0.229878048754833, - "val_loss": 2.692034591900358, - "val_score": 0.22710515921411503 - }, - { - "epoch": 2, - "grad_norm": 1.271525502204895, - "learning_rate": 0.215, - "model_norm": 87.47675323486328, - "step_logs": { - "grad_norm": { - "108": 1.4894136190414429, - "109": 1.3828215599060059, - "110": 1.4760452508926392, - "111": 1.4988921880722046, - "112": 1.4483243227005005, - "113": 1.403370976448059, - "114": 1.3012850284576416, - "115": 1.4540739059448242, - "116": 1.492295742034912, - "117": 1.346686601638794, - "118": 1.2770353555679321, - "119": 1.5110337734222412, - "120": 1.3763647079467773, - "121": 1.1737613677978516, - "122": 1.269608974456787, - "123": 1.3859057426452637, - "124": 1.3578894138336182, - "125": 1.1743577718734741, - "126": 1.2089983224868774, - "127": 1.333441972732544, - "128": 1.6433228254318237, - "129": 1.5958722829818726, - "130": 1.5101783275604248, - "131": 1.4649394750595093, - "132": 1.4888304471969604, - "133": 1.6221187114715576, - "134": 1.5748037099838257, - "135": 1.4491182565689087, - "136": 1.34010648727417, - "137": 1.4760727882385254, - "138": 1.827445387840271, - "139": 1.5479952096939087, - "140": 1.0878485441207886, - "141": 1.0872464179992676, - "142": 1.3751699924468994, - "143": 1.4036115407943726, - "144": 1.2593539953231812, - "145": 1.2724465131759644, - "146": 1.360815405845642, - "147": 1.260218858718872, - "148": 1.1086946725845337, - "149": 1.2605623006820679, - "150": 1.6293182373046875, - "151": 1.35448157787323, - "152": 0.9810760021209717, - "153": 0.9596835374832153, - "154": 1.2186592817306519, - "155": 1.3001915216445923, - "156": 1.3584920167922974, - "157": 1.4139901399612427, - "158": 1.8265085220336914, - "159": 1.5426403284072876, - "160": 1.20428466796875, - "161": 1.271525502204895 - }, - "loss": { - "108": 2.6572370529174805, - "109": 2.57897686958313, - "110": 2.6341605186462402, - "111": 2.6219968795776367, - "112": 2.6115472316741943, - "113": 2.593916416168213, - "114": 2.607882022857666, - "115": 2.5862903594970703, - "116": 2.674384117126465, - "117": 2.586663007736206, - "118": 2.607825756072998, - "119": 2.576197385787964, - "120": 2.636868715286255, - "121": 2.555208444595337, - "122": 2.557699203491211, - "123": 2.583102226257324, - "124": 2.5912489891052246, - "125": 2.570688486099243, - "126": 2.564873695373535, - "127": 2.588277816772461, - "128": 2.5981221199035645, - "129": 2.6688568592071533, - "130": 2.564763069152832, - "131": 2.6411280632019043, - "132": 2.5746936798095703, - "133": 2.6381752490997314, - "134": 2.6091103553771973, - "135": 2.5993170738220215, - "136": 2.562162399291992, - "137": 2.598769187927246, - "138": 2.600484848022461, - "139": 2.6600613594055176, - "140": 2.555112361907959, - "141": 2.544739246368408, - "142": 2.562129020690918, - "143": 2.6135807037353516, - "144": 2.5602288246154785, - "145": 2.5906286239624023, - "146": 2.5725152492523193, - "147": 2.5803475379943848, - "148": 2.5332067012786865, - "149": 2.552424430847168, - "150": 2.5680408477783203, - "151": 2.6129841804504395, - "152": 2.525791645050049, - "153": 2.5124363899230957, - "154": 2.525808811187744, - "155": 2.5710768699645996, - "156": 2.546027421951294, - "157": 2.5899622440338135, - "158": 2.575076103210449, - "159": 2.6611618995666504, - "160": 2.541957139968872, - "161": 2.5649991035461426 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 0.197294, - 0.199128, - 0.197445, - 0.196866, - 0.197911, - 0.198776, - 0.200972, - 0.197632, - 0.197336, - 0.199931, - 0.201457, - 0.196298, - 0.199586, - 0.203221, - 0.201358, - 0.199086, - 0.199722, - 0.203277, - 0.202589, - 0.200214, - 0.193391, - 0.194996, - 0.196241, - 0.197729, - 0.196787, - 0.19418, - 0.195068, - 0.19782, - 0.199935, - 0.197225, - 0.188919, - 0.196018, - 0.204803, - 0.204774, - 0.199195, - 0.198884, - 0.201577, - 0.201464, - 0.199558, - 0.201658, - 0.204341, - 0.201514, - 0.193497, - 0.199911, - 0.206539, - 0.206849, - 0.202218, - 0.200807, - 0.199458, - 0.198525, - 0.188717, - 0.196144, - 0.202575, - 0.201356 - ], - "train_epoch_time": 4.840525388717651, - "train_loss": 2.5537030063366446, - "train_score": 0.24254617999325181, - "val_loss": 2.5877756248522297, - "val_score": 0.23805252567840363 - }, - { - "epoch": 3, - "grad_norm": 1.1912970542907715, - "learning_rate": 0.215, - "model_norm": 87.49517822265625, - "step_logs": { - "grad_norm": { - "162": 1.2298322916030884, - "163": 1.1380009651184082, - "164": 1.181266188621521, - "165": 1.3488095998764038, - "166": 1.589025616645813, - "167": 1.3674647808074951, - "168": 1.069467306137085, - "169": 1.0639724731445312, - "170": 1.2408781051635742, - "171": 1.2359596490859985, - "172": 1.1599916219711304, - "173": 1.1227874755859375, - "174": 1.137825846672058, - "175": 1.204203486442566, - "176": 1.2972830533981323, - "177": 1.2549153566360474, - "178": 1.2810430526733398, - "179": 1.2485711574554443, - "180": 1.2574368715286255, - "181": 1.1830329895019531, - "182": 1.1039347648620605, - "183": 1.1476048231124878, - "184": 1.2330516576766968, - "185": 1.2602540254592896, - "186": 1.1536614894866943, - "187": 1.218313455581665, - "188": 1.2707144021987915, - "189": 1.3096917867660522, - "190": 1.197945237159729, - "191": 1.0904420614242554, - "192": 1.0734726190567017, - "193": 1.043286681175232, - "194": 0.9201653003692627, - "195": 0.9867708086967468, - "196": 1.0302892923355103, - "197": 1.052281379699707, - "198": 1.070786476135254, - "199": 1.1601042747497559, - "200": 1.2059375047683716, - "201": 1.265130877494812, - "202": 1.1423442363739014, - "203": 1.0491350889205933, - "204": 1.209293007850647, - "205": 1.464426040649414, - "206": 1.348534107208252, - "207": 1.0735282897949219, - "208": 1.0005972385406494, - "209": 1.0789453983306885, - "210": 1.2591506242752075, - "211": 1.2928924560546875, - "212": 1.428507924079895, - "213": 1.4925732612609863, - "214": 1.4553636312484741, - "215": 1.1912970542907715 - }, - "loss": { - "162": 2.5573983192443848, - "163": 2.5325074195861816, - "164": 2.540314197540283, - "165": 2.571876049041748, - "166": 2.57242488861084, - "167": 2.5828452110290527, - "168": 2.541640520095825, - "169": 2.550708293914795, - "170": 2.532912492752075, - "171": 2.567884922027588, - "172": 2.5111184120178223, - "173": 2.5614099502563477, - "174": 2.5236496925354004, - "175": 2.5592501163482666, - "176": 2.529876232147217, - "177": 2.5750584602355957, - "178": 2.5294413566589355, - "179": 2.5599637031555176, - "180": 2.5318305492401123, - "181": 2.5704944133758545, - "182": 2.5227646827697754, - "183": 2.5377259254455566, - "184": 2.5326521396636963, - "185": 2.556995391845703, - "186": 2.5199294090270996, - "187": 2.5326504707336426, - "188": 2.524649143218994, - "189": 2.5595850944519043, - "190": 2.538721799850464, - "191": 2.511291265487671, - "192": 2.517062187194824, - "193": 2.51430082321167, - "194": 2.491483688354492, - "195": 2.5122554302215576, - "196": 2.5037684440612793, - "197": 2.4930014610290527, - "198": 2.5202560424804688, - "199": 2.5206403732299805, - "200": 2.538309097290039, - "201": 2.524566888809204, - "202": 2.543318748474121, - "203": 2.4883763790130615, - "204": 2.5055623054504395, - "205": 2.5367953777313232, - "206": 2.5493545532226562, - "207": 2.521625518798828, - "208": 2.501934051513672, - "209": 2.4813055992126465, - "210": 2.536578416824341, - "211": 2.5319321155548096, - "212": 2.531804084777832, - "213": 2.5508384704589844, - "214": 2.542196273803711, - "215": 2.5534985065460205 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 0.202148, - 0.203797, - 0.203012, - 0.199806, - 0.194479, - 0.199475, - 0.205079, - 0.205209, - 0.201812, - 0.202077, - 0.20329, - 0.204196, - 0.203763, - 0.202656, - 0.200651, - 0.201737, - 0.200983, - 0.20179, - 0.201474, - 0.203112, - 0.204386, - 0.203639, - 0.201966, - 0.201543, - 0.203449, - 0.202257, - 0.201169, - 0.200552, - 0.202684, - 0.204587, - 0.204915, - 0.205439, - 0.207422, - 0.2064, - 0.205628, - 0.205202, - 0.204975, - 0.203329, - 0.202526, - 0.201282, - 0.203761, - 0.205241, - 0.202307, - 0.197089, - 0.199687, - 0.204932, - 0.206133, - 0.204677, - 0.201463, - 0.200752, - 0.197857, - 0.196547, - 0.197326, - 0.202879 - ], - "train_epoch_time": 4.8404014110565186, - "train_loss": 2.5008059670627545, - "train_score": 0.25275399024722567, - "val_loss": 2.54108883291654, - "val_score": 0.2455959390040517 - }, - { - "epoch": 4, - "grad_norm": 1.2254489660263062, - "learning_rate": 0.215, - "model_norm": 87.51608276367188, - "step_logs": { - "grad_norm": { - "216": 1.0964171886444092, - "217": 1.1696780920028687, - "218": 1.4042595624923706, - "219": 1.2265911102294922, - "220": 1.0236988067626953, - "221": 1.1062026023864746, - "222": 1.31465744972229, - "223": 1.2222636938095093, - "224": 1.0862435102462769, - "225": 1.0885816812515259, - "226": 1.2770341634750366, - "227": 1.379095196723938, - "228": 1.425399661064148, - "229": 1.24801766872406, - "230": 0.9755950570106506, - "231": 1.0528799295425415, - "232": 1.2775955200195312, - "233": 1.3246861696243286, - "234": 1.0958911180496216, - "235": 1.0020177364349365, - "236": 1.0134462118148804, - "237": 1.1333707571029663, - "238": 1.1260290145874023, - "239": 1.1110936403274536, - "240": 1.23879873752594, - "241": 1.2753078937530518, - "242": 1.4504419565200806, - "243": 1.3589779138565063, - "244": 1.1650276184082031, - "245": 1.2573984861373901, - "246": 1.3570982217788696, - "247": 1.2701681852340698, - "248": 1.1740875244140625, - "249": 1.1700470447540283, - "250": 1.244688868522644, - "251": 1.152511477470398, - "252": 1.1083216667175293, - "253": 1.1838114261627197, - "254": 1.3190199136734009, - "255": 1.2746427059173584, - "256": 1.272968053817749, - "257": 1.1549628973007202, - "258": 0.9561015963554382, - "259": 0.9624854922294617, - "260": 1.0395781993865967, - "261": 1.0860049724578857, - "262": 1.1191737651824951, - "263": 1.153929591178894, - "264": 1.173606276512146, - "265": 1.2698942422866821, - "266": 1.4086724519729614, - "267": 1.3684756755828857, - "268": 1.2066751718521118, - "269": 1.2254489660263062 - }, - "loss": { - "216": 2.507450580596924, - "217": 2.51652455329895, - "218": 2.531987428665161, - "219": 2.5652501583099365, - "220": 2.4923508167266846, - "221": 2.5162174701690674, - "222": 2.5088624954223633, - "223": 2.5503973960876465, - "224": 2.513488292694092, - "225": 2.513392448425293, - "226": 2.517491579055786, - "227": 2.541356086730957, - "228": 2.529808521270752, - "229": 2.549508571624756, - "230": 2.4958877563476562, - "231": 2.4786903858184814, - "232": 2.512308120727539, - "233": 2.5520105361938477, - "234": 2.4845728874206543, - "235": 2.4899091720581055, - "236": 2.4968483448028564, - "237": 2.494290828704834, - "238": 2.5014572143554688, - "239": 2.512244701385498, - "240": 2.5045242309570312, - "241": 2.524292230606079, - "242": 2.5382938385009766, - "243": 2.568535327911377, - "244": 2.5154201984405518, - "245": 2.5185720920562744, - "246": 2.509398937225342, - "247": 2.531099319458008, - "248": 2.499382972717285, - "249": 2.494936466217041, - "250": 2.497504711151123, - "251": 2.4962716102600098, - "252": 2.4703593254089355, - "253": 2.4983773231506348, - "254": 2.4947195053100586, - "255": 2.540478467941284, - "256": 2.4801793098449707, - "257": 2.5115981101989746, - "258": 2.4627747535705566, - "259": 2.4627232551574707, - "260": 2.4589145183563232, - "261": 2.486844539642334, - "262": 2.4743926525115967, - "263": 2.4681613445281982, - "264": 2.462559700012207, - "265": 2.4892711639404297, - "266": 2.491407871246338, - "267": 2.4934535026550293, - "268": 2.4579625129699707, - "269": 2.473228931427002 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 0.204462, - 0.203128, - 0.19839, - 0.202248, - 0.205702, - 0.204318, - 0.200176, - 0.202264, - 0.204671, - 0.204629, - 0.201003, - 0.198991, - 0.197913, - 0.20175, - 0.206533, - 0.205137, - 0.200964, - 0.200201, - 0.20438, - 0.206067, - 0.205895, - 0.203722, - 0.20389, - 0.204212, - 0.201713, - 0.201073, - 0.197411, - 0.199574, - 0.203213, - 0.201408, - 0.199278, - 0.201213, - 0.202966, - 0.203024, - 0.201559, - 0.203367, - 0.204091, - 0.202773, - 0.200006, - 0.20117, - 0.20089, - 0.203388, - 0.20675, - 0.206644, - 0.2053, - 0.20457, - 0.203904, - 0.203214, - 0.202806, - 0.201002, - 0.198043, - 0.198938, - 0.202128, - 0.201826 - ], - "train_epoch_time": 4.840526580810547, - "train_loss": 2.4780933330869743, - "train_score": 0.25790553260567883, - "val_loss": 2.5173659404027613, - "val_score": 0.2527671138126185 - }, - { - "epoch": 5, - "grad_norm": 1.0441583395004272, - "learning_rate": 0.215, - "model_norm": 87.5417251586914, - "step_logs": { - "grad_norm": { - "270": 1.4257049560546875, - "271": 1.4027704000473022, - "272": 1.2084821462631226, - "273": 1.3101686239242554, - "274": 1.5494297742843628, - "275": 1.5516151189804077, - "276": 1.2794824838638306, - "277": 1.0849683284759521, - "278": 1.3223330974578857, - "279": 1.2245690822601318, - "280": 0.9998918771743774, - "281": 0.9589459300041199, - "282": 1.0402448177337646, - "283": 1.1229039430618286, - "284": 1.0915417671203613, - "285": 1.1122839450836182, - "286": 1.222483515739441, - "287": 1.3077948093414307, - "288": 1.4777703285217285, - "289": 1.2915337085723877, - "290": 1.184860110282898, - "291": 1.3461042642593384, - "292": 1.8270072937011719, - "293": 1.5730558633804321, - "294": 1.1169146299362183, - "295": 1.2847164869308472, - "296": 1.4979956150054932, - "297": 1.4181334972381592, - "298": 1.3067734241485596, - "299": 1.2265006303787231, - "300": 1.2106889486312866, - "301": 1.155633568763733, - "302": 1.1394784450531006, - "303": 1.2937053442001343, - "304": 1.1369987726211548, - "305": 1.0279014110565186, - "306": 1.3103567361831665, - "307": 1.5478700399398804, - "308": 2.0410711765289307, - "309": 1.5554461479187012, - "310": 1.065980076789856, - "311": 1.0688998699188232, - "312": 1.1154381036758423, - "313": 1.3959089517593384, - "314": 1.3033171892166138, - "315": 1.1002565622329712, - "316": 1.0401891469955444, - "317": 1.254061222076416, - "318": 1.4104152917861938, - "319": 1.422568440437317, - "320": 1.5920617580413818, - "321": 1.3658921718597412, - "322": 1.0920586585998535, - "323": 1.0441583395004272 - }, - "loss": { - "270": 2.4630017280578613, - "271": 2.5044703483581543, - "272": 2.46856689453125, - "273": 2.481046199798584, - "274": 2.509547710418701, - "275": 2.5006208419799805, - "276": 2.486436605453491, - "277": 2.4444892406463623, - "278": 2.4406542778015137, - "279": 2.4858808517456055, - "280": 2.427652597427368, - "281": 2.425168752670288, - "282": 2.430178642272949, - "283": 2.4459736347198486, - "284": 2.411555767059326, - "285": 2.4211831092834473, - "286": 2.424304962158203, - "287": 2.430389404296875, - "288": 2.446146249771118, - "289": 2.4616544246673584, - "290": 2.4561920166015625, - "291": 2.4272093772888184, - "292": 2.466007709503174, - "293": 2.524806499481201, - "294": 2.4194936752319336, - "295": 2.439279556274414, - "296": 2.4590671062469482, - "297": 2.4588913917541504, - "298": 2.471344470977783, - "299": 2.4302077293395996, - "300": 2.4199886322021484, - "301": 2.4159021377563477, - "302": 2.401740789413452, - "303": 2.4411728382110596, - "304": 2.4191832542419434, - "305": 2.391284465789795, - "306": 2.388394594192505, - "307": 2.481267213821411, - "308": 2.495596408843994, - "309": 2.5316739082336426, - "310": 2.4105443954467773, - "311": 2.408486843109131, - "312": 2.395228624343872, - "313": 2.416781425476074, - "314": 2.440106153488159, - "315": 2.4157421588897705, - "316": 2.391627788543701, - "317": 2.411679744720459, - "318": 2.4394097328186035, - "319": 2.4460887908935547, - "320": 2.437913656234741, - "321": 2.482391595840454, - "322": 2.4095778465270996, - "323": 2.383333683013916 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 0.19748, - 0.198255, - 0.202144, - 0.200116, - 0.194951, - 0.194835, - 0.200789, - 0.204418, - 0.199626, - 0.201907, - 0.205885, - 0.206579, - 0.205179, - 0.203711, - 0.204157, - 0.203805, - 0.201638, - 0.199879, - 0.196173, - 0.200402, - 0.202554, - 0.199028, - 0.187689, - 0.194507, - 0.203709, - 0.200422, - 0.195793, - 0.197624, - 0.200134, - 0.201586, - 0.201857, - 0.20294, - 0.203191, - 0.200242, - 0.20332, - 0.205251, - 0.199576, - 0.194781, - 0.182288, - 0.19497, - 0.20463, - 0.204568, - 0.203629, - 0.197852, - 0.200031, - 0.20401, - 0.205029, - 0.200916, - 0.197671, - 0.19744, - 0.193386, - 0.198928, - 0.204139, - 0.204923 - ], - "train_epoch_time": 4.843405485153198, - "train_loss": 2.3760311295004453, - "train_score": 0.29545709300930567, - "val_loss": 2.432093657253804, - "val_score": 0.28322779143445115 - }, - { - "epoch": 6, - "grad_norm": 1.1946622133255005, - "learning_rate": 0.215, - "model_norm": 87.57019805908203, - "step_logs": { - "grad_norm": { - "324": 1.209341287612915, - "325": 1.2679269313812256, - "326": 1.4041210412979126, - "327": 1.3800965547561646, - "328": 1.187725305557251, - "329": 1.2039364576339722, - "330": 1.3025007247924805, - "331": 1.2542200088500977, - "332": 1.2112531661987305, - "333": 1.1725139617919922, - "334": 1.1050729751586914, - "335": 1.1464771032333374, - "336": 1.2001454830169678, - "337": 1.114606499671936, - "338": 1.0524088144302368, - "339": 1.194870114326477, - "340": 1.422607183456421, - "341": 1.3638899326324463, - "342": 1.287209391593933, - "343": 1.2944172620773315, - "344": 1.3072091341018677, - "345": 1.230619192123413, - "346": 1.1629935503005981, - "347": 1.3371115922927856, - "348": 1.3612323999404907, - "349": 1.4867256879806519, - "350": 1.2252507209777832, - "351": 1.1664153337478638, - "352": 1.478360652923584, - "353": 1.4365437030792236, - "354": 1.2908767461776733, - "355": 1.244454026222229, - "356": 1.1970903873443604, - "357": 1.2073019742965698, - "358": 1.1434781551361084, - "359": 1.2041680812835693, - "360": 1.3366196155548096, - "361": 1.353727102279663, - "362": 1.359007477760315, - "363": 1.4227417707443237, - "364": 1.2858036756515503, - "365": 1.372061014175415, - "366": 1.3342736959457397, - "367": 1.3146791458129883, - "368": 1.150631308555603, - "369": 1.3066638708114624, - "370": 1.6268926858901978, - "371": 1.4209587574005127, - "372": 1.132925033569336, - "373": 1.2390443086624146, - "374": 1.3550221920013428, - "375": 1.1910380125045776, - "376": 1.0154519081115723, - "377": 1.1946622133255005 - }, - "loss": { - "324": 2.3801145553588867, - "325": 2.4180331230163574, - "326": 2.3995344638824463, - "327": 2.4644646644592285, - "328": 2.3831417560577393, - "329": 2.4095840454101562, - "330": 2.392375946044922, - "331": 2.441563367843628, - "332": 2.373298168182373, - "333": 2.3981404304504395, - "334": 2.3750412464141846, - "335": 2.372262954711914, - "336": 2.3678417205810547, - "337": 2.3876588344573975, - "338": 2.358581066131592, - "339": 2.3888301849365234, - "340": 2.393829107284546, - "341": 2.4342761039733887, - "342": 2.3749585151672363, - "343": 2.4016263484954834, - "344": 2.4009885787963867, - "345": 2.397040605545044, - "346": 2.389812469482422, - "347": 2.3754165172576904, - "348": 2.388915777206421, - "349": 2.398926258087158, - "350": 2.3806843757629395, - "351": 2.3700313568115234, - "352": 2.3611607551574707, - "353": 2.404709815979004, - "354": 2.401388168334961, - "355": 2.3730459213256836, - "356": 2.344261884689331, - "357": 2.3675432205200195, - "358": 2.357637405395508, - "359": 2.3717997074127197, - "360": 2.3724632263183594, - "361": 2.3820204734802246, - "362": 2.3731064796447754, - "363": 2.389777183532715, - "364": 2.3574070930480957, - "365": 2.366885185241699, - "366": 2.3855202198028564, - "367": 2.344898223876953, - "368": 2.3641915321350098, - "369": 2.3563342094421387, - "370": 2.3924169540405273, - "371": 2.4021754264831543, - "372": 2.343140125274658, - "373": 2.3370323181152344, - "374": 2.318009614944458, - "375": 2.394998073577881, - "376": 2.328307628631592, - "377": 2.349825143814087 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 0.201678, - 0.200659, - 0.197551, - 0.198508, - 0.202137, - 0.201941, - 0.199771, - 0.201073, - 0.201603, - 0.202519, - 0.203739, - 0.202914, - 0.201804, - 0.203611, - 0.204668, - 0.20202, - 0.197088, - 0.198679, - 0.2, - 0.2, - 0.19972, - 0.201326, - 0.202669, - 0.198906, - 0.198453, - 0.195624, - 0.201351, - 0.202503, - 0.195543, - 0.196841, - 0.200075, - 0.200905, - 0.201743, - 0.201654, - 0.202903, - 0.201741, - 0.198899, - 0.198577, - 0.198401, - 0.197057, - 0.199927, - 0.198065, - 0.199032, - 0.199215, - 0.202792, - 0.199463, - 0.192148, - 0.197183, - 0.203044, - 0.200819, - 0.198129, - 0.20213, - 0.205229, - 0.201823 - ], - "train_epoch_time": 4.841535329818726, - "train_loss": 2.3503131663269086, - "train_score": 0.3076892037388242, - "val_loss": 2.3986090609181487, - "val_score": 0.29954793368360344 - }, - { - "epoch": 7, - "grad_norm": 1.055579662322998, - "learning_rate": 0.215, - "model_norm": 87.59964752197266, - "step_logs": { - "grad_norm": { - "378": 1.3451377153396606, - "379": 1.1908732652664185, - "380": 1.2701956033706665, - "381": 1.2041716575622559, - "382": 1.0092896223068237, - "383": 1.0356189012527466, - "384": 1.1023406982421875, - "385": 1.0996164083480835, - "386": 1.043758511543274, - "387": 1.098724603652954, - "388": 1.2740857601165771, - "389": 1.294141173362732, - "390": 1.4785574674606323, - "391": 1.2953040599822998, - "392": 1.2396036386489868, - "393": 1.4562002420425415, - "394": 1.3477267026901245, - "395": 1.2786006927490234, - "396": 1.4313055276870728, - "397": 1.4589734077453613, - "398": 1.3237029314041138, - "399": 1.2125753164291382, - "400": 1.0950778722763062, - "401": 1.11845064163208, - "402": 1.1946892738342285, - "403": 1.3943697214126587, - "404": 1.3681559562683105, - "405": 1.3384937047958374, - "406": 1.1290283203125, - "407": 1.0381335020065308, - "408": 1.0361014604568481, - "409": 1.1948539018630981, - "410": 1.45245361328125, - "411": 1.2811055183410645, - "412": 1.1782766580581665, - "413": 1.1835105419158936, - "414": 1.1730504035949707, - "415": 1.244701862335205, - "416": 1.2350660562515259, - "417": 1.165492296218872, - "418": 1.124158501625061, - "419": 1.1993390321731567, - "420": 1.2762608528137207, - "421": 1.2548555135726929, - "422": 1.0719361305236816, - "423": 1.0902810096740723, - "424": 1.0921398401260376, - "425": 1.0465158224105835, - "426": 1.0896083116531372, - "427": 1.1653156280517578, - "428": 1.2361220121383667, - "429": 0.9908849000930786, - "430": 0.8620224595069885, - "431": 1.055579662322998 - }, - "loss": { - "378": 2.3538331985473633, - "379": 2.360398292541504, - "380": 2.3280625343322754, - "381": 2.3678481578826904, - "382": 2.3326385021209717, - "383": 2.313075065612793, - "384": 2.3166229724884033, - "385": 2.320791721343994, - "386": 2.3170175552368164, - "387": 2.3302412033081055, - "388": 2.315654754638672, - "389": 2.3693933486938477, - "390": 2.306424617767334, - "391": 2.3838586807250977, - "392": 2.3433754444122314, - "393": 2.338879108428955, - "394": 2.3733837604522705, - "395": 2.3242640495300293, - "396": 2.356722116470337, - "397": 2.3543171882629395, - "398": 2.369826078414917, - "399": 2.3480446338653564, - "400": 2.3046045303344727, - "401": 2.3149924278259277, - "402": 2.3065247535705566, - "403": 2.302978038787842, - "404": 2.333252429962158, - "405": 2.3221964836120605, - "406": 2.3024978637695312, - "407": 2.278294086456299, - "408": 2.2879509925842285, - "409": 2.298394203186035, - "410": 2.336047649383545, - "411": 2.3536183834075928, - "412": 2.305198907852173, - "413": 2.3184239864349365, - "414": 2.3139631748199463, - "415": 2.3176610469818115, - "416": 2.2976155281066895, - "417": 2.315744400024414, - "418": 2.281510591506958, - "419": 2.301280975341797, - "420": 2.307081699371338, - "421": 2.312774896621704, - "422": 2.3024771213531494, - "423": 2.2889058589935303, - "424": 2.284330368041992, - "425": 2.2692503929138184, - "426": 2.2893426418304443, - "427": 2.301239013671875, - "428": 2.283637046813965, - "429": 2.302217483520508, - "430": 2.2413077354431152, - "431": 2.2743871212005615 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 0.198589, - 0.201956, - 0.200093, - 0.201721, - 0.205359, - 0.204792, - 0.203524, - 0.203597, - 0.204656, - 0.203658, - 0.199933, - 0.199817, - 0.195119, - 0.199877, - 0.200842, - 0.195906, - 0.198656, - 0.199886, - 0.196626, - 0.195954, - 0.199169, - 0.20144, - 0.203611, - 0.203197, - 0.20159, - 0.197111, - 0.19793, - 0.198534, - 0.202923, - 0.204596, - 0.204676, - 0.201542, - 0.195975, - 0.200007, - 0.201927, - 0.201888, - 0.202082, - 0.200586, - 0.200678, - 0.202247, - 0.202917, - 0.201463, - 0.199833, - 0.200337, - 0.204053, - 0.203632, - 0.203573, - 0.204396, - 0.203647, - 0.202175, - 0.200573, - 0.205575, - 0.207601, - 0.204243 - ], - "train_epoch_time": 4.840423345565796, - "train_loss": 2.297888960421, - "train_score": 0.3165317879089153, - "val_loss": 2.355872270844697, - "val_score": 0.3031581878046217 - }, - { - "epoch": 8, - "grad_norm": 1.1550438404083252, - "learning_rate": 0.215, - "model_norm": 87.62825012207031, - "step_logs": { - "grad_norm": { - "432": 1.365153193473816, - "433": 1.3560038805007935, - "434": 1.368341088294983, - "435": 1.3174641132354736, - "436": 1.146405577659607, - "437": 1.0643583536148071, - "438": 1.0985276699066162, - "439": 1.2951935529708862, - "440": 1.3509509563446045, - "441": 1.3641767501831055, - "442": 1.2514740228652954, - "443": 1.1132721900939941, - "444": 1.1015610694885254, - "445": 1.2110471725463867, - "446": 1.303719401359558, - "447": 1.410711407661438, - "448": 1.2663205862045288, - "449": 1.5121196508407593, - "450": 1.2035973072052002, - "451": 1.2652722597122192, - "452": 1.3201920986175537, - "453": 1.1836808919906616, - "454": 1.1488877534866333, - "455": 1.12118661403656, - "456": 1.0805782079696655, - "457": 1.07243812084198, - "458": 1.1381369829177856, - "459": 1.186643362045288, - "460": 1.3064751625061035, - "461": 1.2733253240585327, - "462": 1.2638254165649414, - "463": 1.2106198072433472, - "464": 1.1537352800369263, - "465": 1.130597710609436, - "466": 1.1394665241241455, - "467": 1.2455463409423828, - "468": 1.1818020343780518, - "469": 1.0450785160064697, - "470": 1.0058342218399048, - "471": 1.0737264156341553, - "472": 1.1295397281646729, - "473": 1.4419820308685303, - "474": 1.3979628086090088, - "475": 1.0647788047790527, - "476": 0.9907328486442566, - "477": 1.1762112379074097, - "478": 1.1809062957763672, - "479": 1.027660846710205, - "480": 1.0184491872787476, - "481": 1.176929235458374, - "482": 1.2625895738601685, - "483": 1.3737468719482422, - "484": 1.1834872961044312, - "485": 1.1550438404083252 - }, - "loss": { - "432": 2.3010950088500977, - "433": 2.330348014831543, - "434": 2.313136100769043, - "435": 2.305276870727539, - "436": 2.3067116737365723, - "437": 2.2573041915893555, - "438": 2.2753310203552246, - "439": 2.26004695892334, - "440": 2.326977252960205, - "441": 2.2865724563598633, - "442": 2.302429676055908, - "443": 2.262026309967041, - "444": 2.3001110553741455, - "445": 2.2458291053771973, - "446": 2.293147563934326, - "447": 2.274738311767578, - "448": 2.3027405738830566, - "449": 2.3040218353271484, - "450": 2.344588279724121, - "451": 2.261910915374756, - "452": 2.3321967124938965, - "453": 2.269744873046875, - "454": 2.2844362258911133, - "455": 2.259955883026123, - "456": 2.2510881423950195, - "457": 2.2372021675109863, - "458": 2.2662062644958496, - "459": 2.2566254138946533, - "460": 2.279414176940918, - "461": 2.288125991821289, - "462": 2.2763781547546387, - "463": 2.271963119506836, - "464": 2.252501964569092, - "465": 2.269219160079956, - "466": 2.2501087188720703, - "467": 2.260176658630371, - "468": 2.2875587940216064, - "469": 2.252750873565674, - "470": 2.235483169555664, - "471": 2.267218589782715, - "472": 2.2557997703552246, - "473": 2.2781195640563965, - "474": 2.320575714111328, - "475": 2.249013662338257, - "476": 2.237639904022217, - "477": 2.2713351249694824, - "478": 2.2765660285949707, - "479": 2.2186012268066406, - "480": 2.2426602840423584, - "481": 2.236692428588867, - "482": 2.2795021533966064, - "483": 2.296290397644043, - "484": 2.2837154865264893, - "485": 2.263822317123413 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 0.197781, - 0.198189, - 0.197789, - 0.198901, - 0.202592, - 0.203994, - 0.203403, - 0.199112, - 0.198282, - 0.197703, - 0.200349, - 0.203041, - 0.203461, - 0.200897, - 0.199133, - 0.196518, - 0.200026, - 0.194274, - 0.201609, - 0.199798, - 0.199012, - 0.201621, - 0.202427, - 0.202869, - 0.203645, - 0.20374, - 0.202554, - 0.201485, - 0.198982, - 0.199782, - 0.19992, - 0.201057, - 0.202158, - 0.202724, - 0.202442, - 0.200226, - 0.201758, - 0.20435, - 0.205025, - 0.203856, - 0.202677, - 0.195789, - 0.197151, - 0.203948, - 0.205318, - 0.201787, - 0.201717, - 0.204534, - 0.204817, - 0.20158, - 0.199967, - 0.197547, - 0.201702, - 0.202191 - ], - "train_epoch_time": 4.840659856796265, - "train_loss": 2.265298646354949, - "train_score": 0.33486594321738017, - "val_loss": 2.3233141140877587, - "val_score": 0.31987747632129593 - }, - { - "epoch": 9, - "grad_norm": 1.2153007984161377, - "learning_rate": 0.215, - "model_norm": 87.65998077392578, - "step_logs": { - "grad_norm": { - "486": 1.233638882637024, - "487": 1.2141677141189575, - "488": 1.2064309120178223, - "489": 1.191043734550476, - "490": 1.1241681575775146, - "491": 1.0938643217086792, - "492": 1.2959375381469727, - "493": 1.3498784303665161, - "494": 1.279057502746582, - "495": 1.1091612577438354, - "496": 1.1037935018539429, - "497": 1.1127700805664062, - "498": 1.1932390928268433, - "499": 1.2705918550491333, - "500": 1.3297266960144043, - "501": 1.190787434577942, - "502": 1.0845469236373901, - "503": 1.155734896659851, - "504": 1.386297583580017, - "505": 1.594787359237671, - "506": 1.3469895124435425, - "507": 1.053652048110962, - "508": 1.1587133407592773, - "509": 1.2236223220825195, - "510": 1.1223140954971313, - "511": 1.035866141319275, - "512": 1.042704701423645, - "513": 1.0608100891113281, - "514": 1.095118761062622, - "515": 1.1616548299789429, - "516": 1.1331512928009033, - "517": 1.0162184238433838, - "518": 1.0010215044021606, - "519": 1.3247815370559692, - "520": 1.68272864818573, - "521": 1.511479377746582, - "522": 1.2918812036514282, - "523": 1.2313283681869507, - "524": 1.375147819519043, - "525": 1.3673951625823975, - "526": 1.2110605239868164, - "527": 1.150638461112976, - "528": 1.1753302812576294, - "529": 1.12422776222229, - "530": 1.2279469966888428, - "531": 1.2788223028182983, - "532": 1.2954473495483398, - "533": 1.2269160747528076, - "534": 1.0226807594299316, - "535": 1.04542875289917, - "536": 1.160315752029419, - "537": 1.2120037078857422, - "538": 1.2115342617034912, - "539": 1.2153007984161377 - }, - "loss": { - "486": 2.271240234375, - "487": 2.2528562545776367, - "488": 2.2755236625671387, - "489": 2.2544641494750977, - "490": 2.235544443130493, - "491": 2.2373178005218506, - "492": 2.252967357635498, - "493": 2.288891315460205, - "494": 2.2466163635253906, - "495": 2.2325289249420166, - "496": 2.2303414344787598, - "497": 2.237607717514038, - "498": 2.219994068145752, - "499": 2.2615089416503906, - "500": 2.243333339691162, - "501": 2.2542519569396973, - "502": 2.2028679847717285, - "503": 2.2445478439331055, - "504": 2.240562677383423, - "505": 2.289966106414795, - "506": 2.2813310623168945, - "507": 2.2160959243774414, - "508": 2.2253313064575195, - "509": 2.2478623390197754, - "510": 2.2122726440429688, - "511": 2.2347638607025146, - "512": 2.200835704803467, - "513": 2.2348477840423584, - "514": 2.2356576919555664, - "515": 2.218681812286377, - "516": 2.206712245941162, - "517": 2.1890621185302734, - "518": 2.1951513290405273, - "519": 2.2241313457489014, - "520": 2.2651076316833496, - "521": 2.264780044555664, - "522": 2.2271225452423096, - "523": 2.2212657928466797, - "524": 2.248021125793457, - "525": 2.278855323791504, - "526": 2.2251272201538086, - "527": 2.2155160903930664, - "528": 2.2381434440612793, - "529": 2.230454206466675, - "530": 2.2266294956207275, - "531": 2.232898235321045, - "532": 2.2049005031585693, - "533": 2.2213573455810547, - "534": 2.1983048915863037, - "535": 2.211486339569092, - "536": 2.213494300842285, - "537": 2.2402520179748535, - "538": 2.2305421829223633, - "539": 2.2173516750335693 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 0.200554, - 0.20087, - 0.201168, - 0.201378, - 0.202683, - 0.203311, - 0.199049, - 0.198051, - 0.199391, - 0.202976, - 0.203075, - 0.202928, - 0.201133, - 0.199677, - 0.198206, - 0.201383, - 0.203329, - 0.202073, - 0.196849, - 0.192068, - 0.198066, - 0.204013, - 0.201905, - 0.200634, - 0.2026, - 0.204447, - 0.204158, - 0.20396, - 0.203278, - 0.201805, - 0.202343, - 0.204623, - 0.204943, - 0.198188, - 0.18953, - 0.193966, - 0.198971, - 0.200303, - 0.19717, - 0.197574, - 0.200774, - 0.202022, - 0.201622, - 0.202655, - 0.200411, - 0.199308, - 0.198739, - 0.200401, - 0.204539, - 0.204154, - 0.201805, - 0.200843, - 0.200796, - 0.200634 - ], - "train_epoch_time": 4.840081453323364, - "train_loss": 2.2086696682222606, - "train_score": 0.35030039462502754, - "val_loss": 2.2746743570102206, - "val_score": 0.3350764209860091 - }, - { - "epoch": 10, - "grad_norm": 1.1135305166244507, - "learning_rate": 0.215, - "model_norm": 87.6910629272461, - "step_logs": { - "grad_norm": { - "540": 1.1924806833267212, - "541": 1.2105599641799927, - "542": 1.3154199123382568, - "543": 1.315085768699646, - "544": 1.369020700454712, - "545": 1.4047414064407349, - "546": 1.2444480657577515, - "547": 1.0646004676818848, - "548": 1.072293758392334, - "549": 1.1664706468582153, - "550": 1.284791350364685, - "551": 1.4361481666564941, - "552": 1.721239686012268, - "553": 1.5366016626358032, - "554": 1.2604020833969116, - "555": 1.208587646484375, - "556": 1.2766797542572021, - "557": 1.367066502571106, - "558": 1.3245970010757446, - "559": 1.2827086448669434, - "560": 1.23690927028656, - "561": 1.0754646062850952, - "562": 1.1085658073425293, - "563": 1.15536367893219, - "564": 1.195185661315918, - "565": 1.199941635131836, - "566": 1.1892069578170776, - "567": 1.1026349067687988, - "568": 1.0973957777023315, - "569": 1.1464769840240479, - "570": 1.1958860158920288, - "571": 1.1105296611785889, - "572": 1.0993361473083496, - "573": 1.131054401397705, - "574": 1.1951467990875244, - "575": 1.2088366746902466, - "576": 1.193009853363037, - "577": 1.228737711906433, - "578": 1.2117626667022705, - "579": 1.3306143283843994, - "580": 1.319515347480774, - "581": 1.3573763370513916, - "582": 1.191267728805542, - "583": 1.140681505203247, - "584": 1.2900415658950806, - "585": 1.2409026622772217, - "586": 1.0990839004516602, - "587": 1.152770757675171, - "588": 1.1782840490341187, - "589": 1.2219172716140747, - "590": 1.2784974575042725, - "591": 1.2382553815841675, - "592": 1.2049047946929932, - "593": 1.1135305166244507 - }, - "loss": { - "540": 2.213007926940918, - "541": 2.212836265563965, - "542": 2.208578109741211, - "543": 2.2307372093200684, - "544": 2.1928415298461914, - "545": 2.2565293312072754, - "546": 2.224583148956299, - "547": 2.2006492614746094, - "548": 2.173956871032715, - "549": 2.208059310913086, - "550": 2.198654890060425, - "551": 2.2533912658691406, - "552": 2.217897891998291, - "553": 2.2837929725646973, - "554": 2.219416618347168, - "555": 2.191779136657715, - "556": 2.2330005168914795, - "557": 2.2229256629943848, - "558": 2.2020926475524902, - "559": 2.1982059478759766, - "560": 2.2267231941223145, - "561": 2.181406021118164, - "562": 2.1991848945617676, - "563": 2.1945605278015137, - "564": 2.1959872245788574, - "565": 2.1900382041931152, - "566": 2.2034618854522705, - "567": 2.199899196624756, - "568": 2.197300910949707, - "569": 2.1927294731140137, - "570": 2.1972815990448, - "571": 2.1866087913513184, - "572": 2.190741539001465, - "573": 2.1684489250183105, - "574": 2.176062822341919, - "575": 2.1910810470581055, - "576": 2.2188782691955566, - "577": 2.2018861770629883, - "578": 2.1876392364501953, - "579": 2.20829439163208, - "580": 2.2024965286254883, - "581": 2.231093406677246, - "582": 2.2048001289367676, - "583": 2.161870241165161, - "584": 2.213850975036621, - "585": 2.209580183029175, - "586": 2.2153677940368652, - "587": 2.149094820022583, - "588": 2.1656312942504883, - "589": 2.2077109813690186, - "590": 2.1999168395996094, - "591": 2.18867826461792, - "592": 2.178313732147217, - "593": 2.17741060256958 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 0.201108, - 0.200711, - 0.198299, - 0.19846, - 0.196908, - 0.196525, - 0.20003, - 0.203721, - 0.203433, - 0.201642, - 0.198944, - 0.19574, - 0.188003, - 0.193495, - 0.199639, - 0.200627, - 0.199357, - 0.197179, - 0.198038, - 0.198989, - 0.200212, - 0.203406, - 0.202816, - 0.201804, - 0.200948, - 0.200808, - 0.201123, - 0.202943, - 0.203038, - 0.201984, - 0.200941, - 0.202709, - 0.202964, - 0.202178, - 0.200829, - 0.200617, - 0.201131, - 0.20024, - 0.200531, - 0.19794, - 0.19816, - 0.19747, - 0.201086, - 0.201935, - 0.198925, - 0.200016, - 0.203095, - 0.201599, - 0.201138, - 0.200428, - 0.199097, - 0.199943, - 0.200626, - 0.202598 - ], - "train_epoch_time": 4.840947866439819, - "train_loss": 2.16334964568851, - "train_score": 0.3608444673686664, - "val_loss": 2.2341386738108167, - "val_score": 0.3450416187604998 - }, - { - "epoch": 11, - "grad_norm": 1.312681794166565, - "learning_rate": 0.215, - "model_norm": 87.7225341796875, - "step_logs": { - "grad_norm": { - "594": 1.008500099182129, - "595": 1.0446864366531372, - "596": 1.1473726034164429, - "597": 1.2513790130615234, - "598": 1.241938829421997, - "599": 1.3187003135681152, - "600": 1.3406378030776978, - "601": 1.1958459615707397, - "602": 1.173087477684021, - "603": 1.232597827911377, - "604": 1.2778140306472778, - "605": 1.2159404754638672, - "606": 1.1979267597198486, - "607": 1.2797011137008667, - "608": 1.179030418395996, - "609": 1.2141413688659668, - "610": 1.3440357446670532, - "611": 1.5633234977722168, - "612": 1.482245922088623, - "613": 1.1294686794281006, - "614": 1.0476479530334473, - "615": 1.1184669733047485, - "616": 1.195744514465332, - "617": 1.306510329246521, - "618": 1.3374907970428467, - "619": 1.2111223936080933, - "620": 1.089604139328003, - "621": 1.0641323328018188, - "622": 1.2376214265823364, - "623": 1.367131233215332, - "624": 1.3086905479431152, - "625": 1.2927950620651245, - "626": 1.3221948146820068, - "627": 1.1964964866638184, - "628": 1.1774989366531372, - "629": 1.1938879489898682, - "630": 1.165571689605713, - "631": 1.2354921102523804, - "632": 1.168449878692627, - "633": 1.0851802825927734, - "634": 1.0655956268310547, - "635": 1.0434281826019287, - "636": 1.0470391511917114, - "637": 1.1096960306167603, - "638": 1.128406047821045, - "639": 1.202164649963379, - "640": 1.2895901203155518, - "641": 1.3830327987670898, - "642": 1.2526203393936157, - "643": 1.2544100284576416, - "644": 1.288190245628357, - "645": 1.3050748109817505, - "646": 1.2612011432647705, - "647": 1.312681794166565 - }, - "loss": { - "594": 2.1835596561431885, - "595": 2.1635217666625977, - "596": 2.15520977973938, - "597": 2.2058725357055664, - "598": 2.162757635116577, - "599": 2.1719326972961426, - "600": 2.2152485847473145, - "601": 2.183899164199829, - "602": 2.1836190223693848, - "603": 2.188340187072754, - "604": 2.182103157043457, - "605": 2.1895699501037598, - "606": 2.1926074028015137, - "607": 2.1965651512145996, - "608": 2.1853103637695312, - "609": 2.175611972808838, - "610": 2.1703147888183594, - "611": 2.2041213512420654, - "612": 2.2091188430786133, - "613": 2.1748290061950684, - "614": 2.1578235626220703, - "615": 2.12998628616333, - "616": 2.177206516265869, - "617": 2.161860942840576, - "618": 2.1763672828674316, - "619": 2.1999690532684326, - "620": 2.154531478881836, - "621": 2.1640496253967285, - "622": 2.1672894954681396, - "623": 2.2073581218719482, - "624": 2.1857070922851562, - "625": 2.1778488159179688, - "626": 2.15925931930542, - "627": 2.1949963569641113, - "628": 2.1528680324554443, - "629": 2.15946888923645, - "630": 2.137371301651001, - "631": 2.164529323577881, - "632": 2.1640875339508057, - "633": 2.15228271484375, - "634": 2.143721580505371, - "635": 2.1501126289367676, - "636": 2.116665840148926, - "637": 2.1433191299438477, - "638": 2.146568536758423, - "639": 2.1621246337890625, - "640": 2.1805591583251953, - "641": 2.1786248683929443, - "642": 2.1812148094177246, - "643": 2.1379170417785645, - "644": 2.1585628986358643, - "645": 2.1806044578552246, - "646": 2.1574254035949707, - "647": 2.160951852798462 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 0.204748, - 0.203941, - 0.201752, - 0.199756, - 0.199691, - 0.197961, - 0.197752, - 0.200861, - 0.201358, - 0.200068, - 0.198993, - 0.200449, - 0.200868, - 0.199047, - 0.201239, - 0.200403, - 0.197343, - 0.192102, - 0.194234, - 0.202247, - 0.203853, - 0.202232, - 0.200823, - 0.198179, - 0.197545, - 0.200621, - 0.202976, - 0.20355, - 0.199819, - 0.197063, - 0.198297, - 0.198615, - 0.197786, - 0.200913, - 0.201079, - 0.200755, - 0.201249, - 0.199849, - 0.201345, - 0.203057, - 0.203417, - 0.203901, - 0.203661, - 0.202493, - 0.202112, - 0.200587, - 0.198709, - 0.196458, - 0.199567, - 0.199236, - 0.198588, - 0.198346, - 0.199211, - 0.198025 - ], - "train_epoch_time": 4.842037916183472, - "train_loss": 2.1703756035485946, - "train_score": 0.36013719511339964, - "val_loss": 2.2594807024801367, - "val_score": 0.33866425164805214 - }, - { - "epoch": 12, - "grad_norm": 0.8645442128181458, - "learning_rate": 0.215, - "model_norm": 87.7514877319336, - "step_logs": { - "grad_norm": { - "648": 1.3653533458709717, - "649": 1.3219352960586548, - "650": 1.5024542808532715, - "651": 1.3224575519561768, - "652": 1.1388252973556519, - "653": 1.1506034135818481, - "654": 1.1217355728149414, - "655": 1.1146156787872314, - "656": 1.0351256132125854, - "657": 1.056700348854065, - "658": 1.1291779279708862, - "659": 1.0608233213424683, - "660": 1.0120782852172852, - "661": 1.086010217666626, - "662": 1.2112925052642822, - "663": 1.2657173871994019, - "664": 1.3186125755310059, - "665": 1.314749836921692, - "666": 1.1181504726409912, - "667": 0.9255827069282532, - "668": 0.8506559133529663, - "669": 0.8628355860710144, - "670": 0.9333071708679199, - "671": 0.9687804579734802, - "672": 0.9678217768669128, - "673": 0.8736199736595154, - "674": 0.8916498422622681, - "675": 0.9800131320953369, - "676": 1.0898487567901611, - "677": 1.135873794555664, - "678": 1.0493063926696777, - "679": 0.9587380886077881, - "680": 0.9366052746772766, - "681": 1.002776026725769, - "682": 0.9248806238174438, - "683": 0.86803138256073, - "684": 0.9090822339057922, - "685": 0.9373408555984497, - "686": 0.9818825721740723, - "687": 0.9594290256500244, - "688": 0.8473807573318481, - "689": 0.7880852818489075, - "690": 0.831360399723053, - "691": 0.8623039722442627, - "692": 0.859737753868103, - "693": 0.8209106922149658, - "694": 0.7813437581062317, - "695": 0.8045181035995483, - "696": 0.9573120474815369, - "697": 0.9565431475639343, - "698": 0.9431483745574951, - "699": 0.9549505710601807, - "700": 0.9098271727561951, - "701": 0.8645442128181458 - }, - "loss": { - "648": 2.1861355304718018, - "649": 2.2000722885131836, - "650": 2.1804940700531006, - "651": 2.205758810043335, - "652": 2.141606330871582, - "653": 2.12888765335083, - "654": 2.1415884494781494, - "655": 2.1430020332336426, - "656": 2.1317567825317383, - "657": 2.122035026550293, - "658": 2.1527321338653564, - "659": 2.117032527923584, - "660": 2.106912612915039, - "661": 2.1272120475769043, - "662": 2.1285524368286133, - "663": 2.127065420150757, - "664": 2.1504104137420654, - "665": 2.1715259552001953, - "666": 2.135071039199829, - "667": 2.128135919570923, - "668": 2.089106321334839, - "669": 2.098954916000366, - "670": 2.121504306793213, - "671": 2.1078615188598633, - "672": 2.0789291858673096, - "673": 2.078354835510254, - "674": 2.0973331928253174, - "675": 2.0997142791748047, - "676": 2.1036458015441895, - "677": 2.1171813011169434, - "678": 2.126020669937134, - "679": 2.066249370574951, - "680": 2.073198080062866, - "681": 2.0999155044555664, - "682": 2.1347076892852783, - "683": 2.081172466278076, - "684": 2.0877301692962646, - "685": 2.087977170944214, - "686": 2.1000242233276367, - "687": 2.0988271236419678, - "688": 2.0919158458709717, - "689": 2.0790212154388428, - "690": 2.0437426567077637, - "691": 2.067035675048828, - "692": 2.082629919052124, - "693": 2.0848536491394043, - "694": 2.079010009765625, - "695": 2.1066741943359375, - "696": 2.0822031497955322, - "697": 2.07090163230896, - "698": 2.0942249298095703, - "699": 2.0699968338012695, - "700": 2.0855460166931152, - "701": 2.0823583602905273 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 0.196946, - 0.196959, - 0.191317, - 0.194728, - 0.197172, - 0.195686, - 0.195167, - 0.194134, - 0.194398, - 0.192758, - 0.190356, - 0.190267, - 0.189885, - 0.18747, - 0.183966, - 0.18174, - 0.179689, - 0.178748, - 0.180984, - 0.182801, - 0.1825, - 0.181119, - 0.178976, - 0.177198, - 0.175891, - 0.175947, - 0.174523, - 0.172114, - 0.169338, - 0.167503, - 0.167583, - 0.167385, - 0.166455, - 0.164462, - 0.164285, - 0.163559, - 0.161865, - 0.1603, - 0.158578, - 0.157599, - 0.157536, - 0.156825, - 0.155083, - 0.15357, - 0.152371, - 0.151476, - 0.150548, - 0.149127, - 0.146421, - 0.14516, - 0.144093, - 0.142685, - 0.141877, - 0.141 - ], - "train_epoch_time": 4.842416763305664, - "train_loss": 2.0689756368804013, - "train_score": 0.38654949774530045, - "val_loss": 2.1623969784286623, - "val_score": 0.3602854122934878 - }, - { - "epoch": 13, - "grad_norm": 0.5363101959228516, - "learning_rate": 0.14333333333333334, - "model_norm": 87.76978302001953, - "step_logs": { - "grad_norm": { - "702": 0.8883509635925293, - "703": 0.8592520356178284, - "704": 0.769087016582489, - "705": 0.6632691621780396, - "706": 0.7133074402809143, - "707": 0.7297330498695374, - "708": 0.7723709344863892, - "709": 0.6901921629905701, - "710": 0.7359227538108826, - "711": 0.7262988090515137, - "712": 0.651531994342804, - "713": 0.7605281472206116, - "714": 0.9197530150413513, - "715": 0.9430732727050781, - "716": 0.8162515759468079, - "717": 0.7407421469688416, - "718": 0.7444875836372375, - "719": 0.7681788206100464, - "720": 0.7688164710998535, - "721": 0.7129867672920227, - "722": 0.645167887210846, - "723": 0.6192553639411926, - "724": 0.6808722019195557, - "725": 0.7311127185821533, - "726": 0.7050772905349731, - "727": 0.6581369638442993, - "728": 0.6261205077171326, - "729": 0.6191504001617432, - "730": 0.6982353925704956, - "731": 0.621631383895874, - "732": 0.5973417162895203, - "733": 0.5936336517333984, - "734": 0.5752581357955933, - "735": 0.575713038444519, - "736": 0.5697436928749084, - "737": 0.5716831684112549, - "738": 0.6064039468765259, - "739": 0.57911616563797, - "740": 0.5792168378829956, - "741": 0.5748128890991211, - "742": 0.5458971858024597, - "743": 0.5080474019050598, - "744": 0.5709893107414246, - "745": 0.5712481141090393, - "746": 0.5845890641212463, - "747": 0.6277804374694824, - "748": 0.6384970545768738, - "749": 0.6545924544334412, - "750": 0.5954940915107727, - "751": 0.6030061841011047, - "752": 0.6112725734710693, - "753": 0.5923959612846375, - "754": 0.5564180612564087, - "755": 0.5363101959228516 - }, - "loss": { - "702": 2.0729596614837646, - "703": 2.0633277893066406, - "704": 2.0597636699676514, - "705": 2.0561351776123047, - "706": 2.0582871437072754, - "707": 2.040992498397827, - "708": 2.048515796661377, - "709": 2.033328056335449, - "710": 2.052187442779541, - "711": 2.045327663421631, - "712": 2.022686004638672, - "713": 2.05265212059021, - "714": 2.0748143196105957, - "715": 2.0561935901641846, - "716": 2.0642476081848145, - "717": 2.0598320960998535, - "718": 2.0539934635162354, - "719": 2.0780582427978516, - "720": 2.059189796447754, - "721": 2.0391459465026855, - "722": 2.0503721237182617, - "723": 2.0561342239379883, - "724": 2.060713768005371, - "725": 2.0360946655273438, - "726": 2.0340356826782227, - "727": 2.0316989421844482, - "728": 2.05830717086792, - "729": 2.0323402881622314, - "730": 2.04949688911438, - "731": 2.0531349182128906, - "732": 2.0473358631134033, - "733": 2.030392646789551, - "734": 2.043651580810547, - "735": 2.0507802963256836, - "736": 2.0280494689941406, - "737": 2.0345144271850586, - "738": 2.025296688079834, - "739": 2.053833484649658, - "740": 2.0281972885131836, - "741": 2.020223617553711, - "742": 2.0417661666870117, - "743": 2.0308613777160645, - "744": 2.0394160747528076, - "745": 2.0192008018493652, - "746": 2.0360751152038574, - "747": 2.0164196491241455, - "748": 2.034536838531494, - "749": 2.03755784034729, - "750": 2.0405497550964355, - "751": 2.0115280151367188, - "752": 2.039904832839966, - "753": 2.0261573791503906, - "754": 2.0093178749084473, - "755": 2.014651298522949 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 0.139527, - 0.138488, - 0.137894, - 0.137305, - 0.13571, - 0.134303, - 0.132754, - 0.131971, - 0.130432, - 0.1292, - 0.128311, - 0.126441, - 0.124182, - 0.122734, - 0.122291, - 0.12143, - 0.12012, - 0.118736, - 0.117431, - 0.116403, - 0.115422, - 0.114233, - 0.112689, - 0.111163, - 0.109983, - 0.108876, - 0.107711, - 0.106421, - 0.104849, - 0.103821, - 0.102593, - 0.101293, - 0.100047, - 0.0987428, - 0.097444, - 0.0961346, - 0.0947338, - 0.093509, - 0.0921929, - 0.0908934, - 0.0896564, - 0.0884201, - 0.0869836, - 0.0856683, - 0.0843371, - 0.0829335, - 0.0816111, - 0.0802735, - 0.0790825, - 0.0777522, - 0.0764364, - 0.0751558, - 0.0738979, - 0.0726155 - ], - "train_epoch_time": 4.841723442077637, - "train_loss": 2.0200369407319956, - "train_score": 0.40081263441095394, - "val_loss": 2.1211726931014647, - "val_score": 0.3735514136050517 - }, - { - "epoch": 14, - "grad_norm": 0.49820977449417114, - "learning_rate": 0.07166666666666667, - "model_norm": 87.77592468261719, - "step_logs": { - "grad_norm": { - "756": 0.5221066474914551, - "757": 0.5132834911346436, - "758": 0.5706493258476257, - "759": 0.535114586353302, - "760": 0.5421645641326904, - "761": 0.5559265613555908, - "762": 0.526034951210022, - "763": 0.550947904586792, - "764": 0.5373929738998413, - "765": 0.5140271782875061, - "766": 0.5464844703674316, - "767": 0.549755334854126, - "768": 0.5052742958068848, - "769": 0.5164358019828796, - "770": 0.5272457599639893, - "771": 0.5559020042419434, - "772": 0.5261601805686951, - "773": 0.5093954205513, - "774": 0.5267736911773682, - "775": 0.5561476349830627, - "776": 0.5652735233306885, - "777": 0.4991748332977295, - "778": 0.5283232927322388, - "779": 0.5367175936698914, - "780": 0.5495347380638123, - "781": 0.5226330161094666, - "782": 0.5215216875076294, - "783": 0.5344538688659668, - "784": 0.5237383842468262, - "785": 0.49886298179626465, - "786": 0.5775716304779053, - "787": 0.4731007516384125, - "788": 0.5129083395004272, - "789": 0.5332512855529785, - "790": 0.5280617475509644, - "791": 0.5684983134269714, - "792": 0.5158213376998901, - "793": 0.47962474822998047, - "794": 0.5290770530700684, - "795": 0.5021599531173706, - "796": 0.5128231644630432, - "797": 0.5109617114067078, - "798": 0.484877347946167, - "799": 0.46217435598373413, - "800": 0.5111664533615112, - "801": 0.49971768260002136, - "802": 0.5418329834938049, - "803": 0.4835483133792877, - "804": 0.5274619460105896, - "805": 0.4836483299732208, - "806": 0.4814581871032715, - "807": 0.5172290802001953, - "808": 0.48542389273643494, - "809": 0.49820977449417114 - }, - "loss": { - "756": 1.9961459636688232, - "757": 2.016197443008423, - "758": 2.027376651763916, - "759": 2.009145498275757, - "760": 1.9886105060577393, - "761": 2.033357858657837, - "762": 2.028230667114258, - "763": 2.021200180053711, - "764": 2.0305662155151367, - "765": 2.0066072940826416, - "766": 2.0238027572631836, - "767": 2.0330982208251953, - "768": 2.0092780590057373, - "769": 1.9869295358657837, - "770": 2.0419557094573975, - "771": 2.0181655883789062, - "772": 2.033578395843506, - "773": 2.0078439712524414, - "774": 2.0236825942993164, - "775": 2.0226707458496094, - "776": 2.017505645751953, - "777": 2.0299360752105713, - "778": 1.9937987327575684, - "779": 2.0203371047973633, - "780": 1.9887791872024536, - "781": 1.9968193769454956, - "782": 2.0263595581054688, - "783": 2.0171070098876953, - "784": 2.040105104446411, - "785": 2.0039658546447754, - "786": 2.0116939544677734, - "787": 2.015235424041748, - "788": 2.0473623275756836, - "789": 2.015364646911621, - "790": 2.009763717651367, - "791": 2.0257201194763184, - "792": 2.0156891345977783, - "793": 2.0145578384399414, - "794": 2.0035433769226074, - "795": 2.0284392833709717, - "796": 1.9969291687011719, - "797": 2.025338888168335, - "798": 2.0100743770599365, - "799": 1.9956440925598145, - "800": 2.01975679397583, - "801": 2.017876148223877, - "802": 1.9919517040252686, - "803": 1.9907593727111816, - "804": 2.0122876167297363, - "805": 2.001923084259033, - "806": 2.0100719928741455, - "807": 2.010221481323242, - "808": 1.9846417903900146, - "809": 2.003322124481201 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 0.0713177, - 0.0700177, - 0.068632, - 0.0673603, - 0.0660342, - 0.0647111, - 0.0634281, - 0.0620857, - 0.0607855, - 0.0594883, - 0.0581445, - 0.0568269, - 0.055544, - 0.0542156, - 0.0528953, - 0.051555, - 0.0502596, - 0.0489496, - 0.0476218, - 0.0462862, - 0.0449628, - 0.0436789, - 0.0423433, - 0.0410217, - 0.0396948, - 0.0383866, - 0.0370681, - 0.0357426, - 0.0344263, - 0.0331108, - 0.031768, - 0.030473, - 0.0291429, - 0.0278157, - 0.0264944, - 0.0251654, - 0.0238513, - 0.0225327, - 0.0212031, - 0.0198828, - 0.0185575, - 0.0172339, - 0.0159111, - 0.0145874, - 0.0132602, - 0.0119356, - 0.010609, - 0.00928506, - 0.00795858, - 0.00663323, - 0.00530702, - 0.00398043, - 0.0026539, - 0.00132705 - ], - "train_epoch_time": 4.842728853225708, - "train_loss": 2.0071171917908504, - "train_score": 0.40409791955304797, - "val_loss": 2.1130431497685533, - "val_score": 0.3750403628146744 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:28:45.356462", - "final_model_norm": 87.77592468261719, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:27:03.811394", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 4.224915981292725, - "learning_rate": 2.15e-11, - "model_norm": 87.43364715576172, - "step_logs": { - "grad_norm": { - "0": 22.7664794921875, - "1": 23.4499454498291, - "2": 6.8804192543029785, - "3": 7.361940383911133, - "4": 20.986963272094727, - "5": 7.570193290710449, - "6": 5.781525611877441, - "7": 4.410154819488525, - "8": 3.9404349327087402, - "9": 7.543168067932129, - "10": 5.914219379425049, - "11": 6.213420391082764, - "12": 49.07276153564453, - "13": 3.979478359222412, - "14": 6.725850582122803, - "15": 31.08464241027832, - "16": 3.8007287979125977, - "17": 31.767662048339844, - "18": 3.544416666030884, - "19": 12.058910369873047, - "20": 4.551290512084961, - "21": 12.3528470993042, - "22": 5.656938552856445, - "23": 37.410377502441406, - "24": 3.778252363204956, - "25": 5.534794330596924, - "26": 3.278383255004883, - "27": 7.334910869598389, - "28": 4.0505690574646, - "29": 16.37187385559082, - "30": 5.312236309051514, - "31": 11.506085395812988, - "32": 4.718480587005615, - "33": 11.081938743591309, - "34": 4.557792663574219, - "35": 16.363351821899414, - "36": 3.371377468109131, - "37": 11.182097434997559, - "38": 6.526354789733887, - "39": 3.1674888134002686, - "40": 15.734713554382324, - "41": 5.038153648376465, - "42": 4.464365482330322, - "43": 2.5911953449249268, - "44": 5.960587501525879, - "45": 3.347862720489502, - "46": 3.0237228870391846, - "47": 1.7852531671524048, - "48": 2.6196510791778564, - "49": 3.746157169342041, - "50": 3.715843677520752, - "51": 2.8751227855682373, - "52": 2.096085548400879, - "53": 4.224915981292725 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.8451218605041504, - "3": 3.6794261932373047, - "4": 4.073674201965332, - "5": 4.087124824523926, - "6": 3.5744667053222656, - "7": 3.6075944900512695, - "8": 3.501760959625244, - "9": 3.513716459274292, - "10": 3.7672653198242188, - "11": 3.391489028930664, - "12": 3.6313395500183105, - "13": 3.4073574542999268, - "14": 3.5225577354431152, - "15": 3.5085549354553223, - "16": 3.3431077003479004, - "17": 3.6108317375183105, - "18": 3.094069004058838, - "19": 3.347799777984619, - "20": 3.0556721687316895, - "21": 3.530245780944824, - "22": 3.380490303039551, - "23": 4.97763204574585, - "24": 3.108004331588745, - "25": 3.260448455810547, - "26": 2.997645854949951, - "27": 3.225484848022461, - "28": 2.9407646656036377, - "29": 3.5613508224487305, - "30": 3.257999897003174, - "31": 3.193094491958618, - "32": 3.066549777984619, - "33": 3.2439041137695312, - "34": 3.0571742057800293, - "35": 3.4001336097717285, - "36": 2.9874606132507324, - "37": 3.6888062953948975, - "38": 3.831803321838379, - "39": 2.945518970489502, - "40": 3.4076015949249268, - "41": 3.117316246032715, - "42": 3.391188859939575, - "43": 3.0278728008270264, - "44": 3.1067731380462646, - "45": 3.3740577697753906, - "46": 3.062196731567383, - "47": 2.8193845748901367, - "48": 2.8466029167175293, - "49": 3.1355485916137695, - "50": 3.2099781036376953, - "51": 3.2593750953674316, - "52": 2.848984479904175, - "53": 3.01206636428833 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "step_size_list": [ - 2.15e-11, - 0.00341036, - 0.0081676, - 0.0117807, - 0.00891262, - 0.0186838, - 0.0230227, - 0.027841, - 0.0319624, - 0.0294668, - 0.0358446, - 0.0372671, - 0.00284935, - 0.0494733, - 0.0434172, - 0.00652726, - 0.0598968, - 0.00651789, - 0.0668894, - 0.0294479, - 0.0665895, - 0.0305938, - 0.0653424, - 0.00663597, - 0.0834276, - 0.0714279, - 0.0931337, - 0.0589858, - 0.0901287, - 0.0219054, - 0.0827623, - 0.0354201, - 0.0917634, - 0.0384964, - 0.0976807, - 0.02173, - 0.119585, - 0.0430407, - 0.0856324, - 0.130444, - 0.0237294, - 0.102633, - 0.117985, - 0.153443, - 0.0908815, - 0.146437, - 0.152707, - 0.181381, - 0.165279, - 0.143186, - 0.147018, - 0.16894, - 0.184426, - 0.131333 - ], - "train_epoch_time": 4.843566179275513, - "train_loss": 3.192642595709824, - "train_score": 0.19196556671449067, - "val_loss": 3.212363445936744, - "val_score": 0.18875574033993942 - }, - { - "epoch": 1, - "grad_norm": 1.5797064304351807, - "learning_rate": 0.215, - "model_norm": 87.45377349853516, - "step_logs": { - "grad_norm": { - "54": 2.7509515285491943, - "55": 2.1297144889831543, - "56": 2.7332794666290283, - "57": 2.204525947570801, - "58": 2.327118158340454, - "59": 2.423321008682251, - "60": 3.5714168548583984, - "61": 1.8112183809280396, - "62": 1.8024959564208984, - "63": 1.5884958505630493, - "64": 1.8096168041229248, - "65": 2.4035353660583496, - "66": 2.1440398693084717, - "67": 2.1138064861297607, - "68": 2.0728063583374023, - "69": 1.8628382682800293, - "70": 1.760241150856018, - "71": 1.7336857318878174, - "72": 3.0480399131774902, - "73": 1.9429903030395508, - "74": 1.538813591003418, - "75": 1.3420686721801758, - "76": 1.5077552795410156, - "77": 1.8091076612472534, - "78": 3.002013921737671, - "79": 1.5222104787826538, - "80": 1.3437747955322266, - "81": 1.3172543048858643, - "82": 1.4621851444244385, - "83": 1.7923827171325684, - "84": 1.7363418340682983, - "85": 1.4273935556411743, - "86": 1.4675780534744263, - "87": 1.8746922016143799, - "88": 1.5804861783981323, - "89": 1.6660451889038086, - "90": 1.8579421043395996, - "91": 1.568577766418457, - "92": 1.5494729280471802, - "93": 2.0584964752197266, - "94": 1.6027863025665283, - "95": 0.9710261821746826, - "96": 1.1346977949142456, - "97": 1.7505210638046265, - "98": 1.586259365081787, - "99": 1.5141584873199463, - "100": 1.4724820852279663, - "101": 2.00040340423584, - "102": 1.7313096523284912, - "103": 1.2881773710250854, - "104": 1.1386187076568604, - "105": 1.1593906879425049, - "106": 1.3849852085113525, - "107": 1.5797064304351807 - }, - "loss": { - "54": 3.1894376277923584, - "55": 2.894500255584717, - "56": 2.8264591693878174, - "57": 2.9473814964294434, - "58": 2.8314361572265625, - "59": 2.8074607849121094, - "60": 3.004971981048584, - "61": 2.9587607383728027, - "62": 2.744166135787964, - "63": 2.7421693801879883, - "64": 2.710906505584717, - "65": 2.782461166381836, - "66": 2.880190372467041, - "67": 2.7372612953186035, - "68": 2.8365378379821777, - "69": 2.7197723388671875, - "70": 2.76515793800354, - "71": 2.7187962532043457, - "72": 2.7817392349243164, - "73": 2.9693241119384766, - "74": 2.708263874053955, - "75": 2.6438140869140625, - "76": 2.653427839279175, - "77": 2.7037103176116943, - "78": 2.806432008743286, - "79": 2.8769922256469727, - "80": 2.66206693649292, - "81": 2.6320762634277344, - "82": 2.6135997772216797, - "83": 2.6842079162597656, - "84": 2.6965930461883545, - "85": 2.662733554840088, - "86": 2.6265344619750977, - "87": 2.6330149173736572, - "88": 2.7358503341674805, - "89": 2.618044853210449, - "90": 2.745748519897461, - "91": 2.6436691284179688, - "92": 2.6546928882598877, - "93": 2.650205612182617, - "94": 2.7685561180114746, - "95": 2.5784668922424316, - "96": 2.57856822013855, - "97": 2.6211800575256348, - "98": 2.70285701751709, - "99": 2.6242856979370117, - "100": 2.6563167572021484, - "101": 2.629248857498169, - "102": 2.775672435760498, - "103": 2.590315341949463, - "104": 2.598947048187256, - "105": 2.555227279663086, - "106": 2.5942485332489014, - "107": 2.60394287109375 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "step_size_list": [ - 0.171305, - 0.184004, - 0.167427, - 0.182628, - 0.178333, - 0.17553, - 0.147635, - 0.192103, - 0.190725, - 0.195647, - 0.190289, - 0.175769, - 0.183514, - 0.182904, - 0.184894, - 0.189068, - 0.191886, - 0.192163, - 0.158201, - 0.189148, - 0.196528, - 0.200329, - 0.196868, - 0.190244, - 0.159827, - 0.197868, - 0.200388, - 0.200772, - 0.197622, - 0.190491, - 0.191932, - 0.198659, - 0.197583, - 0.188021, - 0.195784, - 0.193003, - 0.189402, - 0.195446, - 0.195949, - 0.183466, - 0.195499, - 0.206868, - 0.204047, - 0.190997, - 0.195441, - 0.196542, - 0.197656, - 0.18477, - 0.192637, - 0.201148, - 0.204057, - 0.203492, - 0.199169, - 0.194919 - ], - "train_epoch_time": 4.842210292816162, - "train_loss": 2.6319974529862917, - "train_score": 0.23389302367288378, - "val_loss": 2.651239775625901, - "val_score": 0.23102037911436998 - }, - { - "epoch": 2, - "grad_norm": 1.315531849861145, - "learning_rate": 0.215, - "model_norm": 87.47314453125, - "step_logs": { - "grad_norm": { - "108": 1.344770908355713, - "109": 1.875128984451294, - "110": 1.6731008291244507, - "111": 1.3293333053588867, - "112": 1.5637351274490356, - "113": 1.8029847145080566, - "114": 1.530908465385437, - "115": 1.2929627895355225, - "116": 1.3823858499526978, - "117": 1.8939834833145142, - "118": 1.5422918796539307, - "119": 1.1374588012695312, - "120": 1.1340739727020264, - "121": 1.4493074417114258, - "122": 1.4997245073318481, - "123": 1.5211377143859863, - "124": 1.4198424816131592, - "125": 1.3821847438812256, - "126": 1.4348351955413818, - "127": 1.4656531810760498, - "128": 1.5007617473602295, - "129": 1.3856004476547241, - "130": 1.358123779296875, - "131": 1.5242176055908203, - "132": 1.4696683883666992, - "133": 1.2413662672042847, - "134": 1.2722327709197998, - "135": 1.395194411277771, - "136": 1.2747689485549927, - "137": 1.288352131843567, - "138": 1.2747585773468018, - "139": 1.2749489545822144, - "140": 1.355198621749878, - "141": 1.5357475280761719, - "142": 1.4174522161483765, - "143": 1.322439193725586, - "144": 1.3464287519454956, - "145": 1.3782477378845215, - "146": 1.2899798154830933, - "147": 1.0685778856277466, - "148": 1.161523699760437, - "149": 1.4050099849700928, - "150": 1.5366697311401367, - "151": 1.5470987558364868, - "152": 1.642154335975647, - "153": 1.3751894235610962, - "154": 1.1721516847610474, - "155": 1.329103946685791, - "156": 1.5129611492156982, - "157": 1.3387285470962524, - "158": 1.2138770818710327, - "159": 1.4187896251678467, - "160": 1.427746295928955, - "161": 1.315531849861145 - }, - "loss": { - "108": 2.6293787956237793, - "109": 2.581925868988037, - "110": 2.7473692893981934, - "111": 2.5824594497680664, - "112": 2.63314151763916, - "113": 2.5941081047058105, - "114": 2.693816900253296, - "115": 2.579030752182007, - "116": 2.611847400665283, - "117": 2.5991437435150146, - "118": 2.704798698425293, - "119": 2.560504913330078, - "120": 2.5768022537231445, - "121": 2.572042942047119, - "122": 2.6388392448425293, - "123": 2.6021575927734375, - "124": 2.6545071601867676, - "125": 2.5796961784362793, - "126": 2.5913963317871094, - "127": 2.5851573944091797, - "128": 2.6309282779693604, - "129": 2.6035070419311523, - "130": 2.593778133392334, - "131": 2.5900230407714844, - "132": 2.628330707550049, - "133": 2.5558037757873535, - "134": 2.5774760246276855, - "135": 2.5583245754241943, - "136": 2.596064329147339, - "137": 2.549643039703369, - "138": 2.5992588996887207, - "139": 2.552520751953125, - "140": 2.5835907459259033, - "141": 2.572484254837036, - "142": 2.6159684658050537, - "143": 2.560361385345459, - "144": 2.587024211883545, - "145": 2.550192356109619, - "146": 2.572631359100342, - "147": 2.51448392868042, - "148": 2.536811351776123, - "149": 2.555095911026001, - "150": 2.5925002098083496, - "151": 2.60998797416687, - "152": 2.5830564498901367, - "153": 2.59608793258667, - "154": 2.5334532260894775, - "155": 2.570136308670044, - "156": 2.571843385696411, - "157": 2.6207480430603027, - "158": 2.5341358184814453, - "159": 2.592061996459961, - "160": 2.580261707305908, - "161": 2.5741782188415527 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "step_size_list": [ - 0.200198, - 0.187544, - 0.193776, - 0.200268, - 0.195485, - 0.189475, - 0.196611, - 0.200994, - 0.199323, - 0.187223, - 0.19643, - 0.203923, - 0.204052, - 0.197648, - 0.196954, - 0.196241, - 0.198772, - 0.199146, - 0.198083, - 0.19737, - 0.196881, - 0.199208, - 0.199731, - 0.196091, - 0.197548, - 0.201913, - 0.201404, - 0.198744, - 0.201445, - 0.200938, - 0.20146, - 0.201225, - 0.199737, - 0.195711, - 0.198603, - 0.200293, - 0.199938, - 0.19906, - 0.201022, - 0.204993, - 0.203373, - 0.198513, - 0.195826, - 0.195706, - 0.193306, - 0.199386, - 0.203156, - 0.200207, - 0.196225, - 0.200277, - 0.202352, - 0.198434, - 0.19817, - 0.200509 - ], - "train_epoch_time": 4.8452136516571045, - "train_loss": 2.555558303507363, - "train_score": 0.24031563843339895, - "val_loss": 2.590563240062219, - "val_score": 0.23925444873821858 - }, - { - "epoch": 3, - "grad_norm": 1.2829318046569824, - "learning_rate": 0.215, - "model_norm": 87.49092102050781, - "step_logs": { - "grad_norm": { - "162": 1.302155613899231, - "163": 1.2679505348205566, - "164": 1.3365378379821777, - "165": 1.2978893518447876, - "166": 1.2945623397827148, - "167": 1.3376919031143188, - "168": 1.3314517736434937, - "169": 1.249277949333191, - "170": 1.1479874849319458, - "171": 1.2287659645080566, - "172": 1.18733811378479, - "173": 1.2171608209609985, - "174": 1.301889181137085, - "175": 1.6929446458816528, - "176": 1.4803028106689453, - "177": 1.2344516515731812, - "178": 1.195341944694519, - "179": 1.4658128023147583, - "180": 1.4468327760696411, - "181": 1.0305976867675781, - "182": 1.0223102569580078, - "183": 1.2272998094558716, - "184": 1.2391563653945923, - "185": 1.3416391611099243, - "186": 1.2720328569412231, - "187": 0.9583979845046997, - "188": 1.1231491565704346, - "189": 1.4031203985214233, - "190": 1.357681393623352, - "191": 1.1329631805419922, - "192": 1.0563315153121948, - "193": 1.01853346824646, - "194": 1.0779166221618652, - "195": 1.1308420896530151, - "196": 1.203302025794983, - "197": 1.6176344156265259, - "198": 1.5035041570663452, - "199": 1.155487298965454, - "200": 1.0421441793441772, - "201": 1.0787928104400635, - "202": 1.1190375089645386, - "203": 1.1839451789855957, - "204": 1.1982773542404175, - "205": 1.2874412536621094, - "206": 1.2700939178466797, - "207": 1.2709908485412598, - "208": 1.2595946788787842, - "209": 1.1214065551757812, - "210": 1.0963120460510254, - "211": 1.0826447010040283, - "212": 1.2664756774902344, - "213": 1.446083903312683, - "214": 1.4617469310760498, - "215": 1.2829318046569824 - }, - "loss": { - "162": 2.5503251552581787, - "163": 2.580986738204956, - "164": 2.54641056060791, - "165": 2.565549373626709, - "166": 2.5534493923187256, - "167": 2.5993361473083496, - "168": 2.543041706085205, - "169": 2.5455493927001953, - "170": 2.539292573928833, - "171": 2.5354411602020264, - "172": 2.5436558723449707, - "173": 2.5123326778411865, - "174": 2.557433605194092, - "175": 2.5449934005737305, - "176": 2.618886709213257, - "177": 2.523719549179077, - "178": 2.5625057220458984, - "179": 2.5322012901306152, - "180": 2.5992798805236816, - "181": 2.5401182174682617, - "182": 2.5114431381225586, - "183": 2.5168349742889404, - "184": 2.559504508972168, - "185": 2.5291378498077393, - "186": 2.5774779319763184, - "187": 2.520627737045288, - "188": 2.5269546508789062, - "189": 2.5501954555511475, - "190": 2.573432445526123, - "191": 2.5298590660095215, - "192": 2.5226101875305176, - "193": 2.5114099979400635, - "194": 2.499330520629883, - "195": 2.517226219177246, - "196": 2.507829189300537, - "197": 2.5550994873046875, - "198": 2.6146035194396973, - "199": 2.535062074661255, - "200": 2.511296272277832, - "201": 2.500337600708008, - "202": 2.531972885131836, - "203": 2.514383316040039, - "204": 2.522951126098633, - "205": 2.5224952697753906, - "206": 2.5570836067199707, - "207": 2.5087690353393555, - "208": 2.5390076637268066, - "209": 2.5294992923736572, - "210": 2.5221924781799316, - "211": 2.489377975463867, - "212": 2.520740032196045, - "213": 2.553650140762329, - "214": 2.57137131690979, - "215": 2.544015407562256 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "step_size_list": [ - 0.200658, - 0.201507, - 0.199923, - 0.200825, - 0.20083, - 0.200185, - 0.200011, - 0.201706, - 0.203639, - 0.202064, - 0.202911, - 0.202183, - 0.200701, - 0.191782, - 0.197257, - 0.201895, - 0.202841, - 0.197028, - 0.197869, - 0.205751, - 0.205794, - 0.202004, - 0.201974, - 0.19972, - 0.201408, - 0.206895, - 0.20405, - 0.198524, - 0.199629, - 0.20388, - 0.205241, - 0.205859, - 0.204767, - 0.203866, - 0.202435, - 0.193677, - 0.196717, - 0.203479, - 0.205449, - 0.204755, - 0.204146, - 0.202844, - 0.202605, - 0.200815, - 0.201345, - 0.201081, - 0.201467, - 0.204092, - 0.204523, - 0.204642, - 0.201235, - 0.197605, - 0.197369, - 0.201019 - ], - "train_epoch_time": 4.84174919128418, - "train_loss": 2.5135812423492605, - "train_score": 0.2482077205198225, - "val_loss": 2.5549608303402924, - "val_score": 0.23722284038291322 - }, - { - "epoch": 4, - "grad_norm": 1.3852770328521729, - "learning_rate": 0.215, - "model_norm": 87.51226806640625, - "step_logs": { - "grad_norm": { - "216": 0.9911021590232849, - "217": 1.1388626098632812, - "218": 1.18673574924469, - "219": 1.2927554845809937, - "220": 1.1694165468215942, - "221": 0.9886003732681274, - "222": 1.147566795349121, - "223": 1.5065957307815552, - "224": 1.444945216178894, - "225": 1.1592024564743042, - "226": 1.0360867977142334, - "227": 1.1043791770935059, - "228": 1.1281208992004395, - "229": 1.2424534559249878, - "230": 1.3199564218521118, - "231": 1.2597553730010986, - "232": 1.3101727962493896, - "233": 1.3297395706176758, - "234": 1.3228331804275513, - "235": 1.2477182149887085, - "236": 1.287459135055542, - "237": 1.2524323463439941, - "238": 1.1583845615386963, - "239": 1.1117467880249023, - "240": 1.2913349866867065, - "241": 1.2728441953659058, - "242": 1.282461404800415, - "243": 1.2495893239974976, - "244": 1.227732539176941, - "245": 1.1781437397003174, - "246": 1.1705435514450073, - "247": 1.1541047096252441, - "248": 1.1340049505233765, - "249": 1.0704351663589478, - "250": 1.1875613927841187, - "251": 1.326641321182251, - "252": 1.13332998752594, - "253": 1.1530803442001343, - "254": 1.3963634967803955, - "255": 1.4765300750732422, - "256": 1.243006944656372, - "257": 1.167756199836731, - "258": 1.150063395500183, - "259": 1.087386965751648, - "260": 1.0243992805480957, - "261": 1.2357460260391235, - "262": 1.351833701133728, - "263": 1.4577537775039673, - "264": 1.3989040851593018, - "265": 1.4537324905395508, - "266": 1.3404828310012817, - "267": 1.2918037176132202, - "268": 1.3404955863952637, - "269": 1.3852770328521729 - }, - "loss": { - "216": 2.512786865234375, - "217": 2.483055591583252, - "218": 2.51798677444458, - "219": 2.528566837310791, - "220": 2.562972068786621, - "221": 2.485072612762451, - "222": 2.5267343521118164, - "223": 2.5232694149017334, - "224": 2.591841220855713, - "225": 2.508546829223633, - "226": 2.5094411373138428, - "227": 2.4954824447631836, - "228": 2.513897180557251, - "229": 2.500267505645752, - "230": 2.53218936920166, - "231": 2.524029493331909, - "232": 2.5123438835144043, - "233": 2.5255627632141113, - "234": 2.547271490097046, - "235": 2.5202784538269043, - "236": 2.5422465801239014, - "237": 2.532721996307373, - "238": 2.5069632530212402, - "239": 2.501102924346924, - "240": 2.5159387588500977, - "241": 2.5408389568328857, - "242": 2.49662446975708, - "243": 2.527021884918213, - "244": 2.4945502281188965, - "245": 2.5129857063293457, - "246": 2.4755072593688965, - "247": 2.500941276550293, - "248": 2.5067288875579834, - "249": 2.4871864318847656, - "250": 2.477766275405884, - "251": 2.5294623374938965, - "252": 2.518540382385254, - "253": 2.5060808658599854, - "254": 2.49104905128479, - "255": 2.545215129852295, - "256": 2.500241279602051, - "257": 2.4953794479370117, - "258": 2.480581283569336, - "259": 2.4736239910125732, - "260": 2.458857297897339, - "261": 2.4802355766296387, - "262": 2.4805707931518555, - "263": 2.5296616554260254, - "264": 2.5329012870788574, - "265": 2.5064330101013184, - "266": 2.500066041946411, - "267": 2.478817939758301, - "268": 2.4949803352355957, - "269": 2.4904167652130127 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "step_size_list": [ - 0.206329, - 0.203569, - 0.202806, - 0.200738, - 0.203337, - 0.206279, - 0.203593, - 0.196042, - 0.197865, - 0.203293, - 0.205548, - 0.204268, - 0.203903, - 0.201618, - 0.200193, - 0.201388, - 0.200289, - 0.199951, - 0.200214, - 0.201612, - 0.200918, - 0.201579, - 0.203302, - 0.204155, - 0.2007, - 0.201208, - 0.200781, - 0.201608, - 0.201886, - 0.20295, - 0.202926, - 0.203357, - 0.203763, - 0.204855, - 0.202603, - 0.200038, - 0.203825, - 0.203399, - 0.198313, - 0.196872, - 0.201607, - 0.20307, - 0.203344, - 0.204492, - 0.205569, - 0.201653, - 0.199222, - 0.197192, - 0.198513, - 0.197132, - 0.19958, - 0.200491, - 0.19955, - 0.198553 - ], - "train_epoch_time": 4.84600830078125, - "train_loss": 2.502915991669577, - "train_score": 0.2600542502389577, - "val_loss": 2.5505322227521825, - "val_score": 0.2530810491157317 - }, - { - "epoch": 5, - "grad_norm": 1.2029547691345215, - "learning_rate": 0.215, - "model_norm": 87.5400161743164, - "step_logs": { - "grad_norm": { - "270": 1.2710802555084229, - "271": 1.0045161247253418, - "272": 1.0132417678833008, - "273": 1.248389482498169, - "274": 1.4885855913162231, - "275": 1.428642988204956, - "276": 1.2419459819793701, - "277": 1.2277100086212158, - "278": 1.286665916442871, - "279": 1.2028756141662598, - "280": 1.1912678480148315, - "281": 1.3207236528396606, - "282": 1.27871835231781, - "283": 1.315139651298523, - "284": 1.3207634687423706, - "285": 1.2287064790725708, - "286": 1.2362735271453857, - "287": 1.2319828271865845, - "288": 1.3179576396942139, - "289": 1.3505678176879883, - "290": 1.2994858026504517, - "291": 1.1427247524261475, - "292": 1.0707684755325317, - "293": 1.394778847694397, - "294": 1.3099271059036255, - "295": 1.4401239156723022, - "296": 1.7194749116897583, - "297": 1.9601112604141235, - "298": 1.5322904586791992, - "299": 1.1947455406188965, - "300": 1.0073742866516113, - "301": 1.1618094444274902, - "302": 1.2410342693328857, - "303": 1.247981071472168, - "304": 1.283972144126892, - "305": 1.3671889305114746, - "306": 1.4527076482772827, - "307": 1.4652153253555298, - "308": 1.5101890563964844, - "309": 1.1943949460983276, - "310": 1.0305434465408325, - "311": 1.1106972694396973, - "312": 1.3709852695465088, - "313": 1.3301037549972534, - "314": 1.1110860109329224, - "315": 1.0978580713272095, - "316": 1.2091288566589355, - "317": 1.2651363611221313, - "318": 1.3794142007827759, - "319": 1.2932034730911255, - "320": 1.249191164970398, - "321": 1.2317206859588623, - "322": 1.1675301790237427, - "323": 1.2029547691345215 - }, - "loss": { - "270": 2.504241943359375, - "271": 2.4517226219177246, - "272": 2.438788414001465, - "273": 2.4542157649993896, - "274": 2.4831953048706055, - "275": 2.4865403175354004, - "276": 2.471459150314331, - "277": 2.444843053817749, - "278": 2.45448637008667, - "279": 2.4569814205169678, - "280": 2.440514087677002, - "281": 2.451827049255371, - "282": 2.475576639175415, - "283": 2.4644479751586914, - "284": 2.4695143699645996, - "285": 2.4339611530303955, - "286": 2.4734182357788086, - "287": 2.4525671005249023, - "288": 2.4568240642547607, - "289": 2.4482290744781494, - "290": 2.457190752029419, - "291": 2.422393321990967, - "292": 2.3946518898010254, - "293": 2.418074369430542, - "294": 2.4740211963653564, - "295": 2.4526937007904053, - "296": 2.485193967819214, - "297": 2.529479503631592, - "298": 2.470250129699707, - "299": 2.4665398597717285, - "300": 2.398270845413208, - "301": 2.4219326972961426, - "302": 2.439095973968506, - "303": 2.4235827922821045, - "304": 2.4364137649536133, - "305": 2.450216770172119, - "306": 2.4460811614990234, - "307": 2.4381558895111084, - "308": 2.467790126800537, - "309": 2.481377601623535, - "310": 2.4137468338012695, - "311": 2.418330669403076, - "312": 2.413196563720703, - "313": 2.468334197998047, - "314": 2.4007620811462402, - "315": 2.3954718112945557, - "316": 2.363215923309326, - "317": 2.4446158409118652, - "318": 2.4106626510620117, - "319": 2.423877716064453, - "320": 2.414515972137451, - "321": 2.4114866256713867, - "322": 2.380112648010254, - "323": 2.4021129608154297 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "step_size_list": [ - 0.201056, - 0.205891, - 0.205692, - 0.201261, - 0.196181, - 0.197567, - 0.201482, - 0.201637, - 0.200465, - 0.202199, - 0.202351, - 0.199725, - 0.200746, - 0.199917, - 0.199826, - 0.20156, - 0.201608, - 0.201589, - 0.199813, - 0.199057, - 0.200209, - 0.203223, - 0.204476, - 0.197886, - 0.200082, - 0.197085, - 0.190621, - 0.184822, - 0.195069, - 0.202408, - 0.205646, - 0.202847, - 0.201333, - 0.201107, - 0.200421, - 0.198704, - 0.196752, - 0.196409, - 0.19557, - 0.202486, - 0.20529, - 0.203823, - 0.198389, - 0.199619, - 0.203738, - 0.203968, - 0.201593, - 0.200863, - 0.198184, - 0.200154, - 0.201033, - 0.20138, - 0.202531, - 0.201923 - ], - "train_epoch_time": 4.844873905181885, - "train_loss": 2.4011798890112463, - "train_score": 0.2974533715754363, - "val_loss": 2.45778798865396, - "val_score": 0.28674834922178466 - }, - { - "epoch": 6, - "grad_norm": 1.1454983949661255, - "learning_rate": 0.215, - "model_norm": 87.56878662109375, - "step_logs": { - "grad_norm": { - "324": 1.4478660821914673, - "325": 1.3053901195526123, - "326": 1.0898138284683228, - "327": 1.1261799335479736, - "328": 1.1343061923980713, - "329": 1.0687384605407715, - "330": 1.074279546737671, - "331": 1.2334868907928467, - "332": 1.3701967000961304, - "333": 1.2915104627609253, - "334": 1.1126514673233032, - "335": 1.2157806158065796, - "336": 1.2364264726638794, - "337": 1.2313041687011719, - "338": 1.2272017002105713, - "339": 1.2962377071380615, - "340": 1.2160311937332153, - "341": 1.167911171913147, - "342": 1.1176592111587524, - "343": 1.202584981918335, - "344": 1.5088257789611816, - "345": 1.7485594749450684, - "346": 1.632184624671936, - "347": 1.4440006017684937, - "348": 1.311724066734314, - "349": 1.2276780605316162, - "350": 1.0833379030227661, - "351": 1.1216859817504883, - "352": 1.2419434785842896, - "353": 1.236767292022705, - "354": 1.2276660203933716, - "355": 1.3100180625915527, - "356": 1.3939276933670044, - "357": 1.2280195951461792, - "358": 1.0774376392364502, - "359": 1.0626447200775146, - "360": 1.0826159715652466, - "361": 1.1186443567276, - "362": 1.115387201309204, - "363": 1.1995201110839844, - "364": 1.2868213653564453, - "365": 1.1960668563842773, - "366": 1.037724256515503, - "367": 1.0093685388565063, - "368": 1.0928775072097778, - "369": 1.211077094078064, - "370": 1.5211516618728638, - "371": 1.4537298679351807, - "372": 1.4707303047180176, - "373": 1.3578100204467773, - "374": 1.4943362474441528, - "375": 1.4854881763458252, - "376": 1.292373776435852, - "377": 1.1454983949661255 - }, - "loss": { - "324": 2.412602186203003, - "325": 2.4778757095336914, - "326": 2.3745248317718506, - "327": 2.4018149375915527, - "328": 2.3857524394989014, - "329": 2.3892507553100586, - "330": 2.371295928955078, - "331": 2.3904948234558105, - "332": 2.3940982818603516, - "333": 2.402587413787842, - "334": 2.3605637550354004, - "335": 2.402249813079834, - "336": 2.385800838470459, - "337": 2.3857994079589844, - "338": 2.4248242378234863, - "339": 2.3977396488189697, - "340": 2.401632308959961, - "341": 2.3702032566070557, - "342": 2.3574700355529785, - "343": 2.3406589031219482, - "344": 2.3903648853302, - "345": 2.4242103099823, - "346": 2.423222303390503, - "347": 2.4280123710632324, - "348": 2.3788585662841797, - "349": 2.3968396186828613, - "350": 2.340715169906616, - "351": 2.3773956298828125, - "352": 2.356738328933716, - "353": 2.387390613555908, - "354": 2.356386184692383, - "355": 2.393049478530884, - "356": 2.3868567943573, - "357": 2.3960678577423096, - "358": 2.3528735637664795, - "359": 2.331757068634033, - "360": 2.3170411586761475, - "361": 2.3428142070770264, - "362": 2.3230068683624268, - "363": 2.351250410079956, - "364": 2.34890079498291, - "365": 2.3764071464538574, - "366": 2.2935149669647217, - "367": 2.337144374847412, - "368": 2.326967716217041, - "369": 2.349787950515747, - "370": 2.361410140991211, - "371": 2.4077606201171875, - "372": 2.349144697189331, - "373": 2.40689754486084, - "374": 2.3618967533111572, - "375": 2.355367660522461, - "376": 2.3666179180145264, - "377": 2.3410162925720215 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "step_size_list": [ - 0.196633, - 0.2002, - 0.204029, - 0.203451, - 0.203218, - 0.204491, - 0.204311, - 0.201232, - 0.198284, - 0.200068, - 0.203526, - 0.201661, - 0.201145, - 0.201252, - 0.201544, - 0.199938, - 0.201653, - 0.202474, - 0.203413, - 0.201609, - 0.195032, - 0.18933, - 0.192276, - 0.196829, - 0.199489, - 0.201387, - 0.204004, - 0.203427, - 0.200868, - 0.201146, - 0.201168, - 0.199611, - 0.197699, - 0.201375, - 0.204171, - 0.204361, - 0.203912, - 0.203325, - 0.203296, - 0.201729, - 0.199854, - 0.201932, - 0.204669, - 0.205376, - 0.203757, - 0.201481, - 0.194511, - 0.196463, - 0.195635, - 0.198643, - 0.195164, - 0.195328, - 0.199839, - 0.202781 - ], - "train_epoch_time": 4.844247579574585, - "train_loss": 2.331601727060129, - "train_score": 0.3248217809234493, - "val_loss": 2.379627516841779, - "val_score": 0.316186496126118 - }, - { - "epoch": 7, - "grad_norm": 1.1997441053390503, - "learning_rate": 0.215, - "model_norm": 87.59695434570312, - "step_logs": { - "grad_norm": { - "378": 1.3484280109405518, - "379": 1.3991032838821411, - "380": 1.3478426933288574, - "381": 1.436978816986084, - "382": 1.3199915885925293, - "383": 1.2213108539581299, - "384": 1.1229854822158813, - "385": 1.082373857498169, - "386": 1.0697990655899048, - "387": 1.146734595298767, - "388": 1.2959370613098145, - "389": 1.4532639980316162, - "390": 1.5401716232299805, - "391": 1.5075747966766357, - "392": 1.3791413307189941, - "393": 1.3971352577209473, - "394": 1.4198503494262695, - "395": 1.461948037147522, - "396": 1.371685266494751, - "397": 1.4157965183258057, - "398": 1.2385632991790771, - "399": 1.07338547706604, - "400": 1.1034038066864014, - "401": 1.126296043395996, - "402": 1.076011061668396, - "403": 1.0979341268539429, - "404": 1.1755038499832153, - "405": 1.176401972770691, - "406": 1.1992491483688354, - "407": 1.1118355989456177, - "408": 1.024641752243042, - "409": 1.0313398838043213, - "410": 1.1729899644851685, - "411": 1.3220034837722778, - "412": 1.275770664215088, - "413": 1.2952038049697876, - "414": 1.1650899648666382, - "415": 1.0833736658096313, - "416": 1.0968871116638184, - "417": 1.1617414951324463, - "418": 1.24214768409729, - "419": 1.406369686126709, - "420": 1.3495177030563354, - "421": 1.168258786201477, - "422": 1.0976866483688354, - "423": 1.0422807931900024, - "424": 1.0422507524490356, - "425": 1.169421911239624, - "426": 1.1457267999649048, - "427": 1.0372921228408813, - "428": 1.2229158878326416, - "429": 1.2568180561065674, - "430": 1.32062566280365, - "431": 1.1997441053390503 - }, - "loss": { - "378": 2.3507919311523438, - "379": 2.368234157562256, - "380": 2.349613666534424, - "381": 2.355195999145508, - "382": 2.3437561988830566, - "383": 2.3469855785369873, - "384": 2.31719970703125, - "385": 2.312052011489868, - "386": 2.3113436698913574, - "387": 2.295637607574463, - "388": 2.291583299636841, - "389": 2.346353530883789, - "390": 2.401693820953369, - "391": 2.352247714996338, - "392": 2.356387138366699, - "393": 2.3271470069885254, - "394": 2.369628667831421, - "395": 2.3422141075134277, - "396": 2.3602776527404785, - "397": 2.331357955932617, - "398": 2.349625587463379, - "399": 2.3089780807495117, - "400": 2.3075170516967773, - "401": 2.30657958984375, - "402": 2.3218934535980225, - "403": 2.3027148246765137, - "404": 2.342536449432373, - "405": 2.2852530479431152, - "406": 2.313821792602539, - "407": 2.309011459350586, - "408": 2.3086814880371094, - "409": 2.2737903594970703, - "410": 2.292351722717285, - "411": 2.317483425140381, - "412": 2.3365187644958496, - "413": 2.295337677001953, - "414": 2.3287415504455566, - "415": 2.2811338901519775, - "416": 2.3108086585998535, - "417": 2.298628330230713, - "418": 2.295947313308716, - "419": 2.320661783218384, - "420": 2.3425865173339844, - "421": 2.2825095653533936, - "422": 2.305323600769043, - "423": 2.279123067855835, - "424": 2.2789125442504883, - "425": 2.2769432067871094, - "426": 2.2836215496063232, - "427": 2.2680306434631348, - "428": 2.2916526794433594, - "429": 2.3019516468048096, - "430": 2.30794358253479, - "431": 2.2952566146850586 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "step_size_list": [ - 0.198496, - 0.197455, - 0.198501, - 0.196482, - 0.199089, - 0.20125, - 0.203117, - 0.203894, - 0.204134, - 0.202529, - 0.199298, - 0.196032, - 0.194363, - 0.19477, - 0.197834, - 0.197217, - 0.196985, - 0.195794, - 0.19803, - 0.196809, - 0.2009, - 0.204054, - 0.20346, - 0.202998, - 0.204061, - 0.203545, - 0.202179, - 0.201859, - 0.201534, - 0.2033, - 0.204979, - 0.204706, - 0.201968, - 0.198877, - 0.200022, - 0.199339, - 0.202322, - 0.203731, - 0.203604, - 0.202235, - 0.200514, - 0.196955, - 0.198417, - 0.202015, - 0.203562, - 0.20452, - 0.20452, - 0.20196, - 0.202487, - 0.204567, - 0.200906, - 0.20023, - 0.198847, - 0.201421 - ], - "train_epoch_time": 4.845145225524902, - "train_loss": 2.282484034452069, - "train_score": 0.3152248475353208, - "val_loss": 2.3400606446643923, - "val_score": 0.3001668338592236 - }, - { - "epoch": 8, - "grad_norm": 1.2059179544448853, - "learning_rate": 0.215, - "model_norm": 87.62821197509766, - "step_logs": { - "grad_norm": { - "432": 1.154915690422058, - "433": 1.2224822044372559, - "434": 1.1708451509475708, - "435": 1.1733704805374146, - "436": 1.1595726013183594, - "437": 1.1824321746826172, - "438": 1.3086246252059937, - "439": 1.2058050632476807, - "440": 1.0663050413131714, - "441": 1.2326864004135132, - "442": 1.2345457077026367, - "443": 1.1186063289642334, - "444": 1.098375678062439, - "445": 1.1730140447616577, - "446": 1.1214861869812012, - "447": 1.0590879917144775, - "448": 0.9779362082481384, - "449": 0.9481033682823181, - "450": 1.04584801197052, - "451": 1.252267837524414, - "452": 1.6064000129699707, - "453": 1.492759346961975, - "454": 1.1854383945465088, - "455": 1.2818635702133179, - "456": 1.5860165357589722, - "457": 1.3650422096252441, - "458": 1.0735995769500732, - "459": 1.0504001379013062, - "460": 1.329114556312561, - "461": 1.4292060136795044, - "462": 1.2894105911254883, - "463": 1.1807360649108887, - "464": 1.2821755409240723, - "465": 1.2632641792297363, - "466": 1.2759512662887573, - "467": 1.2425559759140015, - "468": 1.1334298849105835, - "469": 1.1011444330215454, - "470": 1.0455904006958008, - "471": 1.1691581010818481, - "472": 1.2184985876083374, - "473": 1.2237861156463623, - "474": 1.2811400890350342, - "475": 1.1922094821929932, - "476": 1.069293737411499, - "477": 1.1363435983657837, - "478": 1.1625710725784302, - "479": 1.167516827583313, - "480": 1.1135964393615723, - "481": 1.115341305732727, - "482": 1.1914392709732056, - "483": 1.310146450996399, - "484": 1.2625014781951904, - "485": 1.2059179544448853 - }, - "loss": { - "432": 2.306880474090576, - "433": 2.2761852741241455, - "434": 2.3028855323791504, - "435": 2.2981693744659424, - "436": 2.29217529296875, - "437": 2.2850661277770996, - "438": 2.257185459136963, - "439": 2.294344186782837, - "440": 2.2642316818237305, - "441": 2.253075122833252, - "442": 2.3124477863311768, - "443": 2.2656140327453613, - "444": 2.2863221168518066, - "445": 2.2782537937164307, - "446": 2.269493579864502, - "447": 2.2647018432617188, - "448": 2.2654755115509033, - "449": 2.2519307136535645, - "450": 2.242961883544922, - "451": 2.2560482025146484, - "452": 2.2995927333831787, - "453": 2.3216710090637207, - "454": 2.2739310264587402, - "455": 2.2942895889282227, - "456": 2.282039165496826, - "457": 2.334156036376953, - "458": 2.2432713508605957, - "459": 2.2499780654907227, - "460": 2.242152690887451, - "461": 2.321077585220337, - "462": 2.2890729904174805, - "463": 2.261343002319336, - "464": 2.257908344268799, - "465": 2.269758462905884, - "466": 2.2366390228271484, - "467": 2.2978789806365967, - "468": 2.2593274116516113, - "469": 2.2600831985473633, - "470": 2.2695047855377197, - "471": 2.2583765983581543, - "472": 2.2594573497772217, - "473": 2.260195732116699, - "474": 2.241128921508789, - "475": 2.2822160720825195, - "476": 2.2174103260040283, - "477": 2.2257683277130127, - "478": 2.252816915512085, - "479": 2.252662181854248, - "480": 2.2483038902282715, - "481": 2.2260961532592773, - "482": 2.259723424911499, - "483": 2.2424588203430176, - "484": 2.248912811279297, - "485": 2.243582248687744 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "step_size_list": [ - 0.202418, - 0.200826, - 0.202069, - 0.201991, - 0.202246, - 0.201731, - 0.198787, - 0.201287, - 0.203988, - 0.200466, - 0.200775, - 0.202951, - 0.203459, - 0.201892, - 0.202911, - 0.204131, - 0.205667, - 0.206154, - 0.20429, - 0.200052, - 0.191856, - 0.194891, - 0.201607, - 0.19963, - 0.192223, - 0.198008, - 0.203746, - 0.204234, - 0.198212, - 0.196418, - 0.199429, - 0.201637, - 0.199393, - 0.199892, - 0.199397, - 0.200517, - 0.202615, - 0.203276, - 0.204414, - 0.201865, - 0.200814, - 0.200704, - 0.199309, - 0.201509, - 0.203708, - 0.202378, - 0.201974, - 0.201869, - 0.202965, - 0.202816, - 0.201399, - 0.198654, - 0.199779, - 0.200995 - ], - "train_epoch_time": 4.844937801361084, - "train_loss": 2.249316280917766, - "train_score": 0.3493364419023823, - "val_loss": 2.307672290933392, - "val_score": 0.3339014068742023 - }, - { - "epoch": 9, - "grad_norm": 1.286625862121582, - "learning_rate": 0.215, - "model_norm": 87.65879821777344, - "step_logs": { - "grad_norm": { - "486": 1.2811017036437988, - "487": 1.3381850719451904, - "488": 1.2197372913360596, - "489": 1.3329378366470337, - "490": 1.308487057685852, - "491": 1.2579445838928223, - "492": 1.291610836982727, - "493": 1.2727007865905762, - "494": 1.0571719408035278, - "495": 0.931128740310669, - "496": 0.9739916920661926, - "497": 1.1750648021697998, - "498": 1.3887135982513428, - "499": 1.6758337020874023, - "500": 1.4978593587875366, - "501": 1.3222256898880005, - "502": 1.4410287141799927, - "503": 1.1668554544448853, - "504": 1.191144347190857, - "505": 1.255637764930725, - "506": 1.1259422302246094, - "507": 1.1139298677444458, - "508": 1.0737437009811401, - "509": 1.1874620914459229, - "510": 1.1882264614105225, - "511": 1.1086351871490479, - "512": 1.03429114818573, - "513": 1.0379016399383545, - "514": 1.0770936012268066, - "515": 1.1197972297668457, - "516": 1.1270424127578735, - "517": 1.1074938774108887, - "518": 1.2652866840362549, - "519": 1.5173068046569824, - "520": 1.2729284763336182, - "521": 1.2219595909118652, - "522": 1.2521469593048096, - "523": 1.2389447689056396, - "524": 1.3149099349975586, - "525": 1.2944488525390625, - "526": 1.1764169931411743, - "527": 1.2254124879837036, - "528": 1.2436047792434692, - "529": 1.1616783142089844, - "530": 1.1216086149215698, - "531": 1.1146761178970337, - "532": 1.1473681926727295, - "533": 1.1905994415283203, - "534": 1.2116689682006836, - "535": 1.1863447427749634, - "536": 1.06602144241333, - "537": 1.0756251811981201, - "538": 1.1655296087265015, - "539": 1.286625862121582 - }, - "loss": { - "486": 2.263685703277588, - "487": 2.27846622467041, - "488": 2.246555805206299, - "489": 2.237314462661743, - "490": 2.2342114448547363, - "491": 2.255732536315918, - "492": 2.261526107788086, - "493": 2.274289131164551, - "494": 2.20943546295166, - "495": 2.2043192386627197, - "496": 2.227146625518799, - "497": 2.228538990020752, - "498": 2.2645938396453857, - "499": 2.265127182006836, - "500": 2.301616668701172, - "501": 2.262078046798706, - "502": 2.2398734092712402, - "503": 2.2471923828125, - "504": 2.20629620552063, - "505": 2.246720552444458, - "506": 2.226177215576172, - "507": 2.1992220878601074, - "508": 2.2379846572875977, - "509": 2.2320992946624756, - "510": 2.228458881378174, - "511": 2.2272465229034424, - "512": 2.2293572425842285, - "513": 2.2162818908691406, - "514": 2.2113728523254395, - "515": 2.2047367095947266, - "516": 2.2201035022735596, - "517": 2.209986686706543, - "518": 2.2332091331481934, - "519": 2.2668092250823975, - "520": 2.2332048416137695, - "521": 2.256260871887207, - "522": 2.2383127212524414, - "523": 2.253067970275879, - "524": 2.256917953491211, - "525": 2.244154691696167, - "526": 2.2304253578186035, - "527": 2.211859941482544, - "528": 2.212076187133789, - "529": 2.2092349529266357, - "530": 2.215790271759033, - "531": 2.1990294456481934, - "532": 2.2107343673706055, - "533": 2.223140001296997, - "534": 2.229137659072876, - "535": 2.23358154296875, - "536": 2.225706100463867, - "537": 2.1915171146392822, - "538": 2.1892237663269043, - "539": 2.2547144889831543 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "step_size_list": [ - 0.199455, - 0.19825, - 0.200711, - 0.198089, - 0.198636, - 0.199923, - 0.199203, - 0.19971, - 0.203912, - 0.206278, - 0.205586, - 0.201574, - 0.196968, - 0.189714, - 0.194607, - 0.198507, - 0.195515, - 0.201853, - 0.201098, - 0.199919, - 0.202597, - 0.202705, - 0.203718, - 0.201328, - 0.20129, - 0.20296, - 0.204453, - 0.204324, - 0.203522, - 0.202612, - 0.202542, - 0.202895, - 0.199617, - 0.193837, - 0.199444, - 0.20072, - 0.199944, - 0.200328, - 0.198641, - 0.199025, - 0.201556, - 0.200376, - 0.199971, - 0.201752, - 0.202633, - 0.202689, - 0.202065, - 0.201208, - 0.200784, - 0.20136, - 0.203813, - 0.203453, - 0.201555, - 0.199272 - ], - "train_epoch_time": 4.84569525718689, - "train_loss": 2.228900132459753, - "train_score": 0.33784522952958196, - "val_loss": 2.2953676372663847, - "val_score": 0.3241425087818733 - }, - { - "epoch": 10, - "grad_norm": 1.5282254219055176, - "learning_rate": 0.215, - "model_norm": 87.6897201538086, - "step_logs": { - "grad_norm": { - "540": 1.3236253261566162, - "541": 1.2120963335037231, - "542": 1.3293075561523438, - "543": 1.2659614086151123, - "544": 1.2643650770187378, - "545": 1.2919769287109375, - "546": 1.3321635723114014, - "547": 1.2872507572174072, - "548": 1.1092441082000732, - "549": 1.2507811784744263, - "550": 1.2238914966583252, - "551": 1.1918277740478516, - "552": 1.3765634298324585, - "553": 1.4541363716125488, - "554": 1.3285415172576904, - "555": 1.2294840812683105, - "556": 1.0333061218261719, - "557": 0.9750503897666931, - "558": 1.0579694509506226, - "559": 1.1781774759292603, - "560": 1.0836007595062256, - "561": 1.0941779613494873, - "562": 1.1210981607437134, - "563": 1.170119047164917, - "564": 1.2395886182785034, - "565": 1.2561867237091064, - "566": 1.2560819387435913, - "567": 1.234580159187317, - "568": 1.1695517301559448, - "569": 1.204097032546997, - "570": 1.2253378629684448, - "571": 1.235628604888916, - "572": 1.2304635047912598, - "573": 1.261124610900879, - "574": 1.1713411808013916, - "575": 1.154990553855896, - "576": 1.1073288917541504, - "577": 1.108127236366272, - "578": 1.1391375064849854, - "579": 1.1087826490402222, - "580": 1.0753718614578247, - "581": 1.0891958475112915, - "582": 1.1358745098114014, - "583": 1.29420006275177, - "584": 1.4496811628341675, - "585": 1.3629112243652344, - "586": 1.2241178750991821, - "587": 1.6359233856201172, - "588": 1.7339699268341064, - "589": 2.3059768676757812, - "590": 1.75924813747406, - "591": 1.3533680438995361, - "592": 1.40185546875, - "593": 1.5282254219055176 - }, - "loss": { - "540": 2.24470591545105, - "541": 2.2166566848754883, - "542": 2.202448844909668, - "543": 2.2326884269714355, - "544": 2.1964781284332275, - "545": 2.2362253665924072, - "546": 2.237544059753418, - "547": 2.2418558597564697, - "548": 2.19227933883667, - "549": 2.1998894214630127, - "550": 2.2158803939819336, - "551": 2.2009477615356445, - "552": 2.218970775604248, - "553": 2.254849910736084, - "554": 2.222987174987793, - "555": 2.2264702320098877, - "556": 2.2049458026885986, - "557": 2.1714818477630615, - "558": 2.1553432941436768, - "559": 2.1979007720947266, - "560": 2.191707134246826, - "561": 2.1716389656066895, - "562": 2.214081048965454, - "563": 2.2235708236694336, - "564": 2.200239658355713, - "565": 2.190506935119629, - "566": 2.224316120147705, - "567": 2.1955504417419434, - "568": 2.181828022003174, - "569": 2.1953487396240234, - "570": 2.2270801067352295, - "571": 2.1956965923309326, - "572": 2.197404384613037, - "573": 2.172541618347168, - "574": 2.1874992847442627, - "575": 2.194261074066162, - "576": 2.159250259399414, - "577": 2.166497230529785, - "578": 2.174556016921997, - "579": 2.1848740577697754, - "580": 2.1783227920532227, - "581": 2.1724984645843506, - "582": 2.157999038696289, - "583": 2.1706554889678955, - "584": 2.2155213356018066, - "585": 2.1973178386688232, - "586": 2.200434684753418, - "587": 2.2382147312164307, - "588": 2.245429039001465, - "589": 2.2540335655212402, - "590": 2.3192644119262695, - "591": 2.239567279815674, - "592": 2.198662757873535, - "593": 2.2130331993103027 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "step_size_list": [ - 0.198357, - 0.2007, - 0.197929, - 0.199598, - 0.199399, - 0.199029, - 0.198109, - 0.199174, - 0.202766, - 0.199731, - 0.200435, - 0.201051, - 0.196922, - 0.195311, - 0.198092, - 0.200375, - 0.204362, - 0.205336, - 0.203632, - 0.201331, - 0.203292, - 0.202971, - 0.202634, - 0.201652, - 0.199986, - 0.199547, - 0.199767, - 0.200069, - 0.201425, - 0.200748, - 0.200471, - 0.200046, - 0.200173, - 0.199315, - 0.201419, - 0.201811, - 0.20263, - 0.202652, - 0.202039, - 0.202737, - 0.203393, - 0.203079, - 0.202016, - 0.198532, - 0.195105, - 0.197089, - 0.200334, - 0.190512, - 0.187946, - 0.171505, - 0.188027, - 0.197625, - 0.196153, - 0.193094 - ], - "train_epoch_time": 4.844839334487915, - "train_loss": 2.207936185264861, - "train_score": 0.34560729015506325, - "val_loss": 2.2938550016773283, - "val_score": 0.3282012413892078 - }, - { - "epoch": 11, - "grad_norm": 1.0767444372177124, - "learning_rate": 0.215, - "model_norm": 87.72090911865234, - "step_logs": { - "grad_norm": { - "594": 1.1823039054870605, - "595": 1.1631243228912354, - "596": 1.4852553606033325, - "597": 1.4170695543289185, - "598": 1.2073761224746704, - "599": 1.1509628295898438, - "600": 1.1863006353378296, - "601": 1.2522052526474, - "602": 1.1972665786743164, - "603": 1.1848161220550537, - "604": 1.1319524049758911, - "605": 1.1472547054290771, - "606": 1.1560578346252441, - "607": 1.110656499862671, - "608": 1.2006510496139526, - "609": 1.3080259561538696, - "610": 1.3089104890823364, - "611": 1.1568255424499512, - "612": 1.0831385850906372, - "613": 1.1552590131759644, - "614": 1.2013462781906128, - "615": 1.205397129058838, - "616": 1.1810394525527954, - "617": 1.1056616306304932, - "618": 1.2109280824661255, - "619": 1.2529412508010864, - "620": 1.120617389678955, - "621": 1.0886765718460083, - "622": 1.2116297483444214, - "623": 1.2842867374420166, - "624": 1.365793228149414, - "625": 1.452988862991333, - "626": 1.2926194667816162, - "627": 1.2699536085128784, - "628": 1.1802414655685425, - "629": 1.1245898008346558, - "630": 1.1074035167694092, - "631": 1.1504571437835693, - "632": 1.1566340923309326, - "633": 1.2103692293167114, - "634": 1.169101357460022, - "635": 1.094599962234497, - "636": 1.107570767402649, - "637": 1.125802993774414, - "638": 0.9927701950073242, - "639": 1.0007109642028809, - "640": 1.0888326168060303, - "641": 1.1877045631408691, - "642": 1.2481164932250977, - "643": 1.295889973640442, - "644": 1.289625883102417, - "645": 1.1353554725646973, - "646": 1.0083565711975098, - "647": 1.0767444372177124 - }, - "loss": { - "594": 2.1937105655670166, - "595": 2.169579029083252, - "596": 2.219860076904297, - "597": 2.2350001335144043, - "598": 2.1906545162200928, - "599": 2.1421122550964355, - "600": 2.1757922172546387, - "601": 2.1895949840545654, - "602": 2.176201343536377, - "603": 2.1726856231689453, - "604": 2.1687235832214355, - "605": 2.186079978942871, - "606": 2.1864013671875, - "607": 2.189131021499634, - "608": 2.1774678230285645, - "609": 2.168029308319092, - "610": 2.174156665802002, - "611": 2.191075325012207, - "612": 2.146369695663452, - "613": 2.1871626377105713, - "614": 2.165951728820801, - "615": 2.1848959922790527, - "616": 2.2014527320861816, - "617": 2.1646289825439453, - "618": 2.1827850341796875, - "619": 2.1788363456726074, - "620": 2.1607096195220947, - "621": 2.1529273986816406, - "622": 2.163221597671509, - "623": 2.1707754135131836, - "624": 2.1937742233276367, - "625": 2.2061731815338135, - "626": 2.1780202388763428, - "627": 2.159313440322876, - "628": 2.169443130493164, - "629": 2.1191844940185547, - "630": 2.160952568054199, - "631": 2.1320996284484863, - "632": 2.1442203521728516, - "633": 2.1720244884490967, - "634": 2.164599657058716, - "635": 2.133831024169922, - "636": 2.149535655975342, - "637": 2.169243335723877, - "638": 2.165787935256958, - "639": 2.1171839237213135, - "640": 2.154299020767212, - "641": 2.138416290283203, - "642": 2.1590371131896973, - "643": 2.18825364112854, - "644": 2.165739059448242, - "645": 2.1778724193573, - "646": 2.1531286239624023, - "647": 2.160839080810547 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "step_size_list": [ - 0.201217, - 0.201493, - 0.194249, - 0.196063, - 0.200647, - 0.201598, - 0.201023, - 0.199632, - 0.200783, - 0.201037, - 0.20216, - 0.20193, - 0.201743, - 0.20272, - 0.200715, - 0.198187, - 0.19821, - 0.201753, - 0.203068, - 0.201765, - 0.200629, - 0.200655, - 0.20129, - 0.202694, - 0.200519, - 0.199544, - 0.202357, - 0.202987, - 0.200381, - 0.198765, - 0.196993, - 0.194946, - 0.19862, - 0.19902, - 0.201118, - 0.202038, - 0.202638, - 0.20155, - 0.201486, - 0.200465, - 0.201334, - 0.202761, - 0.202572, - 0.202294, - 0.204973, - 0.204597, - 0.202991, - 0.200763, - 0.199524, - 0.198615, - 0.198605, - 0.202139, - 0.204613, - 0.203275 - ], - "train_epoch_time": 4.845956087112427, - "train_loss": 2.150563732897021, - "train_score": 0.36072229200832473, - "val_loss": 2.238299369538282, - "val_score": 0.3406599821986889 - }, - { - "epoch": 12, - "grad_norm": 0.7353389263153076, - "learning_rate": 0.215, - "model_norm": 87.7488021850586, - "step_logs": { - "grad_norm": { - "648": 1.2911244630813599, - "649": 1.3675874471664429, - "650": 1.2735424041748047, - "651": 1.2170828580856323, - "652": 1.1520575284957886, - "653": 1.1016744375228882, - "654": 1.1052532196044922, - "655": 1.123640775680542, - "656": 1.0669153928756714, - "657": 1.0239628553390503, - "658": 1.1762434244155884, - "659": 1.109971523284912, - "660": 1.0068074464797974, - "661": 1.0006115436553955, - "662": 1.0679601430892944, - "663": 1.1093448400497437, - "664": 1.044920802116394, - "665": 1.0257349014282227, - "666": 1.1455438137054443, - "667": 1.2184151411056519, - "668": 1.3766710758209229, - "669": 1.2604378461837769, - "670": 1.1541317701339722, - "671": 1.0473637580871582, - "672": 1.0855695009231567, - "673": 1.1077187061309814, - "674": 1.0599641799926758, - "675": 1.080422043800354, - "676": 1.077756404876709, - "677": 1.037529468536377, - "678": 0.9889176487922668, - "679": 0.9151387810707092, - "680": 0.9786506295204163, - "681": 0.8844283223152161, - "682": 0.8383657336235046, - "683": 0.8414342403411865, - "684": 0.8716306090354919, - "685": 0.9330067038536072, - "686": 0.9714165925979614, - "687": 0.8918057084083557, - "688": 0.9009073972702026, - "689": 0.9107198119163513, - "690": 0.953681230545044, - "691": 0.8878958225250244, - "692": 0.8807072639465332, - "693": 0.8689160943031311, - "694": 0.9305926561355591, - "695": 0.9528166651725769, - "696": 0.9149192571640015, - "697": 0.8647100329399109, - "698": 0.843787431716919, - "699": 0.8242149949073792, - "700": 0.7857349514961243, - "701": 0.7353389263153076 - }, - "loss": { - "648": 2.1395187377929688, - "649": 2.1873302459716797, - "650": 2.1630945205688477, - "651": 2.154590368270874, - "652": 2.163867950439453, - "653": 2.1423096656799316, - "654": 2.119457244873047, - "655": 2.1374893188476562, - "656": 2.1446125507354736, - "657": 2.134413003921509, - "658": 2.1253976821899414, - "659": 2.1292295455932617, - "660": 2.107851505279541, - "661": 2.1307358741760254, - "662": 2.1395468711853027, - "663": 2.139540672302246, - "664": 2.125692367553711, - "665": 2.1287403106689453, - "666": 2.128596782684326, - "667": 2.114706039428711, - "668": 2.128831386566162, - "669": 2.1656930446624756, - "670": 2.12746524810791, - "671": 2.102405071258545, - "672": 2.1138434410095215, - "673": 2.146491527557373, - "674": 2.121488571166992, - "675": 2.104985475540161, - "676": 2.1472997665405273, - "677": 2.1106696128845215, - "678": 2.112091064453125, - "679": 2.1224775314331055, - "680": 2.10707688331604, - "681": 2.079653263092041, - "682": 2.100478172302246, - "683": 2.10656476020813, - "684": 2.099215507507324, - "685": 2.096406936645508, - "686": 2.096055507659912, - "687": 2.0820322036743164, - "688": 2.0983026027679443, - "689": 2.0865349769592285, - "690": 2.0767717361450195, - "691": 2.0998764038085938, - "692": 2.062168598175049, - "693": 2.0599799156188965, - "694": 2.0793333053588867, - "695": 2.075690269470215, - "696": 2.092489242553711, - "697": 2.0711312294006348, - "698": 2.0643420219421387, - "699": 2.0595874786376953, - "700": 2.0784080028533936, - "701": 2.068653106689453 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "step_size_list": [ - 0.198384, - 0.195787, - 0.196687, - 0.196747, - 0.197021, - 0.196751, - 0.19538, - 0.193928, - 0.193867, - 0.193409, - 0.189299, - 0.189419, - 0.18998, - 0.188967, - 0.186648, - 0.184728, - 0.18458, - 0.183702, - 0.180479, - 0.177931, - 0.173871, - 0.175111, - 0.175589, - 0.176005, - 0.174252, - 0.172839, - 0.172261, - 0.170687, - 0.169678, - 0.168909, - 0.168357, - 0.168092, - 0.166021, - 0.165863, - 0.165182, - 0.163907, - 0.162311, - 0.160371, - 0.158689, - 0.158305, - 0.156997, - 0.155619, - 0.153893, - 0.153385, - 0.152125, - 0.150981, - 0.149168, - 0.147694, - 0.146852, - 0.146016, - 0.144927, - 0.143821, - 0.142889, - 0.141976 - ], - "train_epoch_time": 4.845593690872192, - "train_loss": 2.0632568827318494, - "train_score": 0.3854891499111608, - "val_loss": 2.1587396522066484, - "val_score": 0.3612092782996689 - }, - { - "epoch": 13, - "grad_norm": 0.634152352809906, - "learning_rate": 0.14333333333333334, - "model_norm": 87.76688385009766, - "step_logs": { - "grad_norm": { - "702": 0.7698751091957092, - "703": 0.8198410272598267, - "704": 0.9168505668640137, - "705": 0.9296581149101257, - "706": 0.8759832382202148, - "707": 0.8478113412857056, - "708": 0.8359658122062683, - "709": 0.8174706697463989, - "710": 0.791767418384552, - "711": 0.7542242407798767, - "712": 0.7842721939086914, - "713": 0.7353886961936951, - "714": 0.7139715552330017, - "715": 0.7196046113967896, - "716": 0.6799511909484863, - "717": 0.6413664221763611, - "718": 0.5351870059967041, - "719": 0.558090329170227, - "720": 0.580518364906311, - "721": 0.6409737467765808, - "722": 0.7095553874969482, - "723": 0.794053852558136, - "724": 0.988107442855835, - "725": 0.9364408254623413, - "726": 0.799116849899292, - "727": 0.7245419025421143, - "728": 0.7226298451423645, - "729": 0.7147102952003479, - "730": 0.6701464056968689, - "731": 0.6959460377693176, - "732": 0.6833547949790955, - "733": 0.6730333566665649, - "734": 0.6496109962463379, - "735": 0.641011655330658, - "736": 0.6611383557319641, - "737": 0.7568486928939819, - "738": 0.7157216668128967, - "739": 0.6155111789703369, - "740": 0.5869841575622559, - "741": 0.5859789848327637, - "742": 0.613866925239563, - "743": 0.5982912182807922, - "744": 0.6077699661254883, - "745": 0.5792810320854187, - "746": 0.6071605682373047, - "747": 0.5650544762611389, - "748": 0.6018205881118774, - "749": 0.6394475102424622, - "750": 0.6425354480743408, - "751": 0.6009455323219299, - "752": 0.5385817885398865, - "753": 0.5594043731689453, - "754": 0.5386627912521362, - "755": 0.634152352809906 - }, - "loss": { - "702": 2.0609946250915527, - "703": 2.0661439895629883, - "704": 2.072573184967041, - "705": 2.0751543045043945, - "706": 2.062167167663574, - "707": 2.064584255218506, - "708": 2.059007167816162, - "709": 2.0824217796325684, - "710": 2.0671615600585938, - "711": 2.0581789016723633, - "712": 2.040334939956665, - "713": 2.0633294582366943, - "714": 2.042848587036133, - "715": 2.079887866973877, - "716": 2.0341079235076904, - "717": 2.014914035797119, - "718": 2.037348747253418, - "719": 2.0633087158203125, - "720": 2.0116822719573975, - "721": 2.0289316177368164, - "722": 2.0564358234405518, - "723": 2.0336766242980957, - "724": 2.057720899581909, - "725": 2.0924949645996094, - "726": 2.0487751960754395, - "727": 2.045595645904541, - "728": 2.0411906242370605, - "729": 2.0441477298736572, - "730": 2.0299253463745117, - "731": 2.061331272125244, - "732": 2.0285091400146484, - "733": 2.040985345840454, - "734": 2.0330429077148438, - "735": 2.062788963317871, - "736": 2.0251986980438232, - "737": 2.0254530906677246, - "738": 2.047938585281372, - "739": 2.0503218173980713, - "740": 2.0282864570617676, - "741": 2.0657505989074707, - "742": 2.020443916320801, - "743": 2.0209243297576904, - "744": 2.0356192588806152, - "745": 2.0202062129974365, - "746": 2.0494089126586914, - "747": 2.032576560974121, - "748": 2.0152640342712402, - "749": 2.0075504779815674, - "750": 2.0470235347747803, - "751": 2.0157077312469482, - "752": 2.04846453666687, - "753": 2.0420918464660645, - "754": 2.0340828895568848, - "755": 2.0400009155273438 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "step_size_list": [ - 0.140439, - 0.1388, - 0.136777, - 0.135422, - 0.134569, - 0.13352, - 0.13233, - 0.131221, - 0.130098, - 0.129046, - 0.127561, - 0.126599, - 0.125414, - 0.124132, - 0.123009, - 0.12189, - 0.12106, - 0.119681, - 0.118261, - 0.116721, - 0.115144, - 0.113433, - 0.111127, - 0.110204, - 0.109578, - 0.108619, - 0.107333, - 0.106075, - 0.10494, - 0.10357, - 0.1023, - 0.101045, - 0.0998193, - 0.09856, - 0.0971799, - 0.0955734, - 0.0944271, - 0.0934152, - 0.0921739, - 0.0908824, - 0.0894937, - 0.088225, - 0.086902, - 0.0856518, - 0.0842943, - 0.0830656, - 0.08168, - 0.0802948, - 0.0789953, - 0.0777571, - 0.0765581, - 0.0752122, - 0.0739291, - 0.0724724 - ], - "train_epoch_time": 4.845149993896484, - "train_loss": 2.022967890653241, - "train_score": 0.39838033527178607, - "val_loss": 2.1248326591728204, - "val_score": 0.37122380873776467 - }, - { - "epoch": 14, - "grad_norm": 0.5033368468284607, - "learning_rate": 0.07166666666666667, - "model_norm": 87.7729263305664, - "step_logs": { - "grad_norm": { - "756": 0.5817490220069885, - "757": 0.5924174189567566, - "758": 0.6085439324378967, - "759": 0.5963684916496277, - "760": 0.5649013519287109, - "761": 0.5879322290420532, - "762": 0.5865781307220459, - "763": 0.5456233024597168, - "764": 0.5751177668571472, - "765": 0.5415573120117188, - "766": 0.5502078533172607, - "767": 0.5416109561920166, - "768": 0.5399821400642395, - "769": 0.5714665651321411, - "770": 0.5819351673126221, - "771": 0.4997367560863495, - "772": 0.5309842824935913, - "773": 0.5636150240898132, - "774": 0.5608433485031128, - "775": 0.5910018682479858, - "776": 0.5440702438354492, - "777": 0.5018715858459473, - "778": 0.5244648456573486, - "779": 0.583748459815979, - "780": 0.535076916217804, - "781": 0.5455226898193359, - "782": 0.49497535824775696, - "783": 0.5218121409416199, - "784": 0.5231913924217224, - "785": 0.517935574054718, - "786": 0.5191906690597534, - "787": 0.5253349542617798, - "788": 0.532258152961731, - "789": 0.5133633017539978, - "790": 0.5274272561073303, - "791": 0.5238879323005676, - "792": 0.49098068475723267, - "793": 0.49649348855018616, - "794": 0.5085113644599915, - "795": 0.4726006090641022, - "796": 0.48333048820495605, - "797": 0.5235194563865662, - "798": 0.5081830024719238, - "799": 0.4873445928096771, - "800": 0.536490797996521, - "801": 0.5417503118515015, - "802": 0.46851634979248047, - "803": 0.5397835969924927, - "804": 0.5049363970756531, - "805": 0.49544158577919006, - "806": 0.48979800939559937, - "807": 0.537993311882019, - "808": 0.493367999792099, - "809": 0.5033368468284607 - }, - "loss": { - "756": 2.041349172592163, - "757": 2.035072088241577, - "758": 2.0335869789123535, - "759": 2.0425331592559814, - "760": 2.0125341415405273, - "761": 2.0303878784179688, - "762": 2.0292818546295166, - "763": 2.0409562587738037, - "764": 1.9783830642700195, - "765": 2.010605812072754, - "766": 2.0254247188568115, - "767": 2.025029182434082, - "768": 2.0390419960021973, - "769": 1.9939740896224976, - "770": 2.0474905967712402, - "771": 1.9895176887512207, - "772": 2.0228195190429688, - "773": 2.013899326324463, - "774": 2.0186731815338135, - "775": 2.000210762023926, - "776": 2.0273733139038086, - "777": 2.0154099464416504, - "778": 2.021178960800171, - "779": 2.028766632080078, - "780": 2.0093116760253906, - "781": 1.9783612489700317, - "782": 2.0299265384674072, - "783": 1.9778850078582764, - "784": 2.0285398960113525, - "785": 2.0226364135742188, - "786": 1.9988963603973389, - "787": 2.0297608375549316, - "788": 2.024130344390869, - "789": 1.9695649147033691, - "790": 1.9946712255477905, - "791": 2.028390407562256, - "792": 2.0368010997772217, - "793": 1.9961975812911987, - "794": 2.0180156230926514, - "795": 2.005180835723877, - "796": 2.019975185394287, - "797": 1.9743021726608276, - "798": 2.004042863845825, - "799": 2.0229129791259766, - "800": 1.9998270273208618, - "801": 1.9911322593688965, - "802": 2.0253686904907227, - "803": 2.0109105110168457, - "804": 2.0327634811401367, - "805": 2.0213463306427, - "806": 2.0170485973358154, - "807": 2.017352819442749, - "808": 2.0053954124450684, - "809": 1.9924137592315674 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "step_size_list": [ - 0.0712434, - 0.0699155, - 0.0685814, - 0.0672887, - 0.0660107, - 0.0646729, - 0.0633615, - 0.0620941, - 0.0607394, - 0.0594632, - 0.0581413, - 0.056833, - 0.0555195, - 0.0541722, - 0.0528544, - 0.0515917, - 0.0502555, - 0.0489155, - 0.0476006, - 0.046263, - 0.0449753, - 0.0436768, - 0.0423468, - 0.0410003, - 0.0397022, - 0.0383766, - 0.0370773, - 0.0357452, - 0.034426, - 0.0331062, - 0.0317836, - 0.0304615, - 0.029138, - 0.0278185, - 0.0264942, - 0.0251731, - 0.0238552, - 0.0225303, - 0.0212057, - 0.0198854, - 0.0185603, - 0.0172324, - 0.0159096, - 0.0145863, - 0.0132589, - 0.0119339, - 0.0106112, - 0.00928388, - 0.00795899, - 0.00663313, - 0.00530697, - 0.00398034, - 0.00265389, - 0.00132705 - ], - "train_epoch_time": 4.8447723388671875, - "train_loss": 2.00816962838686, - "train_score": 0.4025342987890394, - "val_loss": 2.113864118278232, - "val_score": 0.3735693525843724 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:30:27.143766", - "final_model_norm": 87.7729263305664, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:28:45.505587", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 6.415041446685791, - "learning_rate": 4.64e-11, - "model_norm": 87.42022705078125, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.392029762268066, - "3": 8.84481143951416, - "4": 17.850553512573242, - "5": 6.034968852996826, - "6": 4.637722492218018, - "7": 4.9288530349731445, - "8": 4.521860122680664, - "9": 6.412627696990967, - "10": 3.8342607021331787, - "11": 14.737504005432129, - "12": 6.603000164031982, - "13": 6.025729179382324, - "14": 7.747841835021973, - "15": 3.797628402709961, - "16": 12.529465675354004, - "17": 22.666439056396484, - "18": 4.543037414550781, - "19": 5.329233169555664, - "20": 22.00737762451172, - "21": 4.220154285430908, - "22": 8.132468223571777, - "23": 3.5606038570404053, - "24": 7.961135387420654, - "25": 13.382946968078613, - "26": 5.8169474601745605, - "27": 4.369606971740723, - "28": 17.540950775146484, - "29": 10.750436782836914, - "30": 34.23600387573242, - "31": 3.700395345687866, - "32": 21.214187622070312, - "33": 11.726557731628418, - "34": 6.260554313659668, - "35": 3.59450364112854, - "36": 15.356278419494629, - "37": 4.497802734375, - "38": 18.671419143676758, - "39": 5.688169479370117, - "40": 6.008426666259766, - "41": 10.915599822998047, - "42": 10.294869422912598, - "43": 4.826373100280762, - "44": 3.8631951808929443, - "45": 6.129805088043213, - "46": 4.865635871887207, - "47": 10.85316276550293, - "48": 3.7912490367889404, - "49": 4.910715103149414, - "50": 10.566633224487305, - "51": 3.880162239074707, - "52": 9.479639053344727, - "53": 6.415041446685791 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.8181304931640625, - "3": 3.8262925148010254, - "4": 4.349514484405518, - "5": 4.229140758514404, - "6": 3.5613114833831787, - "7": 3.743668794631958, - "8": 3.849187135696411, - "9": 3.633787155151367, - "10": 3.7944042682647705, - "11": 4.033172607421875, - "12": 6.311926364898682, - "13": 3.8382623195648193, - "14": 5.171895980834961, - "15": 3.6298279762268066, - "16": 3.903632164001465, - "17": 4.803995609283447, - "18": 4.232617378234863, - "19": 3.5956687927246094, - "20": 4.447190284729004, - "21": 3.989043712615967, - "22": 3.576582670211792, - "23": 3.1795363426208496, - "24": 3.9938511848449707, - "25": 4.409998416900635, - "26": 3.63392972946167, - "27": 3.9249258041381836, - "28": 5.45171594619751, - "29": 4.656816482543945, - "30": 4.673017978668213, - "31": 3.7599217891693115, - "32": 5.267796516418457, - "33": 5.631796360015869, - "34": 3.6322474479675293, - "35": 3.36767840385437, - "36": 5.973868370056152, - "37": 3.5887484550476074, - "38": 6.50324821472168, - "39": 5.428341865539551, - "40": 4.202503204345703, - "41": 4.49423885345459, - "42": 4.534539699554443, - "43": 3.8579530715942383, - "44": 3.6919684410095215, - "45": 3.7566146850585938, - "46": 3.64108943939209, - "47": 6.234297752380371, - "48": 3.9295856952667236, - "49": 3.7313809394836426, - "50": 5.694178581237793, - "51": 4.528134822845459, - "52": 4.5458760261535645, - "53": 3.82647705078125 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 4.64e-11, - 0.00602199, - 0.0168834, - 0.0216721, - 0.0157309, - 0.0386733, - 0.0476656, - 0.0536518, - 0.0620122, - 0.0567169, - 0.0786588, - 0.0272315, - 0.080427, - 0.0768105, - 0.0740717, - 0.109045, - 0.0372539, - 0.0167191, - 0.118699, - 0.103941, - 0.016711, - 0.135801, - 0.0707015, - 0.149727, - 0.0804855, - 0.0406226, - 0.113633, - 0.155681, - 0.0311841, - 0.0620166, - 0.00775171, - 0.188786, - 0.0216996, - 0.0646247, - 0.116758, - 0.200115, - 0.0439937, - 0.174491, - 0.0337388, - 0.174117, - 0.143079, - 0.0629558, - 0.0701655, - 0.180997, - 0.223702, - 0.135213, - 0.178776, - 0.0851806, - 0.245467, - 0.184143, - 0.0836164, - 0.261943, - 0.0830617, - 0.132757 - ], - "train_epoch_time": 4.849003076553345, - "train_loss": 3.8052916164890767, - "train_score": 0.12868655840498816, - "val_loss": 3.796106784954137, - "val_score": 0.12458739955786585 - }, - { - "epoch": 1, - "grad_norm": 3.3606152534484863, - "learning_rate": 0.464, - "model_norm": 87.35164642333984, - "step_logs": { - "grad_norm": { - "54": 2.858609676361084, - "55": 10.737136840820312, - "56": 3.2116808891296387, - "57": 3.384654998779297, - "58": 3.546201229095459, - "59": 3.1849629878997803, - "60": 4.52200984954834, - "61": 8.260720252990723, - "62": 2.8668410778045654, - "63": 12.155377388000488, - "64": 3.3598384857177734, - "65": 2.9624006748199463, - "66": 4.3477277755737305, - "67": 3.3108246326446533, - "68": 3.171374559402466, - "69": 3.7421977519989014, - "70": 3.211254596710205, - "71": 4.05525016784668, - "72": 11.48981761932373, - "73": 3.459059476852417, - "74": 2.85099196434021, - "75": 4.509913444519043, - "76": 2.6777124404907227, - "77": 2.991694927215576, - "78": 2.37827205657959, - "79": 4.576386451721191, - "80": 2.6979970932006836, - "81": 4.141533851623535, - "82": 3.5639302730560303, - "83": 2.1359448432922363, - "84": 3.2156429290771484, - "85": 2.999981641769409, - "86": 2.734909772872925, - "87": 4.064084529876709, - "88": 2.995272159576416, - "89": 5.097923278808594, - "90": 3.395552396774292, - "91": 3.936406135559082, - "92": 2.5835976600646973, - "93": 3.791635274887085, - "94": 2.783562660217285, - "95": 4.196510314941406, - "96": 2.4339089393615723, - "97": 13.713967323303223, - "98": 2.9899070262908936, - "99": 2.635986089706421, - "100": 2.4263968467712402, - "101": 2.2045938968658447, - "102": 2.121074914932251, - "103": 3.585427761077881, - "104": 1.44589364528656, - "105": 2.5503478050231934, - "106": 2.310807704925537, - "107": 3.3606152534484863 - }, - "loss": { - "54": 3.838592767715454, - "55": 5.012306213378906, - "56": 3.727595090866089, - "57": 3.6583807468414307, - "58": 3.7307209968566895, - "59": 3.450202465057373, - "60": 3.543002128601074, - "61": 4.588754653930664, - "62": 3.484729051589966, - "63": 5.3817596435546875, - "64": 3.8180785179138184, - "65": 3.299722194671631, - "66": 3.5598297119140625, - "67": 3.657235860824585, - "68": 3.625640869140625, - "69": 3.5288796424865723, - "70": 3.8291115760803223, - "71": 3.703097343444824, - "72": 4.860246658325195, - "73": 3.5869498252868652, - "74": 3.3525750637054443, - "75": 3.75704288482666, - "76": 3.555233955383301, - "77": 3.388500928878784, - "78": 3.2966203689575195, - "79": 3.493374824523926, - "80": 3.443857192993164, - "81": 3.500192880630493, - "82": 3.5496270656585693, - "83": 3.344968795776367, - "84": 3.2407050132751465, - "85": 3.616891384124756, - "86": 3.2851226329803467, - "87": 3.6439428329467773, - "88": 3.2720589637756348, - "89": 3.6229255199432373, - "90": 3.278555393218994, - "91": 3.543064594268799, - "92": 3.378326416015625, - "93": 3.3239662647247314, - "94": 3.398552417755127, - "95": 3.1471972465515137, - "96": 3.292386054992676, - "97": 4.4440999031066895, - "98": 3.343900680541992, - "99": 3.076943874359131, - "100": 3.334789752960205, - "101": 3.07427978515625, - "102": 3.110471248626709, - "103": 3.5102057456970215, - "104": 2.9275636672973633, - "105": 3.050889253616333, - "106": 3.179858446121216, - "107": 3.2726807594299316 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.3106, - 0.0732307, - 0.282585, - 0.268754, - 0.260378, - 0.275845, - 0.198376, - 0.104268, - 0.299901, - 0.0629628, - 0.275219, - 0.286948, - 0.207893, - 0.273689, - 0.282312, - 0.241582, - 0.285574, - 0.228539, - 0.0635471, - 0.261572, - 0.296965, - 0.205677, - 0.316099, - 0.287699, - 0.33189, - 0.194071, - 0.311332, - 0.217138, - 0.253529, - 0.352469, - 0.266627, - 0.294176, - 0.303619, - 0.226167, - 0.283598, - 0.174159, - 0.255523, - 0.230315, - 0.318159, - 0.231603, - 0.303481, - 0.201897, - 0.327353, - 0.0428908, - 0.28638, - 0.30448, - 0.329175, - 0.339485, - 0.347419, - 0.250859, - 0.398053, - 0.310449, - 0.333911, - 0.25769 - ], - "train_epoch_time": 4.845209360122681, - "train_loss": 3.1161415298494752, - "train_score": 0.1969646699715892, - "val_loss": 3.135316308959891, - "val_score": 0.19353652417865058 - }, - { - "epoch": 2, - "grad_norm": 1.0763298273086548, - "learning_rate": 0.464, - "model_norm": 87.33485412597656, - "step_logs": { - "grad_norm": { - "108": 1.4284387826919556, - "109": 1.6965339183807373, - "110": 3.2703537940979004, - "111": 1.5285755395889282, - "112": 1.754952311515808, - "113": 2.799487352371216, - "114": 1.4387683868408203, - "115": 1.8313871622085571, - "116": 1.794874906539917, - "117": 2.0885822772979736, - "118": 1.3231536149978638, - "119": 2.006324529647827, - "120": 1.4542523622512817, - "121": 1.6092643737792969, - "122": 1.6640442609786987, - "123": 1.8148009777069092, - "124": 1.4465478658676147, - "125": 1.3389941453933716, - "126": 1.7375508546829224, - "127": 2.907604694366455, - "128": 1.5427086353302002, - "129": 1.4757261276245117, - "130": 1.6154417991638184, - "131": 1.99034583568573, - "132": 1.387081503868103, - "133": 2.0772249698638916, - "134": 1.6645475625991821, - "135": 2.2846858501434326, - "136": 2.059701919555664, - "137": 1.4060015678405762, - "138": 1.903393268585205, - "139": 1.2354459762573242, - "140": 1.0644891262054443, - "141": 2.1027815341949463, - "142": 2.352018356323242, - "143": 1.2187929153442383, - "144": 1.6577410697937012, - "145": 1.9611213207244873, - "146": 1.984497308731079, - "147": 1.4651412963867188, - "148": 1.2242337465286255, - "149": 1.2056084871292114, - "150": 1.3629612922668457, - "151": 1.2349845170974731, - "152": 1.1854910850524902, - "153": 1.1565769910812378, - "154": 1.252566933631897, - "155": 1.3478208780288696, - "156": 1.703112244606018, - "157": 1.2441173791885376, - "158": 1.2054235935211182, - "159": 1.4467012882232666, - "160": 1.0429385900497437, - "161": 1.0763298273086548 - }, - "loss": { - "108": 3.1150312423706055, - "109": 2.8726983070373535, - "110": 3.2444562911987305, - "111": 3.025568962097168, - "112": 2.893230438232422, - "113": 3.072216510772705, - "114": 3.0514917373657227, - "115": 2.799407720565796, - "116": 3.0119693279266357, - "117": 2.9598679542541504, - "118": 2.912177801132202, - "119": 2.818962574005127, - "120": 2.9666385650634766, - "121": 2.8044402599334717, - "122": 2.895341396331787, - "123": 2.8714845180511475, - "124": 2.874838352203369, - "125": 2.744900941848755, - "126": 2.845977783203125, - "127": 3.015171766281128, - "128": 3.1005349159240723, - "129": 2.7572147846221924, - "130": 2.8017001152038574, - "131": 2.955296039581299, - "132": 2.8445024490356445, - "133": 2.8474457263946533, - "134": 2.9492745399475098, - "135": 2.943544864654541, - "136": 3.050510883331299, - "137": 2.8159570693969727, - "138": 2.784405469894409, - "139": 2.8976099491119385, - "140": 2.670093059539795, - "141": 2.787055492401123, - "142": 3.086111068725586, - "143": 2.804927349090576, - "144": 2.787051200866699, - "145": 2.976318120956421, - "146": 2.861079216003418, - "147": 2.871668815612793, - "148": 2.7234416007995605, - "149": 2.6982059478759766, - "150": 2.728771209716797, - "151": 2.7277684211730957, - "152": 2.6717612743377686, - "153": 2.692474365234375, - "154": 2.6754016876220703, - "155": 2.7360117435455322, - "156": 2.7431869506835938, - "157": 2.8736257553100586, - "158": 2.610884189605713, - "159": 2.81746768951416, - "160": 2.659851312637329, - "161": 2.605217933654785 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.402789, - 0.376487, - 0.262923, - 0.393499, - 0.372103, - 0.291489, - 0.400904, - 0.363079, - 0.371752, - 0.345774, - 0.407206, - 0.348536, - 0.398151, - 0.382133, - 0.379743, - 0.366481, - 0.396966, - 0.40294, - 0.372358, - 0.281127, - 0.393861, - 0.392142, - 0.381548, - 0.353931, - 0.401064, - 0.343307, - 0.380967, - 0.32875, - 0.350812, - 0.399014, - 0.356412, - 0.413471, - 0.422411, - 0.339164, - 0.327714, - 0.413229, - 0.377617, - 0.356981, - 0.35169, - 0.395423, - 0.411467, - 0.412453, - 0.400712, - 0.410722, - 0.413534, - 0.416046, - 0.408432, - 0.402066, - 0.372597, - 0.412458, - 0.410941, - 0.39579, - 0.423793, - 0.420608 - ], - "train_epoch_time": 4.8451924324035645, - "train_loss": 2.6872197686856243, - "train_score": 0.2114441356195439, - "val_loss": 2.7009051861637086, - "val_score": 0.21094646995157929 - }, - { - "epoch": 3, - "grad_norm": 0.9656234979629517, - "learning_rate": 0.464, - "model_norm": 87.35486602783203, - "step_logs": { - "grad_norm": { - "162": 1.1189560890197754, - "163": 1.2439683675765991, - "164": 1.1907100677490234, - "165": 1.1807596683502197, - "166": 1.2217439413070679, - "167": 1.590959072113037, - "168": 1.0795228481292725, - "169": 0.8327977657318115, - "170": 1.4565677642822266, - "171": 1.4457651376724243, - "172": 1.2531334161758423, - "173": 2.302196979522705, - "174": 1.0985028743743896, - "175": 0.8447472453117371, - "176": 0.8835721611976624, - "177": 1.4077037572860718, - "178": 1.3198087215423584, - "179": 1.0174331665039062, - "180": 0.990699291229248, - "181": 1.397781491279602, - "182": 1.1386934518814087, - "183": 0.8387331962585449, - "184": 0.9318520426750183, - "185": 1.1538796424865723, - "186": 1.1080108880996704, - "187": 1.1009576320648193, - "188": 1.5068435668945312, - "189": 1.0529667139053345, - "190": 0.7240515351295471, - "191": 0.8497000932693481, - "192": 1.029805064201355, - "193": 1.2156177759170532, - "194": 1.1472550630569458, - "195": 1.2585384845733643, - "196": 1.1241824626922607, - "197": 0.8736162185668945, - "198": 1.0393797159194946, - "199": 1.399061918258667, - "200": 0.9984358549118042, - "201": 0.8065123558044434, - "202": 0.8300282955169678, - "203": 1.0855735540390015, - "204": 0.9855717420578003, - "205": 0.8677594661712646, - "206": 0.9431927800178528, - "207": 1.0541419982910156, - "208": 1.0813190937042236, - "209": 1.0069924592971802, - "210": 0.9948697686195374, - "211": 1.0864923000335693, - "212": 0.9993169903755188, - "213": 0.8439814448356628, - "214": 0.8373750448226929, - "215": 0.9656234979629517 - }, - "loss": { - "162": 2.6909537315368652, - "163": 2.6405367851257324, - "164": 2.717409610748291, - "165": 2.618783473968506, - "166": 2.7271625995635986, - "167": 2.6659412384033203, - "168": 2.795989513397217, - "169": 2.5793042182922363, - "170": 2.671590805053711, - "171": 2.753861427307129, - "172": 2.7049529552459717, - "173": 2.803457736968994, - "174": 2.837716579437256, - "175": 2.5965073108673096, - "176": 2.588749408721924, - "177": 2.6406588554382324, - "178": 2.739302158355713, - "179": 2.663179397583008, - "180": 2.6126646995544434, - "181": 2.6613595485687256, - "182": 2.7359609603881836, - "183": 2.61000919342041, - "184": 2.596292734146118, - "185": 2.6409802436828613, - "186": 2.648367404937744, - "187": 2.6589622497558594, - "188": 2.6571998596191406, - "189": 2.7352123260498047, - "190": 2.5688743591308594, - "191": 2.5573902130126953, - "192": 2.595913887023926, - "193": 2.637622833251953, - "194": 2.6796278953552246, - "195": 2.617431163787842, - "196": 2.711655616760254, - "197": 2.585380792617798, - "198": 2.584141731262207, - "199": 2.6470589637756348, - "200": 2.707695245742798, - "201": 2.5650477409362793, - "202": 2.5718941688537598, - "203": 2.5787854194641113, - "204": 2.651869297027588, - "205": 2.5722475051879883, - "206": 2.604759693145752, - "207": 2.5915374755859375, - "208": 2.628849983215332, - "209": 2.603396415710449, - "210": 2.5950660705566406, - "211": 2.6052541732788086, - "212": 2.6123600006103516, - "213": 2.5820655822753906, - "214": 2.5787758827209473, - "215": 2.580049991607666 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 0.418793, - 0.408465, - 0.4139, - 0.41299, - 0.41172, - 0.380244, - 0.423088, - 0.436754, - 0.391813, - 0.394527, - 0.408924, - 0.322533, - 0.422334, - 0.436188, - 0.433659, - 0.395196, - 0.404348, - 0.425619, - 0.426802, - 0.396473, - 0.418037, - 0.436693, - 0.430589, - 0.415413, - 0.418944, - 0.419621, - 0.387233, - 0.424115, - 0.443025, - 0.435477, - 0.42383, - 0.410628, - 0.416534, - 0.406877, - 0.418725, - 0.434259, - 0.422976, - 0.396056, - 0.427487, - 0.438219, - 0.436851, - 0.419522, - 0.427658, - 0.434491, - 0.429934, - 0.422018, - 0.420599, - 0.425546, - 0.42628, - 0.419863, - 0.426201, - 0.43609, - 0.436466, - 0.428106 - ], - "train_epoch_time": 4.84496545791626, - "train_loss": 2.602953904191596, - "train_score": 0.23778918581337294, - "val_loss": 2.647889215554764, - "val_score": 0.2324241177872867 - }, - { - "epoch": 4, - "grad_norm": 0.8172276020050049, - "learning_rate": 0.464, - "model_norm": 87.3888168334961, - "step_logs": { - "grad_norm": { - "216": 0.9760596752166748, - "217": 0.9239879846572876, - "218": 0.9516480565071106, - "219": 0.9669117331504822, - "220": 0.9742136597633362, - "221": 0.8865550756454468, - "222": 0.8860629200935364, - "223": 1.0282435417175293, - "224": 0.9823834896087646, - "225": 0.97651606798172, - "226": 0.9466106295585632, - "227": 0.9056035876274109, - "228": 0.9550831317901611, - "229": 0.9927959442138672, - "230": 0.9454711675643921, - "231": 0.815541684627533, - "232": 0.8566503524780273, - "233": 1.0298569202423096, - "234": 1.0618904829025269, - "235": 0.9589795470237732, - "236": 1.2321819067001343, - "237": 1.1048648357391357, - "238": 0.8869365453720093, - "239": 0.8407354950904846, - "240": 0.9086779356002808, - "241": 0.9720743894577026, - "242": 0.9201876521110535, - "243": 0.8125502467155457, - "244": 0.7550165057182312, - "245": 0.8655475378036499, - "246": 0.8935474157333374, - "247": 0.8462132215499878, - "248": 0.7538568377494812, - "249": 0.8339540362358093, - "250": 1.0733461380004883, - "251": 0.981153666973114, - "252": 0.9008215665817261, - "253": 0.9153002500534058, - "254": 0.9088084101676941, - "255": 0.8909235596656799, - "256": 0.8397300243377686, - "257": 0.7997252941131592, - "258": 0.9583136439323425, - "259": 0.9640246629714966, - "260": 0.9869414567947388, - "261": 0.8781303763389587, - "262": 0.7422873973846436, - "263": 0.8049411773681641, - "264": 0.9022356271743774, - "265": 0.844919741153717, - "266": 0.8651341795921326, - "267": 0.8847090601921082, - "268": 0.8013103604316711, - "269": 0.8172276020050049 - }, - "loss": { - "216": 2.617234706878662, - "217": 2.5937962532043457, - "218": 2.587912082672119, - "219": 2.5723719596862793, - "220": 2.59344482421875, - "221": 2.582096815109253, - "222": 2.5561814308166504, - "223": 2.5979394912719727, - "224": 2.612966537475586, - "225": 2.5602614879608154, - "226": 2.6081910133361816, - "227": 2.5647189617156982, - "228": 2.597980260848999, - "229": 2.5668869018554688, - "230": 2.5928525924682617, - "231": 2.562885284423828, - "232": 2.5771901607513428, - "233": 2.5621814727783203, - "234": 2.641939163208008, - "235": 2.5838325023651123, - "236": 2.5744218826293945, - "237": 2.675947904586792, - "238": 2.572277069091797, - "239": 2.5442347526550293, - "240": 2.546400547027588, - "241": 2.5873584747314453, - "242": 2.561640739440918, - "243": 2.5563535690307617, - "244": 2.519526481628418, - "245": 2.550574779510498, - "246": 2.537480354309082, - "247": 2.5504183769226074, - "248": 2.529621124267578, - "249": 2.530531644821167, - "250": 2.574716567993164, - "251": 2.6057841777801514, - "252": 2.5374152660369873, - "253": 2.589334487915039, - "254": 2.5366361141204834, - "255": 2.5849251747131348, - "256": 2.54815936088562, - "257": 2.5613887310028076, - "258": 2.5224509239196777, - "259": 2.5913634300231934, - "260": 2.548593044281006, - "261": 2.5898196697235107, - "262": 2.537853956222534, - "263": 2.525055408477783, - "264": 2.5268354415893555, - "265": 2.5603861808776855, - "266": 2.5134687423706055, - "267": 2.534497022628784, - "268": 2.508770704269409, - "269": 2.5317869186401367 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 0.427867, - 0.431081, - 0.429158, - 0.427918, - 0.427688, - 0.433394, - 0.433136, - 0.42397, - 0.427379, - 0.427095, - 0.429747, - 0.431955, - 0.42905, - 0.426046, - 0.429636, - 0.43765, - 0.435247, - 0.423344, - 0.422194, - 0.428608, - 0.408155, - 0.419592, - 0.43326, - 0.435904, - 0.431536, - 0.427757, - 0.430952, - 0.437769, - 0.440859, - 0.434398, - 0.432433, - 0.435624, - 0.441014, - 0.436188, - 0.420362, - 0.427371, - 0.431951, - 0.431603, - 0.431411, - 0.433143, - 0.436008, - 0.438593, - 0.42786, - 0.428359, - 0.426209, - 0.434019, - 0.441749, - 0.437929, - 0.431732, - 0.435809, - 0.434016, - 0.432978, - 0.437993, - 0.437241 - ], - "train_epoch_time": 4.843236684799194, - "train_loss": 2.519512945473451, - "train_score": 0.26451421259465485, - "val_loss": 2.560137695340968, - "val_score": 0.25797843690170347 - }, - { - "epoch": 5, - "grad_norm": 1.2359113693237305, - "learning_rate": 0.464, - "model_norm": 87.43255615234375, - "step_logs": { - "grad_norm": { - "270": 0.909137487411499, - "271": 0.901824414730072, - "272": 0.8281104564666748, - "273": 0.8244837522506714, - "274": 0.8318466544151306, - "275": 0.9051135182380676, - "276": 0.9758633375167847, - "277": 0.9725322127342224, - "278": 0.8185096383094788, - "279": 0.8279964327812195, - "280": 0.8715904355049133, - "281": 1.2166608572006226, - "282": 1.1724765300750732, - "283": 0.9091575741767883, - "284": 0.7718557715415955, - "285": 0.8962677717208862, - "286": 0.9210909605026245, - "287": 1.0917760133743286, - "288": 0.9456706643104553, - "289": 0.759105384349823, - "290": 0.8831875324249268, - "291": 0.9598322510719299, - "292": 1.4102511405944824, - "293": 0.8130441904067993, - "294": 0.7901896238327026, - "295": 0.884803056716919, - "296": 0.8717489242553711, - "297": 0.8379859924316406, - "298": 0.9207040667533875, - "299": 0.9763308763504028, - "300": 1.0011659860610962, - "301": 0.9384574890136719, - "302": 0.9010646343231201, - "303": 1.242040753364563, - "304": 0.8243430256843567, - "305": 0.6908825635910034, - "306": 0.7374172806739807, - "307": 0.8237206935882568, - "308": 1.0682594776153564, - "309": 0.9662548899650574, - "310": 0.8974555134773254, - "311": 0.9084458947181702, - "312": 1.1049094200134277, - "313": 0.9480563998222351, - "314": 0.8933523297309875, - "315": 1.027705430984497, - "316": 1.4170277118682861, - "317": 0.8679870367050171, - "318": 0.716618001461029, - "319": 0.7424120306968689, - "320": 0.8490312099456787, - "321": 0.9254270792007446, - "322": 1.0150771141052246, - "323": 1.2359113693237305 - }, - "loss": { - "270": 2.502199411392212, - "271": 2.5485618114471436, - "272": 2.528486967086792, - "273": 2.532589912414551, - "274": 2.516814708709717, - "275": 2.5192108154296875, - "276": 2.538707733154297, - "277": 2.549635887145996, - "278": 2.512437105178833, - "279": 2.5092053413391113, - "280": 2.507082462310791, - "281": 2.5263195037841797, - "282": 2.602623462677002, - "283": 2.551633834838867, - "284": 2.5005698204040527, - "285": 2.4863510131835938, - "286": 2.524898052215576, - "287": 2.482865333557129, - "288": 2.566267967224121, - "289": 2.4417240619659424, - "290": 2.487529754638672, - "291": 2.4960594177246094, - "292": 2.5086896419525146, - "293": 2.563426971435547, - "294": 2.510998249053955, - "295": 2.532041549682617, - "296": 2.543931007385254, - "297": 2.4897828102111816, - "298": 2.502875328063965, - "299": 2.4739670753479004, - "300": 2.520045757293701, - "301": 2.5044987201690674, - "302": 2.4901108741760254, - "303": 2.471242904663086, - "304": 2.549704074859619, - "305": 2.478025197982788, - "306": 2.4522199630737305, - "307": 2.4467692375183105, - "308": 2.4881672859191895, - "309": 2.5033488273620605, - "310": 2.4888992309570312, - "311": 2.4655141830444336, - "312": 2.4746758937835693, - "313": 2.495021343231201, - "314": 2.4570131301879883, - "315": 2.461097240447998, - "316": 2.52840518951416, - "317": 2.5090882778167725, - "318": 2.4881362915039062, - "319": 2.4649782180786133, - "320": 2.4455957412719727, - "321": 2.4473414421081543, - "322": 2.476041316986084, - "323": 2.4688591957092285 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 0.430973, - 0.432016, - 0.436532, - 0.4368, - 0.436178, - 0.431449, - 0.426852, - 0.427231, - 0.436967, - 0.436341, - 0.433524, - 0.408473, - 0.413348, - 0.431566, - 0.439696, - 0.431646, - 0.430444, - 0.417499, - 0.429293, - 0.439914, - 0.432534, - 0.427402, - 0.391918, - 0.437807, - 0.438692, - 0.432944, - 0.433927, - 0.435503, - 0.430197, - 0.425927, - 0.424801, - 0.429001, - 0.431369, - 0.405302, - 0.436981, - 0.444152, - 0.441297, - 0.435952, - 0.419376, - 0.427049, - 0.431597, - 0.430564, - 0.416348, - 0.428212, - 0.431484, - 0.421986, - 0.39181, - 0.433782, - 0.442797, - 0.441117, - 0.434301, - 0.429159, - 0.423147, - 0.405758 - ], - "train_epoch_time": 4.8421950340271, - "train_loss": 2.5325661044209724, - "train_score": 0.24928376069198893, - "val_loss": 2.5795151303200443, - "val_score": 0.23620030867314365 - }, - { - "epoch": 6, - "grad_norm": 0.9817253351211548, - "learning_rate": 0.464, - "model_norm": 87.48594665527344, - "step_logs": { - "grad_norm": { - "324": 0.996283769607544, - "325": 0.7722855806350708, - "326": 0.8033398985862732, - "327": 0.8197267651557922, - "328": 0.9329664707183838, - "329": 1.2516851425170898, - "330": 0.9120838046073914, - "331": 0.9575769901275635, - "332": 0.9959551095962524, - "333": 0.9943754076957703, - "334": 0.9758836030960083, - "335": 1.2174527645111084, - "336": 1.0190855264663696, - "337": 0.8289433717727661, - "338": 1.024200677871704, - "339": 0.9762762784957886, - "340": 0.9613446593284607, - "341": 1.000328779220581, - "342": 1.1516013145446777, - "343": 0.977079451084137, - "344": 0.9106581211090088, - "345": 1.1612099409103394, - "346": 0.9213308095932007, - "347": 0.818103015422821, - "348": 1.3374097347259521, - "349": 0.8248967528343201, - "350": 0.7944849133491516, - "351": 0.841280996799469, - "352": 0.8733251690864563, - "353": 0.9115166664123535, - "354": 0.8414322137832642, - "355": 0.8549865484237671, - "356": 1.116495966911316, - "357": 0.8457679748535156, - "358": 1.2006250619888306, - "359": 1.0738205909729004, - "360": 0.8266971707344055, - "361": 0.8697921633720398, - "362": 1.067987084388733, - "363": 1.0071406364440918, - "364": 0.8190472722053528, - "365": 0.7928370237350464, - "366": 0.9520807862281799, - "367": 0.8789686560630798, - "368": 0.8155698776245117, - "369": 0.7787619233131409, - "370": 0.9483415484428406, - "371": 0.9625656604766846, - "372": 1.0349348783493042, - "373": 1.2274625301361084, - "374": 0.8992341756820679, - "375": 0.8969486951828003, - "376": 0.9945662617683411, - "377": 0.9817253351211548 - }, - "loss": { - "324": 2.5065536499023438, - "325": 2.4248814582824707, - "326": 2.4337539672851562, - "327": 2.4043781757354736, - "328": 2.4621071815490723, - "329": 2.438467264175415, - "330": 2.4949569702148438, - "331": 2.46988582611084, - "332": 2.4390950202941895, - "333": 2.4317359924316406, - "334": 2.434683322906494, - "335": 2.4207005500793457, - "336": 2.535224437713623, - "337": 2.3995237350463867, - "338": 2.4224109649658203, - "339": 2.4604482650756836, - "340": 2.4257209300994873, - "341": 2.4305429458618164, - "342": 2.4400181770324707, - "343": 2.4389264583587646, - "344": 2.4062788486480713, - "345": 2.413264274597168, - "346": 2.4867923259735107, - "347": 2.3970446586608887, - "348": 2.4382452964782715, - "349": 2.464383840560913, - "350": 2.44979190826416, - "351": 2.417203426361084, - "352": 2.404001474380493, - "353": 2.3936820030212402, - "354": 2.4074127674102783, - "355": 2.3702239990234375, - "356": 2.388772487640381, - "357": 2.4222207069396973, - "358": 2.428964614868164, - "359": 2.4855432510375977, - "360": 2.354668140411377, - "361": 2.3889927864074707, - "362": 2.376720905303955, - "363": 2.4616425037384033, - "364": 2.355320453643799, - "365": 2.373386859893799, - "366": 2.377262592315674, - "367": 2.412806272506714, - "368": 2.368696689605713, - "369": 2.365194320678711, - "370": 2.3522796630859375, - "371": 2.442009925842285, - "372": 2.398568868637085, - "373": 2.4214911460876465, - "374": 2.4210433959960938, - "375": 2.3750839233398438, - "376": 2.3622050285339355, - "377": 2.3980607986450195 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 0.424959, - 0.438952, - 0.437109, - 0.435747, - 0.428828, - 0.403808, - 0.430684, - 0.427204, - 0.423996, - 0.424002, - 0.425396, - 0.406286, - 0.42373, - 0.435094, - 0.42164, - 0.425738, - 0.426318, - 0.423545, - 0.412043, - 0.425371, - 0.429647, - 0.410754, - 0.429951, - 0.435772, - 0.396516, - 0.436066, - 0.437828, - 0.434486, - 0.432189, - 0.429419, - 0.434363, - 0.433017, - 0.413891, - 0.434248, - 0.407846, - 0.418913, - 0.434727, - 0.432243, - 0.417515, - 0.423513, - 0.43524, - 0.43714, - 0.426289, - 0.431914, - 0.43562, - 0.437947, - 0.426196, - 0.426461, - 0.420442, - 0.40547, - 0.430631, - 0.430193, - 0.422914, - 0.424426 - ], - "train_epoch_time": 4.844985723495483, - "train_loss": 2.356054432936002, - "train_score": 0.3018673781513997, - "val_loss": 2.3983173707048873, - "val_score": 0.295749318996609 - }, - { - "epoch": 7, - "grad_norm": 1.005070447921753, - "learning_rate": 0.464, - "model_norm": 87.54090118408203, - "step_logs": { - "grad_norm": { - "378": 0.8268588781356812, - "379": 0.8470097780227661, - "380": 0.7971072196960449, - "381": 0.834544837474823, - "382": 0.8371056318283081, - "383": 0.8414035439491272, - "384": 1.1593236923217773, - "385": 1.146713376045227, - "386": 0.931350827217102, - "387": 1.0470621585845947, - "388": 0.9749681353569031, - "389": 1.0175281763076782, - "390": 0.828610897064209, - "391": 0.7926346659660339, - "392": 0.9615519642829895, - "393": 0.9542427062988281, - "394": 0.9155589938163757, - "395": 0.8437886834144592, - "396": 0.8221478462219238, - "397": 0.8341313600540161, - "398": 0.8109428286552429, - "399": 1.310091257095337, - "400": 0.8244293332099915, - "401": 0.8038738369941711, - "402": 0.9810850024223328, - "403": 1.0706017017364502, - "404": 0.8593807220458984, - "405": 0.7971512675285339, - "406": 0.8965023756027222, - "407": 0.930963933467865, - "408": 0.9270331859588623, - "409": 0.9097792506217957, - "410": 0.999769926071167, - "411": 0.8395422697067261, - "412": 0.7553023099899292, - "413": 0.7785903811454773, - "414": 0.9699762463569641, - "415": 0.9446796774864197, - "416": 0.8213992118835449, - "417": 0.8536132574081421, - "418": 0.9033850431442261, - "419": 0.8954663276672363, - "420": 0.9131744503974915, - "421": 0.9360767602920532, - "422": 1.1010725498199463, - "423": 0.9090158343315125, - "424": 0.7672095894813538, - "425": 0.7239177227020264, - "426": 0.7278748154640198, - "427": 0.8265972137451172, - "428": 0.8000284433364868, - "429": 0.7516077756881714, - "430": 0.7950315475463867, - "431": 1.005070447921753 - }, - "loss": { - "378": 2.362210273742676, - "379": 2.3281240463256836, - "380": 2.3612046241760254, - "381": 2.317265748977661, - "382": 2.3329477310180664, - "383": 2.329225540161133, - "384": 2.350372791290283, - "385": 2.429685354232788, - "386": 2.353789806365967, - "387": 2.381924867630005, - "388": 2.3688807487487793, - "389": 2.3693394660949707, - "390": 2.3619942665100098, - "391": 2.3304827213287354, - "392": 2.317772388458252, - "393": 2.375673294067383, - "394": 2.3185245990753174, - "395": 2.337268829345703, - "396": 2.327305555343628, - "397": 2.3292040824890137, - "398": 2.3386919498443604, - "399": 2.3263797760009766, - "400": 2.393317699432373, - "401": 2.337904214859009, - "402": 2.3414881229400635, - "403": 2.38059663772583, - "404": 2.3296093940734863, - "405": 2.328822612762451, - "406": 2.341782569885254, - "407": 2.3438730239868164, - "408": 2.3233118057250977, - "409": 2.35960054397583, - "410": 2.328158140182495, - "411": 2.367513418197632, - "412": 2.3184781074523926, - "413": 2.302306652069092, - "414": 2.326796054840088, - "415": 2.3582675457000732, - "416": 2.317800283432007, - "417": 2.3093037605285645, - "418": 2.299227714538574, - "419": 2.3459649085998535, - "420": 2.3045103549957275, - "421": 2.3308467864990234, - "422": 2.32253098487854, - "423": 2.372323513031006, - "424": 2.302820920944214, - "425": 2.297060012817383, - "426": 2.2756214141845703, - "427": 2.288423538208008, - "428": 2.2854299545288086, - "429": 2.292900323867798, - "430": 2.273482322692871, - "431": 2.306735038757324 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 0.434804, - 0.433041, - 0.436735, - 0.433755, - 0.433772, - 0.433436, - 0.409653, - 0.41224, - 0.427454, - 0.419233, - 0.424483, - 0.42129, - 0.434685, - 0.436688, - 0.424696, - 0.426109, - 0.428092, - 0.433373, - 0.434709, - 0.433928, - 0.435584, - 0.396187, - 0.435318, - 0.436038, - 0.423601, - 0.417378, - 0.432211, - 0.436376, - 0.429779, - 0.42734, - 0.427328, - 0.429081, - 0.42197, - 0.434023, - 0.438943, - 0.437288, - 0.424205, - 0.426551, - 0.434647, - 0.432351, - 0.428698, - 0.429909, - 0.428064, - 0.426778, - 0.413878, - 0.429308, - 0.438025, - 0.440675, - 0.440222, - 0.433941, - 0.435692, - 0.438912, - 0.435885, - 0.421206 - ], - "train_epoch_time": 4.845086574554443, - "train_loss": 2.326692772800987, - "train_score": 0.31942925027316404, - "val_loss": 2.37390440777714, - "val_score": 0.30585802925577393 - }, - { - "epoch": 8, - "grad_norm": 0.7360766530036926, - "learning_rate": 0.464, - "model_norm": 87.59782409667969, - "step_logs": { - "grad_norm": { - "432": 0.9492727518081665, - "433": 0.9889562129974365, - "434": 0.9355610609054565, - "435": 0.8517712950706482, - "436": 1.0749973058700562, - "437": 0.8070528507232666, - "438": 0.6129425168037415, - "439": 0.7382088899612427, - "440": 0.8592383861541748, - "441": 0.9270436763763428, - "442": 1.020334243774414, - "443": 1.0437281131744385, - "444": 1.07532799243927, - "445": 0.8778788447380066, - "446": 0.7227190732955933, - "447": 0.699779748916626, - "448": 0.7732878923416138, - "449": 0.8422675132751465, - "450": 0.8751144409179688, - "451": 1.074497938156128, - "452": 0.9190720319747925, - "453": 0.7915751934051514, - "454": 0.9204183220863342, - "455": 0.9571609497070312, - "456": 0.8995140790939331, - "457": 0.9322042465209961, - "458": 1.0969650745391846, - "459": 0.8717686533927917, - "460": 0.8497338891029358, - "461": 1.1798006296157837, - "462": 0.9829171895980835, - "463": 0.6557402014732361, - "464": 0.7098110914230347, - "465": 0.820646345615387, - "466": 0.8885836005210876, - "467": 0.8325942158699036, - "468": 0.8422518372535706, - "469": 0.9479615092277527, - "470": 0.8833321928977966, - "471": 0.8008051514625549, - "472": 0.7530004382133484, - "473": 0.8229758739471436, - "474": 0.9000000953674316, - "475": 0.9692131876945496, - "476": 1.2287533283233643, - "477": 1.1396442651748657, - "478": 0.8824846744537354, - "479": 0.8060889840126038, - "480": 0.8273478746414185, - "481": 0.9048917889595032, - "482": 0.9326873421669006, - "483": 0.8799415230751038, - "484": 0.7800669074058533, - "485": 0.7360766530036926 - }, - "loss": { - "432": 2.342108726501465, - "433": 2.3036346435546875, - "434": 2.3311920166015625, - "435": 2.299793243408203, - "436": 2.322309970855713, - "437": 2.326324939727783, - "438": 2.2880859375, - "439": 2.242396831512451, - "440": 2.3136098384857178, - "441": 2.305769920349121, - "442": 2.302980899810791, - "443": 2.3291239738464355, - "444": 2.318730354309082, - "445": 2.327669143676758, - "446": 2.2609777450561523, - "447": 2.237064838409424, - "448": 2.2484545707702637, - "449": 2.2642552852630615, - "450": 2.2931790351867676, - "451": 2.2730207443237305, - "452": 2.3069095611572266, - "453": 2.260981559753418, - "454": 2.275710105895996, - "455": 2.2993481159210205, - "456": 2.274301528930664, - "457": 2.3036623001098633, - "458": 2.2984368801116943, - "459": 2.306331157684326, - "460": 2.2610087394714355, - "461": 2.2936747074127197, - "462": 2.3422675132751465, - "463": 2.234441041946411, - "464": 2.240450620651245, - "465": 2.2709479331970215, - "466": 2.2742180824279785, - "467": 2.267306327819824, - "468": 2.2503585815429688, - "469": 2.2720706462860107, - "470": 2.283860206604004, - "471": 2.2249021530151367, - "472": 2.2511680126190186, - "473": 2.227466106414795, - "474": 2.250838279724121, - "475": 2.2631845474243164, - "476": 2.273127555847168, - "477": 2.3066482543945312, - "478": 2.2900753021240234, - "479": 2.240640163421631, - "480": 2.2646498680114746, - "481": 2.2605137825012207, - "482": 2.2660460472106934, - "483": 2.233567476272583, - "484": 2.2607524394989014, - "485": 2.243975877761841 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 0.425977, - 0.422395, - 0.426821, - 0.432356, - 0.415977, - 0.435699, - 0.446973, - 0.439235, - 0.432017, - 0.427071, - 0.419956, - 0.41858, - 0.415884, - 0.430901, - 0.440397, - 0.441575, - 0.437035, - 0.432558, - 0.430635, - 0.415086, - 0.42767, - 0.435969, - 0.427112, - 0.424738, - 0.428622, - 0.42666, - 0.413746, - 0.431047, - 0.431994, - 0.406736, - 0.423476, - 0.44417, - 0.440993, - 0.434131, - 0.429412, - 0.433267, - 0.432378, - 0.425002, - 0.429923, - 0.434917, - 0.438383, - 0.433425, - 0.428246, - 0.423243, - 0.402046, - 0.41039, - 0.430069, - 0.43475, - 0.433595, - 0.428029, - 0.426055, - 0.42946, - 0.436728, - 0.439387 - ], - "train_epoch_time": 4.845744371414185, - "train_loss": 2.243636061710812, - "train_score": 0.3346025376699576, - "val_loss": 2.3135698170119943, - "val_score": 0.3151236011568632 - }, - { - "epoch": 9, - "grad_norm": 0.843839168548584, - "learning_rate": 0.464, - "model_norm": 87.65990447998047, - "step_logs": { - "grad_norm": { - "486": 0.8283859491348267, - "487": 0.8237842321395874, - "488": 0.820415735244751, - "489": 0.8672774434089661, - "490": 0.8662886023521423, - "491": 0.8299398422241211, - "492": 0.7683955430984497, - "493": 0.7220995426177979, - "494": 0.6857521533966064, - "495": 0.8653535842895508, - "496": 0.8666710257530212, - "497": 0.8675562739372253, - "498": 1.0014952421188354, - "499": 1.167513370513916, - "500": 1.0229462385177612, - "501": 0.7397806644439697, - "502": 0.7110167741775513, - "503": 0.6809916496276855, - "504": 0.767841100692749, - "505": 0.7951772809028625, - "506": 0.7795646786689758, - "507": 0.8319077491760254, - "508": 0.9412534236907959, - "509": 0.9180862903594971, - "510": 0.9317770004272461, - "511": 0.9476799964904785, - "512": 0.9560616612434387, - "513": 0.849151074886322, - "514": 0.8399502038955688, - "515": 0.8634765148162842, - "516": 0.7798449397087097, - "517": 0.7874664664268494, - "518": 0.8707910776138306, - "519": 0.8813244104385376, - "520": 0.842995285987854, - "521": 0.8618345856666565, - "522": 0.8467002511024475, - "523": 0.9257267713546753, - "524": 1.0691838264465332, - "525": 0.8687216639518738, - "526": 0.6504631042480469, - "527": 0.6196876764297485, - "528": 0.7249985933303833, - "529": 0.71677166223526, - "530": 0.7958369255065918, - "531": 0.9018344879150391, - "532": 0.9132189154624939, - "533": 0.9065372347831726, - "534": 0.9300948977470398, - "535": 1.0274509191513062, - "536": 0.893096387386322, - "537": 0.8255204558372498, - "538": 0.8568991422653198, - "539": 0.843839168548584 - }, - "loss": { - "486": 2.2444119453430176, - "487": 2.234156370162964, - "488": 2.232797622680664, - "489": 2.238800525665283, - "490": 2.227388858795166, - "491": 2.2598471641540527, - "492": 2.2319464683532715, - "493": 2.2023682594299316, - "494": 2.222175359725952, - "495": 2.23241925239563, - "496": 2.2589008808135986, - "497": 2.2227718830108643, - "498": 2.2694079875946045, - "499": 2.227041721343994, - "500": 2.2854886054992676, - "501": 2.2369472980499268, - "502": 2.218459129333496, - "503": 2.21677303314209, - "504": 2.232786178588867, - "505": 2.2118406295776367, - "506": 2.2002336978912354, - "507": 2.218547821044922, - "508": 2.221189260482788, - "509": 2.2538862228393555, - "510": 2.2390835285186768, - "511": 2.2270569801330566, - "512": 2.2375235557556152, - "513": 2.273449659347534, - "514": 2.2211389541625977, - "515": 2.2329139709472656, - "516": 2.198747396469116, - "517": 2.2132506370544434, - "518": 2.2233290672302246, - "519": 2.2301084995269775, - "520": 2.231362819671631, - "521": 2.2087626457214355, - "522": 2.2147560119628906, - "523": 2.2510318756103516, - "524": 2.2430527210235596, - "525": 2.26008939743042, - "526": 2.2032294273376465, - "527": 2.142625570297241, - "528": 2.169355630874634, - "529": 2.1630747318267822, - "530": 2.2113561630249023, - "531": 2.1988039016723633, - "532": 2.225597858428955, - "533": 2.230647087097168, - "534": 2.205254316329956, - "535": 2.188803195953369, - "536": 2.2361598014831543, - "537": 2.183591842651367, - "538": 2.192451000213623, - "539": 2.1907026767730713 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 0.433267, - 0.433455, - 0.43367, - 0.430449, - 0.43036, - 0.433356, - 0.43717, - 0.43984, - 0.442286, - 0.430498, - 0.430769, - 0.430204, - 0.420848, - 0.406305, - 0.419446, - 0.439078, - 0.440701, - 0.442522, - 0.437216, - 0.43514, - 0.436057, - 0.432686, - 0.4247, - 0.426957, - 0.425704, - 0.424303, - 0.423832, - 0.432198, - 0.432154, - 0.43064, - 0.436021, - 0.43568, - 0.429978, - 0.42931, - 0.432075, - 0.43042, - 0.431589, - 0.426344, - 0.414939, - 0.430639, - 0.444209, - 0.445477, - 0.439306, - 0.439767, - 0.435089, - 0.427329, - 0.426889, - 0.427463, - 0.425294, - 0.417306, - 0.428537, - 0.432672, - 0.430547, - 0.431464 - ], - "train_epoch_time": 4.845773220062256, - "train_loss": 2.203148044985712, - "train_score": 0.35310146151006305, - "val_loss": 2.276024468867568, - "val_score": 0.3372829365880837 - }, - { - "epoch": 10, - "grad_norm": 1.0875672101974487, - "learning_rate": 0.464, - "model_norm": 87.72252655029297, - "step_logs": { - "grad_norm": { - "540": 0.837222158908844, - "541": 0.7835797667503357, - "542": 0.7610028982162476, - "543": 0.7669121623039246, - "544": 0.8301094174385071, - "545": 0.9284971952438354, - "546": 0.8918527960777283, - "547": 0.866207480430603, - "548": 0.8836102485656738, - "549": 0.893557608127594, - "550": 0.878462553024292, - "551": 0.8611385822296143, - "552": 0.9313326478004456, - "553": 0.9181557297706604, - "554": 0.8733949065208435, - "555": 0.8756493926048279, - "556": 0.9271297454833984, - "557": 0.955710232257843, - "558": 0.9466307759284973, - "559": 0.8552358746528625, - "560": 0.9055444598197937, - "561": 0.8739283084869385, - "562": 0.8171818256378174, - "563": 0.8210787773132324, - "564": 0.827390193939209, - "565": 0.7717651128768921, - "566": 0.7284547090530396, - "567": 0.7656211256980896, - "568": 0.744737982749939, - "569": 0.8547093868255615, - "570": 0.9206417798995972, - "571": 1.081030011177063, - "572": 1.1768319606781006, - "573": 0.866636335849762, - "574": 0.8268339037895203, - "575": 0.8542226552963257, - "576": 0.9271738529205322, - "577": 0.9265819191932678, - "578": 0.9929571747779846, - "579": 0.9603354334831238, - "580": 0.899196207523346, - "581": 0.8540847897529602, - "582": 0.8603414297103882, - "583": 0.8398193120956421, - "584": 0.8098708391189575, - "585": 0.8223497867584229, - "586": 0.8144658207893372, - "587": 0.7410102486610413, - "588": 0.706733226776123, - "589": 0.7647109627723694, - "590": 0.8074276447296143, - "591": 0.8744938969612122, - "592": 0.9778786301612854, - "593": 1.0875672101974487 - }, - "loss": { - "540": 2.205142021179199, - "541": 2.193326950073242, - "542": 2.2021360397338867, - "543": 2.196610927581787, - "544": 2.167433261871338, - "545": 2.186608076095581, - "546": 2.2147414684295654, - "547": 2.201941967010498, - "548": 2.2095420360565186, - "549": 2.1882946491241455, - "550": 2.214224338531494, - "551": 2.200875759124756, - "552": 2.1780171394348145, - "553": 2.20267391204834, - "554": 2.1953084468841553, - "555": 2.2122104167938232, - "556": 2.195798873901367, - "557": 2.194793701171875, - "558": 2.176562786102295, - "559": 2.2067105770111084, - "560": 2.146296501159668, - "561": 2.2106218338012695, - "562": 2.1835570335388184, - "563": 2.1711323261260986, - "564": 2.19504451751709, - "565": 2.1459617614746094, - "566": 2.1423821449279785, - "567": 2.15317702293396, - "568": 2.170269012451172, - "569": 2.1745219230651855, - "570": 2.202603816986084, - "571": 2.1873385906219482, - "572": 2.2740583419799805, - "573": 2.20631742477417, - "574": 2.1620872020721436, - "575": 2.154696464538574, - "576": 2.1710996627807617, - "577": 2.1926236152648926, - "578": 2.2058892250061035, - "579": 2.2136383056640625, - "580": 2.1933369636535645, - "581": 2.171816825866699, - "582": 2.1926708221435547, - "583": 2.145845413208008, - "584": 2.184480667114258, - "585": 2.1450212001800537, - "586": 2.1509454250335693, - "587": 2.1145853996276855, - "588": 2.1454873085021973, - "589": 2.128196954727173, - "590": 2.1574501991271973, - "591": 2.159390449523926, - "592": 2.1849117279052734, - "593": 2.183688163757324 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 0.432132, - 0.435703, - 0.437318, - 0.436862, - 0.432127, - 0.425115, - 0.428313, - 0.430006, - 0.428843, - 0.427788, - 0.429289, - 0.430359, - 0.424756, - 0.426161, - 0.429385, - 0.429466, - 0.425368, - 0.423146, - 0.423545, - 0.430867, - 0.426221, - 0.429568, - 0.43326, - 0.43282, - 0.432693, - 0.435929, - 0.438786, - 0.436435, - 0.438029, - 0.430451, - 0.425971, - 0.41283, - 0.406557, - 0.430037, - 0.432288, - 0.4302, - 0.424963, - 0.425359, - 0.420405, - 0.423105, - 0.427443, - 0.430457, - 0.4303, - 0.431125, - 0.433783, - 0.432375, - 0.433018, - 0.437635, - 0.440224, - 0.436193, - 0.433602, - 0.428771, - 0.42123, - 0.412201 - ], - "train_epoch_time": 4.845861434936523, - "train_loss": 2.2146974980232534, - "train_score": 0.3436345499078376, - "val_loss": 2.3004243965127027, - "val_score": 0.3174153278944276 - }, - { - "epoch": 11, - "grad_norm": 0.6724064946174622, - "learning_rate": 0.464, - "model_norm": 87.78770446777344, - "step_logs": { - "grad_norm": { - "594": 1.0580754280090332, - "595": 0.9212614893913269, - "596": 0.9642578363418579, - "597": 0.9247661828994751, - "598": 0.8898668885231018, - "599": 0.764549195766449, - "600": 0.7429081797599792, - "601": 0.809561014175415, - "602": 0.8214095830917358, - "603": 0.7443106174468994, - "604": 0.7142700552940369, - "605": 0.7997999787330627, - "606": 0.8533837199211121, - "607": 0.8903974890708923, - "608": 0.8421460390090942, - "609": 0.7767924070358276, - "610": 0.7984451055526733, - "611": 0.7904222011566162, - "612": 0.7703580856323242, - "613": 0.7888203859329224, - "614": 0.8999834060668945, - "615": 0.9519425630569458, - "616": 1.2772712707519531, - "617": 1.1403990983963013, - "618": 1.3531757593154907, - "619": 1.0410200357437134, - "620": 0.9213559031486511, - "621": 0.8604251146316528, - "622": 0.9573296308517456, - "623": 0.9610622525215149, - "624": 0.9523951411247253, - "625": 0.876518964767456, - "626": 0.8423910737037659, - "627": 0.8058450818061829, - "628": 0.7705907225608826, - "629": 0.8506250381469727, - "630": 0.927774965763092, - "631": 0.8788832426071167, - "632": 0.8165997266769409, - "633": 0.7511072754859924, - "634": 0.7412652373313904, - "635": 0.7697358727455139, - "636": 0.7523447871208191, - "637": 0.7207874059677124, - "638": 0.736746609210968, - "639": 0.7831798195838928, - "640": 0.782400906085968, - "641": 0.7954618334770203, - "642": 0.8386595249176025, - "643": 0.8775931596755981, - "644": 0.8895736336708069, - "645": 0.8963454961776733, - "646": 0.7848446369171143, - "647": 0.6724064946174622 - }, - "loss": { - "594": 2.2167015075683594, - "595": 2.191697835922241, - "596": 2.186465263366699, - "597": 2.1545634269714355, - "598": 2.1651735305786133, - "599": 2.123749256134033, - "600": 2.115161180496216, - "601": 2.1567606925964355, - "602": 2.1536784172058105, - "603": 2.1429710388183594, - "604": 2.1172914505004883, - "605": 2.147050380706787, - "606": 2.153956174850464, - "607": 2.1501283645629883, - "608": 2.1439740657806396, - "609": 2.1566271781921387, - "610": 2.159396171569824, - "611": 2.1103720664978027, - "612": 2.151888847351074, - "613": 2.110909938812256, - "614": 2.1499547958374023, - "615": 2.155078172683716, - "616": 2.1719589233398438, - "617": 2.2504055500030518, - "618": 2.256662607192993, - "619": 2.2294726371765137, - "620": 2.166477680206299, - "621": 2.1753811836242676, - "622": 2.1534321308135986, - "623": 2.1601600646972656, - "624": 2.170780658721924, - "625": 2.141709327697754, - "626": 2.1476693153381348, - "627": 2.162611961364746, - "628": 2.1294023990631104, - "629": 2.1327340602874756, - "630": 2.1269547939300537, - "631": 2.13181209564209, - "632": 2.134298324584961, - "633": 2.1343894004821777, - "634": 2.084883689880371, - "635": 2.1140456199645996, - "636": 2.1203877925872803, - "637": 2.126919746398926, - "638": 2.1183371543884277, - "639": 2.0925917625427246, - "640": 2.1059441566467285, - "641": 2.1270558834075928, - "642": 2.1192996501922607, - "643": 2.155471086502075, - "644": 2.113860607147217, - "645": 2.131554126739502, - "646": 2.135742425918579, - "647": 2.0920801162719727 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 0.415335, - 0.42575, - 0.422333, - 0.424875, - 0.427709, - 0.43615, - 0.437515, - 0.433443, - 0.432561, - 0.437746, - 0.439434, - 0.434001, - 0.430251, - 0.427435, - 0.430929, - 0.435717, - 0.434256, - 0.434179, - 0.436098, - 0.434299, - 0.426705, - 0.422758, - 0.395142, - 0.409145, - 0.390491, - 0.416976, - 0.425335, - 0.430046, - 0.422303, - 0.422126, - 0.422995, - 0.428351, - 0.430964, - 0.433781, - 0.435805, - 0.430144, - 0.424175, - 0.42802, - 0.43264, - 0.43719, - 0.437264, - 0.435672, - 0.43694, - 0.439115, - 0.437964, - 0.434456, - 0.434686, - 0.434044, - 0.430828, - 0.428481, - 0.426921, - 0.426688, - 0.4349, - 0.441846 - ], - "train_epoch_time": 4.8454203605651855, - "train_loss": 2.098674478489836, - "train_score": 0.38179698715593074, - "val_loss": 2.205912221039204, - "val_score": 0.35292587512259366 - }, - { - "epoch": 12, - "grad_norm": 0.5794128179550171, - "learning_rate": 0.464, - "model_norm": 87.84599304199219, - "step_logs": { - "grad_norm": { - "648": 0.7280871868133545, - "649": 0.7949674129486084, - "650": 0.8373463749885559, - "651": 0.8505271673202515, - "652": 0.8080264925956726, - "653": 0.7680744528770447, - "654": 0.7920119762420654, - "655": 0.7798787951469421, - "656": 0.8150591850280762, - "657": 0.8534115552902222, - "658": 0.9188917875289917, - "659": 0.9360884428024292, - "660": 0.914743185043335, - "661": 0.9153463840484619, - "662": 0.7891954183578491, - "663": 0.6947863698005676, - "664": 0.7170270681381226, - "665": 0.720542311668396, - "666": 0.6777104735374451, - "667": 0.6016910076141357, - "668": 0.6527369618415833, - "669": 0.6453735828399658, - "670": 0.6522302031517029, - "671": 0.7421714663505554, - "672": 0.7705524563789368, - "673": 0.773090124130249, - "674": 0.6778600215911865, - "675": 0.6144305467605591, - "676": 0.6275186538696289, - "677": 0.6657248139381409, - "678": 0.76212477684021, - "679": 0.7866000533103943, - "680": 0.7449837327003479, - "681": 0.6946861147880554, - "682": 0.713527262210846, - "683": 0.7362378835678101, - "684": 0.733871340751648, - "685": 0.723556637763977, - "686": 0.6378058195114136, - "687": 0.5687558054924011, - "688": 0.5698312520980835, - "689": 0.56536465883255, - "690": 0.5483888387680054, - "691": 0.5366402268409729, - "692": 0.6047871112823486, - "693": 0.6195173859596252, - "694": 0.6775323748588562, - "695": 0.7518928647041321, - "696": 0.7077875733375549, - "697": 0.6100155115127563, - "698": 0.5949641466140747, - "699": 0.6668450236320496, - "700": 0.6424546837806702, - "701": 0.5794128179550171 - }, - "loss": { - "648": 2.094564437866211, - "649": 2.105900764465332, - "650": 2.07905912399292, - "651": 2.12368106842041, - "652": 2.1148014068603516, - "653": 2.090708017349243, - "654": 2.1004831790924072, - "655": 2.1072311401367188, - "656": 2.1105024814605713, - "657": 2.0993008613586426, - "658": 2.109952688217163, - "659": 2.1300911903381348, - "660": 2.124464511871338, - "661": 2.088480234146118, - "662": 2.138444185256958, - "663": 2.117091178894043, - "664": 2.0913760662078857, - "665": 2.10170841217041, - "666": 2.056595802307129, - "667": 2.0336270332336426, - "668": 2.066512107849121, - "669": 2.0774474143981934, - "670": 2.0543394088745117, - "671": 2.084582805633545, - "672": 2.090348243713379, - "673": 2.061830520629883, - "674": 2.0692152976989746, - "675": 2.0532665252685547, - "676": 2.0165762901306152, - "677": 2.047377824783325, - "678": 2.040558338165283, - "679": 2.086064338684082, - "680": 2.0734877586364746, - "681": 2.067492961883545, - "682": 2.0537562370300293, - "683": 2.065237522125244, - "684": 2.0659971237182617, - "685": 2.0526604652404785, - "686": 2.0711114406585693, - "687": 2.083730697631836, - "688": 2.033512592315674, - "689": 2.0273308753967285, - "690": 2.0299129486083984, - "691": 2.0306572914123535, - "692": 2.001108169555664, - "693": 1.996739387512207, - "694": 2.0321741104125977, - "695": 2.06087327003479, - "696": 2.025920867919922, - "697": 2.066075325012207, - "698": 2.0253281593322754, - "699": 2.0574288368225098, - "700": 2.0433220863342285, - "701": 2.0183725357055664 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 0.438266, - 0.431293, - 0.425399, - 0.422627, - 0.422994, - 0.422852, - 0.418869, - 0.41722, - 0.412454, - 0.407264, - 0.400472, - 0.397163, - 0.396115, - 0.393113, - 0.399255, - 0.401752, - 0.397729, - 0.395044, - 0.394286, - 0.395173, - 0.39035, - 0.388138, - 0.385003, - 0.378229, - 0.374251, - 0.37128, - 0.373382, - 0.373394, - 0.36994, - 0.365854, - 0.358769, - 0.355432, - 0.354672, - 0.354207, - 0.350682, - 0.34718, - 0.344676, - 0.342392, - 0.34319, - 0.342919, - 0.339948, - 0.337351, - 0.33517, - 0.332796, - 0.327849, - 0.324655, - 0.32023, - 0.315145, - 0.313848, - 0.314488, - 0.312042, - 0.307366, - 0.305345, - 0.304297 - ], - "train_epoch_time": 4.844924688339233, - "train_loss": 2.0192010106452054, - "train_score": 0.4027382980460245, - "val_loss": 2.138548207200901, - "val_score": 0.3692460177823405 - }, - { - "epoch": 13, - "grad_norm": 0.435800164937973, - "learning_rate": 0.3093333333333334, - "model_norm": 87.88268280029297, - "step_logs": { - "grad_norm": { - "702": 0.6073912978172302, - "703": 0.6126645803451538, - "704": 0.5555345416069031, - "705": 0.5069809556007385, - "706": 0.48485493659973145, - "707": 0.5341897010803223, - "708": 0.5930553674697876, - "709": 0.5822255611419678, - "710": 0.5164859890937805, - "711": 0.482624351978302, - "712": 0.49150392413139343, - "713": 0.5376775860786438, - "714": 0.5280558466911316, - "715": 0.5728492736816406, - "716": 0.5391548275947571, - "717": 0.5202983617782593, - "718": 0.5325655341148376, - "719": 0.5430764555931091, - "720": 0.4934731125831604, - "721": 0.43142956495285034, - "722": 0.461704283952713, - "723": 0.4697701632976532, - "724": 0.490200012922287, - "725": 0.5546342730522156, - "726": 0.5595705509185791, - "727": 0.5540966987609863, - "728": 0.5454463958740234, - "729": 0.5378643274307251, - "730": 0.4859277606010437, - "731": 0.48428964614868164, - "732": 0.4919300377368927, - "733": 0.47724759578704834, - "734": 0.43840646743774414, - "735": 0.41453710198402405, - "736": 0.436782568693161, - "737": 0.44236692786216736, - "738": 0.40313801169395447, - "739": 0.42692598700523376, - "740": 0.4014340937137604, - "741": 0.42544159293174744, - "742": 0.427396297454834, - "743": 0.4532626271247864, - "744": 0.4493344724178314, - "745": 0.4231850504875183, - "746": 0.43626317381858826, - "747": 0.4043489396572113, - "748": 0.421360045671463, - "749": 0.3930183947086334, - "750": 0.39664310216903687, - "751": 0.40400469303131104, - "752": 0.4585326611995697, - "753": 0.4349212646484375, - "754": 0.4583161175251007, - "755": 0.435800164937973 - }, - "loss": { - "702": 2.0058786869049072, - "703": 2.018357276916504, - "704": 1.9854450225830078, - "705": 1.9827589988708496, - "706": 2.0101265907287598, - "707": 2.011603355407715, - "708": 2.0356409549713135, - "709": 2.007253646850586, - "710": 2.01055908203125, - "711": 2.0131640434265137, - "712": 2.0131869316101074, - "713": 2.0186500549316406, - "714": 1.9881646633148193, - "715": 2.021908760070801, - "716": 1.9943209886550903, - "717": 2.017972946166992, - "718": 2.008711338043213, - "719": 2.007040500640869, - "720": 2.002372980117798, - "721": 1.9961776733398438, - "722": 1.988890528678894, - "723": 1.9769232273101807, - "724": 1.9916582107543945, - "725": 1.9797203540802002, - "726": 1.9986188411712646, - "727": 1.9732967615127563, - "728": 1.9990925788879395, - "729": 2.0032875537872314, - "730": 2.014237880706787, - "731": 1.9672949314117432, - "732": 2.0122385025024414, - "733": 1.9859099388122559, - "734": 1.9855337142944336, - "735": 1.976871371269226, - "736": 1.9841015338897705, - "737": 1.9613080024719238, - "738": 1.9689198732376099, - "739": 1.9993987083435059, - "740": 2.0084848403930664, - "741": 1.9688761234283447, - "742": 2.0016400814056396, - "743": 2.0074009895324707, - "744": 1.9846971035003662, - "745": 2.0019288063049316, - "746": 1.977049708366394, - "747": 1.9792331457138062, - "748": 1.9722143411636353, - "749": 1.9319303035736084, - "750": 1.968343734741211, - "751": 1.956522822380066, - "752": 1.9476523399353027, - "753": 1.959883451461792, - "754": 1.953392744064331, - "755": 1.9551382064819336 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 0.300777, - 0.297978, - 0.296606, - 0.294991, - 0.292777, - 0.288966, - 0.284956, - 0.282386, - 0.281079, - 0.278979, - 0.276043, - 0.272408, - 0.269761, - 0.26622, - 0.264053, - 0.261695, - 0.258694, - 0.255744, - 0.2538, - 0.25192, - 0.24869, - 0.245767, - 0.242714, - 0.238937, - 0.236142, - 0.233412, - 0.23083, - 0.228178, - 0.226099, - 0.22326, - 0.220446, - 0.217789, - 0.21541, - 0.212828, - 0.209813, - 0.206928, - 0.204482, - 0.20149, - 0.198894, - 0.195851, - 0.19305, - 0.190035, - 0.187237, - 0.184635, - 0.181705, - 0.17911, - 0.176172, - 0.173504, - 0.17068, - 0.167805, - 0.164647, - 0.161983, - 0.159024, - 0.156335 - ], - "train_epoch_time": 4.8451550006866455, - "train_loss": 1.964778862451036, - "train_score": 0.4169980720690367, - "val_loss": 2.096801211447995, - "val_score": 0.3804714407014792 - }, - { - "epoch": 14, - "grad_norm": 0.34223562479019165, - "learning_rate": 0.1546666666666667, - "model_norm": 87.89469909667969, - "step_logs": { - "grad_norm": { - "756": 0.42412814497947693, - "757": 0.4141750931739807, - "758": 0.4284485876560211, - "759": 0.4527234435081482, - "760": 0.4311065375804901, - "761": 0.3678427040576935, - "762": 0.38308480381965637, - "763": 0.368010014295578, - "764": 0.404380738735199, - "765": 0.41867321729660034, - "766": 0.40771737694740295, - "767": 0.3747258484363556, - "768": 0.41306740045547485, - "769": 0.4015268385410309, - "770": 0.40441352128982544, - "771": 0.3970872759819031, - "772": 0.3616786599159241, - "773": 0.41753914952278137, - "774": 0.35900408029556274, - "775": 0.37756288051605225, - "776": 0.3805305063724518, - "777": 0.37806326150894165, - "778": 0.3603123724460602, - "779": 0.39038917422294617, - "780": 0.378569632768631, - "781": 0.37287214398384094, - "782": 0.39142125844955444, - "783": 0.4298277199268341, - "784": 0.37450164556503296, - "785": 0.3769410252571106, - "786": 0.3681488335132599, - "787": 0.3858988881111145, - "788": 0.38970568776130676, - "789": 0.3641791045665741, - "790": 0.3625100553035736, - "791": 0.3731885254383087, - "792": 0.3449625074863434, - "793": 0.35954350233078003, - "794": 0.36654889583587646, - "795": 0.3550361394882202, - "796": 0.36998769640922546, - "797": 0.35033097863197327, - "798": 0.34985867142677307, - "799": 0.36207813024520874, - "800": 0.340498685836792, - "801": 0.34276333451271057, - "802": 0.3665873408317566, - "803": 0.3412649631500244, - "804": 0.36181315779685974, - "805": 0.3816055357456207, - "806": 0.3566543459892273, - "807": 0.3667950928211212, - "808": 0.38622111082077026, - "809": 0.34223562479019165 - }, - "loss": { - "756": 1.9722751379013062, - "757": 1.9607598781585693, - "758": 1.961469054222107, - "759": 1.9577608108520508, - "760": 1.948209524154663, - "761": 1.965173602104187, - "762": 1.9657812118530273, - "763": 1.9519789218902588, - "764": 1.9616801738739014, - "765": 1.9620543718338013, - "766": 1.9820327758789062, - "767": 1.9642672538757324, - "768": 1.9898093938827515, - "769": 1.9398128986358643, - "770": 1.9555704593658447, - "771": 1.9282821416854858, - "772": 1.9710948467254639, - "773": 1.9526134729385376, - "774": 1.978273630142212, - "775": 1.9627265930175781, - "776": 1.954703688621521, - "777": 1.9501171112060547, - "778": 1.9729516506195068, - "779": 1.964715838432312, - "780": 1.9740839004516602, - "781": 1.9470267295837402, - "782": 1.9209100008010864, - "783": 1.9696826934814453, - "784": 1.9587608575820923, - "785": 1.972074031829834, - "786": 1.923396348953247, - "787": 1.9671581983566284, - "788": 1.9669079780578613, - "789": 1.9630398750305176, - "790": 1.9407529830932617, - "791": 1.9612245559692383, - "792": 1.9570255279541016, - "793": 1.9639378786087036, - "794": 1.963686227798462, - "795": 1.9576197862625122, - "796": 1.936382532119751, - "797": 1.9589039087295532, - "798": 1.957580327987671, - "799": 1.9364898204803467, - "800": 1.9418058395385742, - "801": 1.949873924255371, - "802": 1.9419589042663574, - "803": 1.9290494918823242, - "804": 1.9263761043548584, - "805": 1.9481416940689087, - "806": 1.9577698707580566, - "807": 1.9249539375305176, - "808": 1.9796171188354492, - "809": 1.9654319286346436 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 0.153583, - 0.150801, - 0.147907, - 0.144966, - 0.142238, - 0.139671, - 0.13678, - 0.133992, - 0.131034, - 0.128151, - 0.125362, - 0.122621, - 0.119679, - 0.116862, - 0.114022, - 0.111196, - 0.108448, - 0.105476, - 0.102766, - 0.0998833, - 0.0970327, - 0.0941923, - 0.0913788, - 0.0884854, - 0.0856588, - 0.0828161, - 0.0799419, - 0.0770539, - 0.0742711, - 0.0714207, - 0.0685747, - 0.0657127, - 0.0628594, - 0.0600262, - 0.0571731, - 0.0543148, - 0.0514749, - 0.0486135, - 0.0457554, - 0.0429036, - 0.040042, - 0.0371912, - 0.0343335, - 0.0314726, - 0.0286175, - 0.0257578, - 0.0228954, - 0.0200373, - 0.0171752, - 0.0143133, - 0.0114525, - 0.00859001, - 0.00572716, - 0.00286395 - ], - "train_epoch_time": 4.844804048538208, - "train_loss": 1.9481386121068487, - "train_score": 0.42153985841544495, - "val_loss": 2.0832262726246147, - "val_score": 0.38454811286981017 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:32:08.945666", - "final_model_norm": 87.89469909667969, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:30:27.302467", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 3.4953649044036865, - "learning_rate": 4.64e-11, - "model_norm": 87.41687774658203, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.632101535797119, - "3": 9.477651596069336, - "4": 15.083366394042969, - "5": 6.223578929901123, - "6": 4.399288654327393, - "7": 6.111652851104736, - "8": 4.110215187072754, - "9": 6.012516498565674, - "10": 4.587062835693359, - "11": 4.777608871459961, - "12": 7.2164435386657715, - "13": 9.568192481994629, - "14": 3.8904850482940674, - "15": 5.508795261383057, - "16": 9.25052261352539, - "17": 6.2970147132873535, - "18": 10.49487590789795, - "19": 10.287344932556152, - "20": 13.054203033447266, - "21": 3.6671366691589355, - "22": 15.062812805175781, - "23": 6.225735187530518, - "24": 7.710809707641602, - "25": 7.015965461730957, - "26": 4.360213756561279, - "27": 16.566226959228516, - "28": 7.702916622161865, - "29": 9.260043144226074, - "30": 4.714838027954102, - "31": 3.2972371578216553, - "32": 5.28471040725708, - "33": 2.4057743549346924, - "34": 5.037460803985596, - "35": 7.777568340301514, - "36": 5.691040515899658, - "37": 5.113794326782227, - "38": 5.331128120422363, - "39": 23.418071746826172, - "40": 13.3868408203125, - "41": 3.1640076637268066, - "42": 8.675661087036133, - "43": 4.468127250671387, - "44": 4.63825798034668, - "45": 5.239930152893066, - "46": 8.783048629760742, - "47": 7.4971137046813965, - "48": 4.174231052398682, - "49": 2.8159408569335938, - "50": 4.93419885635376, - "51": 3.471348524093628, - "52": 7.316342353820801, - "53": 3.4953649044036865 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.8128108978271484, - "3": 3.8596410751342773, - "4": 4.148884296417236, - "5": 4.280588150024414, - "6": 3.5731735229492188, - "7": 3.730853796005249, - "8": 3.9655306339263916, - "9": 3.49727725982666, - "10": 3.813404083251953, - "11": 3.6488544940948486, - "12": 6.760714530944824, - "13": 3.6842222213745117, - "14": 4.1691131591796875, - "15": 3.6978936195373535, - "16": 4.105098247528076, - "17": 5.215545654296875, - "18": 4.9595465660095215, - "19": 4.571078300476074, - "20": 4.288591384887695, - "21": 3.841993570327759, - "22": 4.388175964355469, - "23": 3.681670665740967, - "24": 3.673552989959717, - "25": 3.8043699264526367, - "26": 4.5079240798950195, - "27": 5.53628396987915, - "28": 3.921935796737671, - "29": 4.44952917098999, - "30": 3.9125852584838867, - "31": 4.293788433074951, - "32": 3.923111915588379, - "33": 3.471259593963623, - "34": 3.690646171569824, - "35": 4.4711809158325195, - "36": 4.153311729431152, - "37": 3.850543975830078, - "38": 3.814574956893921, - "39": 5.762028694152832, - "40": 5.557949066162109, - "41": 3.583878755569458, - "42": 4.019481658935547, - "43": 4.102827072143555, - "44": 4.205428123474121, - "45": 4.144927024841309, - "46": 4.410046577453613, - "47": 4.4519453048706055, - "48": 3.869866371154785, - "49": 3.4741594791412354, - "50": 3.508986473083496, - "51": 3.837757110595703, - "52": 4.058722019195557, - "53": 3.5802788734436035 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 4.64e-11, - 0.00606328, - 0.0167652, - 0.0210278, - 0.0183967, - 0.0383495, - 0.048384, - 0.0490197, - 0.0641029, - 0.0583378, - 0.0738842, - 0.0773754, - 0.0779342, - 0.0482771, - 0.105127, - 0.0885962, - 0.0582833, - 0.0986183, - 0.0585115, - 0.0579794, - 0.0395945, - 0.145318, - 0.03252, - 0.100512, - 0.0794757, - 0.0927668, - 0.159917, - 0.0347504, - 0.0876193, - 0.074898, - 0.155455, - 0.210878, - 0.144365, - 0.243957, - 0.151349, - 0.101592, - 0.145088, - 0.158525, - 0.152414, - 0.0198606, - 0.0531471, - 0.248452, - 0.0838331, - 0.202471, - 0.199725, - 0.175231, - 0.0901816, - 0.116207, - 0.222408, - 0.299368, - 0.1778, - 0.268447, - 0.114293, - 0.258973 - ], - "train_epoch_time": 4.845700025558472, - "train_loss": 3.511465792197579, - "train_score": 0.14736706425931567, - "val_loss": 3.5557357807794476, - "val_score": 0.14010924961413637 - }, - { - "epoch": 1, - "grad_norm": 1.9664967060089111, - "learning_rate": 0.464, - "model_norm": 87.36225891113281, - "step_logs": { - "grad_norm": { - "54": 5.270796298980713, - "55": 4.202520370483398, - "56": 10.859441757202148, - "57": 4.501511096954346, - "58": 4.667898178100586, - "59": 15.348048210144043, - "60": 8.129271507263184, - "61": 6.9826531410217285, - "62": 2.8545353412628174, - "63": 8.056032180786133, - "64": 5.776970863342285, - "65": 4.928697109222412, - "66": 2.7388803958892822, - "67": 6.421074390411377, - "68": 3.449686050415039, - "69": 4.1241865158081055, - "70": 8.620981216430664, - "71": 2.5956056118011475, - "72": 4.451788425445557, - "73": 4.842113018035889, - "74": 3.030059814453125, - "75": 2.548440933227539, - "76": 3.4492027759552, - "77": 3.1907198429107666, - "78": 3.236433506011963, - "79": 1.7249805927276611, - "80": 2.1449711322784424, - "81": 4.539532661437988, - "82": 2.4484031200408936, - "83": 4.745017051696777, - "84": 2.536980152130127, - "85": 1.7241555452346802, - "86": 2.8588666915893555, - "87": 2.6496448516845703, - "88": 3.7677969932556152, - "89": 2.689509391784668, - "90": 1.9279940128326416, - "91": 2.6054961681365967, - "92": 2.0998308658599854, - "93": 4.391077995300293, - "94": 1.9211870431900024, - "95": 1.905600905418396, - "96": 2.057025194168091, - "97": 2.375565767288208, - "98": 2.6710333824157715, - "99": 1.7314995527267456, - "100": 1.4255518913269043, - "101": 2.0370473861694336, - "102": 4.326293468475342, - "103": 1.6464062929153442, - "104": 2.397109031677246, - "105": 1.6464992761611938, - "106": 2.808077096939087, - "107": 1.9664967060089111 - }, - "loss": { - "54": 3.531726121902466, - "55": 3.464325428009033, - "56": 4.565157890319824, - "57": 4.1141228675842285, - "58": 4.4550676345825195, - "59": 4.6623029708862305, - "60": 4.204016208648682, - "61": 3.8621020317077637, - "62": 3.56404972076416, - "63": 3.872037410736084, - "64": 3.9847793579101562, - "65": 4.252335548400879, - "66": 3.3278677463531494, - "67": 4.12910795211792, - "68": 3.715725898742676, - "69": 3.4431114196777344, - "70": 4.531065940856934, - "71": 3.4832961559295654, - "72": 3.4611239433288574, - "73": 3.645770311355591, - "74": 3.7510828971862793, - "75": 3.6006414890289307, - "76": 3.3778152465820312, - "77": 3.5806689262390137, - "78": 3.6561760902404785, - "79": 3.119947910308838, - "80": 3.093405246734619, - "81": 3.556382179260254, - "82": 3.2736024856567383, - "83": 3.7718029022216797, - "84": 3.239236354827881, - "85": 3.1432065963745117, - "86": 3.0772080421447754, - "87": 3.390064239501953, - "88": 3.5369338989257812, - "89": 3.340794563293457, - "90": 3.168405771255493, - "91": 3.1797964572906494, - "92": 3.1759305000305176, - "93": 3.412914276123047, - "94": 3.1868836879730225, - "95": 2.9689383506774902, - "96": 3.100706100463867, - "97": 3.0078396797180176, - "98": 3.190727710723877, - "99": 3.143548011779785, - "100": 2.801726818084717, - "101": 2.9404473304748535, - "102": 3.4823737144470215, - "103": 3.0103201866149902, - "104": 2.8882369995117188, - "105": 3.028944253921509, - "106": 2.9876348972320557, - "107": 3.1696414947509766 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.16425, - 0.212577, - 0.0663517, - 0.21655, - 0.217362, - 0.0364729, - 0.099851, - 0.118099, - 0.303186, - 0.0949151, - 0.15766, - 0.199541, - 0.30467, - 0.139903, - 0.266204, - 0.216209, - 0.0965579, - 0.320283, - 0.199276, - 0.186196, - 0.295947, - 0.327115, - 0.255348, - 0.27958, - 0.278737, - 0.379934, - 0.344966, - 0.197925, - 0.32565, - 0.194558, - 0.317596, - 0.38051, - 0.287094, - 0.313417, - 0.240267, - 0.308855, - 0.364728, - 0.310305, - 0.350958, - 0.200805, - 0.36573, - 0.361438, - 0.352424, - 0.323282, - 0.305515, - 0.379934, - 0.397166, - 0.349556, - 0.206503, - 0.383818, - 0.317468, - 0.384219, - 0.287784, - 0.361638 - ], - "train_epoch_time": 4.842940807342529, - "train_loss": 3.0945656823633048, - "train_score": 0.2102369530342915, - "val_loss": 3.112576820272803, - "val_score": 0.20478885654193252 - }, - { - "epoch": 2, - "grad_norm": 1.458455204963684, - "learning_rate": 0.464, - "model_norm": 87.34402465820312, - "step_logs": { - "grad_norm": { - "108": 2.2182905673980713, - "109": 2.0108089447021484, - "110": 1.5361435413360596, - "111": 2.0051770210266113, - "112": 2.2770516872406006, - "113": 1.674286127090454, - "114": 3.0552706718444824, - "115": 1.5741430521011353, - "116": 1.4676458835601807, - "117": 1.5990766286849976, - "118": 2.2896153926849365, - "119": 1.7487237453460693, - "120": 1.3608037233352661, - "121": 1.4323080778121948, - "122": 1.7352120876312256, - "123": 1.8004534244537354, - "124": 1.522517204284668, - "125": 1.97489333152771, - "126": 1.500195026397705, - "127": 1.1285768747329712, - "128": 1.5444203615188599, - "129": 1.5232722759246826, - "130": 1.168062448501587, - "131": 1.5509209632873535, - "132": 1.750795602798462, - "133": 1.473191499710083, - "134": 1.5706579685211182, - "135": 1.6227079629898071, - "136": 1.541170358657837, - "137": 1.4366670846939087, - "138": 1.9454410076141357, - "139": 1.7283855676651, - "140": 2.0398221015930176, - "141": 1.3189575672149658, - "142": 1.5378857851028442, - "143": 1.6694495677947998, - "144": 1.3832626342773438, - "145": 1.410828948020935, - "146": 1.6587904691696167, - "147": 2.24880051612854, - "148": 1.2307977676391602, - "149": 1.3819741010665894, - "150": 1.4442795515060425, - "151": 1.5331897735595703, - "152": 1.118809461593628, - "153": 1.5031019449234009, - "154": 2.323068857192993, - "155": 1.6250786781311035, - "156": 1.6264251470565796, - "157": 1.6101235151290894, - "158": 1.5338894128799438, - "159": 1.05475652217865, - "160": 1.373598337173462, - "161": 1.458455204963684 - }, - "loss": { - "108": 3.109159469604492, - "109": 3.070673942565918, - "110": 2.8874285221099854, - "111": 2.941579818725586, - "112": 2.9926042556762695, - "113": 2.9000515937805176, - "114": 3.1140685081481934, - "115": 3.102757215499878, - "116": 2.8098974227905273, - "117": 2.8647236824035645, - "118": 2.926743507385254, - "119": 3.038897752761841, - "120": 2.8261895179748535, - "121": 2.7418599128723145, - "122": 2.8474154472351074, - "123": 2.8810558319091797, - "124": 2.926438093185425, - "125": 2.8318533897399902, - "126": 2.9802098274230957, - "127": 2.703420639038086, - "128": 2.761086940765381, - "129": 2.911237955093384, - "130": 2.716820001602173, - "131": 2.7275338172912598, - "132": 2.866767406463623, - "133": 2.8330001831054688, - "134": 2.7773144245147705, - "135": 2.815718650817871, - "136": 2.7914857864379883, - "137": 2.7595999240875244, - "138": 2.806817054748535, - "139": 2.9285173416137695, - "140": 2.8892822265625, - "141": 2.8088126182556152, - "142": 2.7513771057128906, - "143": 2.82440185546875, - "144": 2.8121166229248047, - "145": 2.7674131393432617, - "146": 2.8214759826660156, - "147": 2.9398136138916016, - "148": 2.803689479827881, - "149": 2.6837542057037354, - "150": 2.8016669750213623, - "151": 2.7459566593170166, - "152": 2.7188401222229004, - "153": 2.6839404106140137, - "154": 2.9162955284118652, - "155": 2.933291435241699, - "156": 2.7737374305725098, - "157": 2.7417349815368652, - "158": 2.8533411026000977, - "159": 2.656414031982422, - "160": 2.652618646621704, - "161": 2.7714900970458984 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.339384, - 0.355422, - 0.390047, - 0.352286, - 0.330965, - 0.379006, - 0.273675, - 0.391469, - 0.39394, - 0.384398, - 0.327787, - 0.376177, - 0.402774, - 0.395369, - 0.372593, - 0.367951, - 0.391968, - 0.351642, - 0.394826, - 0.41828, - 0.386532, - 0.39159, - 0.415581, - 0.385191, - 0.371775, - 0.393978, - 0.384719, - 0.381278, - 0.387505, - 0.395391, - 0.353435, - 0.375205, - 0.347799, - 0.405704, - 0.386851, - 0.377563, - 0.40074, - 0.397647, - 0.378388, - 0.331644, - 0.412315, - 0.398249, - 0.395657, - 0.387117, - 0.419222, - 0.388189, - 0.32463, - 0.383829, - 0.379937, - 0.380524, - 0.389489, - 0.422909, - 0.398277, - 0.393869 - ], - "train_epoch_time": 4.842704772949219, - "train_loss": 2.790942992370474, - "train_score": 0.22029120336304095, - "val_loss": 2.8064462073771743, - "val_score": 0.2155568311600406 - }, - { - "epoch": 3, - "grad_norm": 1.433650016784668, - "learning_rate": 0.464, - "model_norm": 87.3509292602539, - "step_logs": { - "grad_norm": { - "162": 1.4366334676742554, - "163": 1.213464617729187, - "164": 1.1782711744308472, - "165": 1.8073817491531372, - "166": 1.3293166160583496, - "167": 1.121290922164917, - "168": 1.6984219551086426, - "169": 1.644121527671814, - "170": 1.2490148544311523, - "171": 1.6104655265808105, - "172": 1.358503818511963, - "173": 1.0819065570831299, - "174": 1.1684364080429077, - "175": 1.2244898080825806, - "176": 1.1080474853515625, - "177": 1.0779063701629639, - "178": 1.114522933959961, - "179": 1.3154613971710205, - "180": 1.5206512212753296, - "181": 1.2605290412902832, - "182": 1.306101679801941, - "183": 1.2241108417510986, - "184": 0.9293332099914551, - "185": 0.9950765371322632, - "186": 1.1068443059921265, - "187": 1.3171552419662476, - "188": 1.0813138484954834, - "189": 1.0367534160614014, - "190": 1.4814493656158447, - "191": 1.3181469440460205, - "192": 0.8359196186065674, - "193": 0.7603257298469543, - "194": 1.0790683031082153, - "195": 1.3632005453109741, - "196": 1.3106358051300049, - "197": 1.3817201852798462, - "198": 1.1835811138153076, - "199": 0.970302164554596, - "200": 1.0245310068130493, - "201": 1.3890938758850098, - "202": 1.2016632556915283, - "203": 0.9569770097732544, - "204": 0.9658185243606567, - "205": 1.333294153213501, - "206": 1.220920443534851, - "207": 1.174423098564148, - "208": 1.1236298084259033, - "209": 0.8445212244987488, - "210": 0.9995211362838745, - "211": 1.2012298107147217, - "212": 1.110680341720581, - "213": 0.979672908782959, - "214": 0.9985975623130798, - "215": 1.433650016784668 - }, - "loss": { - "162": 2.793078899383545, - "163": 2.6818556785583496, - "164": 2.660399913787842, - "165": 2.7539844512939453, - "166": 2.8909783363342285, - "167": 2.609816551208496, - "168": 2.7761762142181396, - "169": 2.8091959953308105, - "170": 2.758303642272949, - "171": 2.7061643600463867, - "172": 2.8290352821350098, - "173": 2.649831533432007, - "174": 2.6693527698516846, - "175": 2.66512393951416, - "176": 2.661381244659424, - "177": 2.620033025741577, - "178": 2.662315845489502, - "179": 2.6457929611206055, - "180": 2.728728771209717, - "181": 2.7737159729003906, - "182": 2.667226791381836, - "183": 2.727423667907715, - "184": 2.612975597381592, - "185": 2.5970468521118164, - "186": 2.6217851638793945, - "187": 2.660849094390869, - "188": 2.6600699424743652, - "189": 2.6397581100463867, - "190": 2.6401283740997314, - "191": 2.779290199279785, - "192": 2.604036331176758, - "193": 2.541745185852051, - "194": 2.58569073677063, - "195": 2.6986641883850098, - "196": 2.70920467376709, - "197": 2.6580862998962402, - "198": 2.7164857387542725, - "199": 2.5856382846832275, - "200": 2.6239733695983887, - "201": 2.6230173110961914, - "202": 2.7349560260772705, - "203": 2.567758083343506, - "204": 2.5872833728790283, - "205": 2.6019580364227295, - "206": 2.7150871753692627, - "207": 2.612663745880127, - "208": 2.682765007019043, - "209": 2.571889877319336, - "210": 2.574979305267334, - "211": 2.653383255004883, - "212": 2.615011692047119, - "213": 2.5925087928771973, - "214": 2.577688694000244, - "215": 2.6449642181396484 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 0.396096, - 0.411573, - 0.413891, - 0.363869, - 0.406373, - 0.417354, - 0.373873, - 0.37932, - 0.410179, - 0.379597, - 0.403006, - 0.420868, - 0.414783, - 0.41043, - 0.41914, - 0.420716, - 0.41868, - 0.40287, - 0.387765, - 0.409568, - 0.404047, - 0.411544, - 0.430953, - 0.426292, - 0.418618, - 0.403035, - 0.421062, - 0.423951, - 0.388982, - 0.405227, - 0.436807, - 0.440744, - 0.420109, - 0.400084, - 0.404499, - 0.397726, - 0.414419, - 0.427856, - 0.424595, - 0.396355, - 0.413366, - 0.428541, - 0.428185, - 0.400517, - 0.411576, - 0.413372, - 0.418326, - 0.435952, - 0.425684, - 0.412018, - 0.418227, - 0.4273, - 0.425785, - 0.393126 - ], - "train_epoch_time": 4.842948913574219, - "train_loss": 2.715890288866062, - "train_score": 0.21206397948682393, - "val_loss": 2.7603072714997214, - "val_score": 0.20775330104318743 - }, - { - "epoch": 4, - "grad_norm": 1.0544430017471313, - "learning_rate": 0.464, - "model_norm": 87.37629699707031, - "step_logs": { - "grad_norm": { - "216": 1.1416711807250977, - "217": 0.9183707237243652, - "218": 1.1421544551849365, - "219": 1.3010965585708618, - "220": 1.0878509283065796, - "221": 0.929415762424469, - "222": 0.8989261388778687, - "223": 0.921040415763855, - "224": 1.1952918767929077, - "225": 1.0492099523544312, - "226": 0.9612029194831848, - "227": 1.0214899778366089, - "228": 0.9509407877922058, - "229": 1.140182375907898, - "230": 1.0640504360198975, - "231": 1.034746766090393, - "232": 0.9620423913002014, - "233": 0.9955230951309204, - "234": 0.9503745436668396, - "235": 0.909529447555542, - "236": 0.9207335114479065, - "237": 1.0392158031463623, - "238": 0.923897922039032, - "239": 0.9228456020355225, - "240": 0.9023650884628296, - "241": 0.9074633717536926, - "242": 0.9456944465637207, - "243": 1.037077784538269, - "244": 0.9104472398757935, - "245": 0.8049510717391968, - "246": 0.7960094213485718, - "247": 0.8483934998512268, - "248": 0.8847818374633789, - "249": 0.874873697757721, - "250": 0.9503462910652161, - "251": 0.9728066921234131, - "252": 0.9684033393859863, - "253": 1.0017497539520264, - "254": 0.96277916431427, - "255": 0.9461209177970886, - "256": 0.8669742345809937, - "257": 0.8068379759788513, - "258": 0.8170136213302612, - "259": 0.9688451886177063, - "260": 1.0162771940231323, - "261": 0.997868001461029, - "262": 1.1000066995620728, - "263": 1.0799529552459717, - "264": 1.0200011730194092, - "265": 1.0192959308624268, - "266": 0.9461516737937927, - "267": 0.8776339888572693, - "268": 1.0877230167388916, - "269": 1.0544430017471313 - }, - "loss": { - "216": 2.714728355407715, - "217": 2.5691323280334473, - "218": 2.6134748458862305, - "219": 2.6674747467041016, - "220": 2.640254259109497, - "221": 2.5924925804138184, - "222": 2.5615062713623047, - "223": 2.5858359336853027, - "224": 2.601901054382324, - "225": 2.6623408794403076, - "226": 2.5938425064086914, - "227": 2.5729002952575684, - "228": 2.59918212890625, - "229": 2.5741467475891113, - "230": 2.672935724258423, - "231": 2.5458154678344727, - "232": 2.639704942703247, - "233": 2.570923328399658, - "234": 2.591862201690674, - "235": 2.5478522777557373, - "236": 2.595989227294922, - "237": 2.5573132038116455, - "238": 2.6101834774017334, - "239": 2.557218074798584, - "240": 2.5839805603027344, - "241": 2.5425610542297363, - "242": 2.609896183013916, - "243": 2.5647411346435547, - "244": 2.6072728633880615, - "245": 2.540611743927002, - "246": 2.5323143005371094, - "247": 2.5325565338134766, - "248": 2.553086042404175, - "249": 2.541210651397705, - "250": 2.5510361194610596, - "251": 2.551211357116699, - "252": 2.569657325744629, - "253": 2.563237190246582, - "254": 2.584824323654175, - "255": 2.564854383468628, - "256": 2.567793130874634, - "257": 2.524254560470581, - "258": 2.556800127029419, - "259": 2.531148672103882, - "260": 2.598818302154541, - "261": 2.566972494125366, - "262": 2.5986692905426025, - "263": 2.5702319145202637, - "264": 2.5863614082336426, - "265": 2.5713467597961426, - "266": 2.5680689811706543, - "267": 2.538209915161133, - "268": 2.5335302352905273, - "269": 2.6261777877807617 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 0.417495, - 0.431162, - 0.415844, - 0.404451, - 0.420295, - 0.430706, - 0.432357, - 0.431182, - 0.411569, - 0.423385, - 0.428583, - 0.424098, - 0.429345, - 0.415336, - 0.422482, - 0.422751, - 0.429096, - 0.425909, - 0.429293, - 0.431497, - 0.431322, - 0.422596, - 0.431279, - 0.430721, - 0.432389, - 0.431571, - 0.429829, - 0.42286, - 0.432127, - 0.43808, - 0.438542, - 0.435298, - 0.433185, - 0.433694, - 0.428781, - 0.427233, - 0.42778, - 0.425365, - 0.428361, - 0.429244, - 0.434493, - 0.437806, - 0.437501, - 0.427242, - 0.42483, - 0.42569, - 0.418763, - 0.419805, - 0.424393, - 0.424232, - 0.429283, - 0.433482, - 0.418643, - 0.422501 - ], - "train_epoch_time": 4.842125177383423, - "train_loss": 2.540635450806474, - "train_score": 0.25658738332636216, - "val_loss": 2.577581350893706, - "val_score": 0.2525159657069928 - }, - { - "epoch": 5, - "grad_norm": 0.7355007529258728, - "learning_rate": 0.464, - "model_norm": 87.41879272460938, - "step_logs": { - "grad_norm": { - "270": 0.8205832839012146, - "271": 0.8407583832740784, - "272": 0.9962707757949829, - "273": 0.8767731785774231, - "274": 0.7861824631690979, - "275": 0.8610600233078003, - "276": 1.0702558755874634, - "277": 0.9638050198554993, - "278": 0.784705400466919, - "279": 0.8742997646331787, - "280": 0.9360121488571167, - "281": 0.8274256587028503, - "282": 0.746822714805603, - "283": 0.8617690205574036, - "284": 1.086946964263916, - "285": 0.935854971408844, - "286": 0.6854451298713684, - "287": 0.6958264112472534, - "288": 0.8681105971336365, - "289": 0.948701024055481, - "290": 0.8501421213150024, - "291": 0.8043948411941528, - "292": 0.8479794263839722, - "293": 0.9081390500068665, - "294": 0.8667063117027283, - "295": 0.8414411544799805, - "296": 0.8328216075897217, - "297": 0.7659209966659546, - "298": 0.8017351627349854, - "299": 0.9452565312385559, - "300": 1.324703574180603, - "301": 1.0370687246322632, - "302": 0.7906301617622375, - "303": 0.8647352457046509, - "304": 0.8563400506973267, - "305": 0.8151233792304993, - "306": 0.9363498091697693, - "307": 0.9981499314308167, - "308": 0.9101542830467224, - "309": 0.918056845664978, - "310": 1.1482096910476685, - "311": 0.9801045060157776, - "312": 0.8164383172988892, - "313": 0.8930280208587646, - "314": 1.1796107292175293, - "315": 0.9580897092819214, - "316": 0.8253657817840576, - "317": 0.8682921528816223, - "318": 0.9067890048027039, - "319": 0.9489942193031311, - "320": 1.52891206741333, - "321": 0.8044270277023315, - "322": 0.7654194235801697, - "323": 0.7355007529258728 - }, - "loss": { - "270": 2.530991554260254, - "271": 2.53185772895813, - "272": 2.5386762619018555, - "273": 2.5862128734588623, - "274": 2.52093243598938, - "275": 2.532766580581665, - "276": 2.539736747741699, - "277": 2.6006762981414795, - "278": 2.509798049926758, - "279": 2.5284221172332764, - "280": 2.5356507301330566, - "281": 2.5504307746887207, - "282": 2.518136739730835, - "283": 2.5362398624420166, - "284": 2.533015727996826, - "285": 2.5774025917053223, - "286": 2.4998836517333984, - "287": 2.472355365753174, - "288": 2.501762866973877, - "289": 2.5282649993896484, - "290": 2.5422515869140625, - "291": 2.4945034980773926, - "292": 2.4808168411254883, - "293": 2.5010881423950195, - "294": 2.515791416168213, - "295": 2.5069451332092285, - "296": 2.506500720977783, - "297": 2.4782958030700684, - "298": 2.5075578689575195, - "299": 2.4994449615478516, - "300": 2.5394954681396484, - "301": 2.586496353149414, - "302": 2.483872175216675, - "303": 2.490853786468506, - "304": 2.4748849868774414, - "305": 2.4569995403289795, - "306": 2.4507343769073486, - "307": 2.500828742980957, - "308": 2.5189921855926514, - "309": 2.464787006378174, - "310": 2.4796557426452637, - "311": 2.5486512184143066, - "312": 2.4313340187072754, - "313": 2.4666738510131836, - "314": 2.4663496017456055, - "315": 2.5455098152160645, - "316": 2.4493112564086914, - "317": 2.4726908206939697, - "318": 2.452273368835449, - "319": 2.4863457679748535, - "320": 2.495359182357788, - "321": 2.5245702266693115, - "322": 2.4951186180114746, - "323": 2.480879306793213 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 0.437026, - 0.435774, - 0.425413, - 0.434067, - 0.439027, - 0.434492, - 0.420048, - 0.428492, - 0.439012, - 0.433589, - 0.429566, - 0.436797, - 0.441322, - 0.434484, - 0.418693, - 0.430093, - 0.444614, - 0.443835, - 0.433691, - 0.428602, - 0.43529, - 0.437662, - 0.434764, - 0.431026, - 0.43394, - 0.435467, - 0.436009, - 0.439845, - 0.437955, - 0.428465, - 0.399891, - 0.423176, - 0.438404, - 0.433788, - 0.434155, - 0.436608, - 0.42844, - 0.424743, - 0.431109, - 0.429896, - 0.41305, - 0.426689, - 0.436252, - 0.431625, - 0.410296, - 0.428178, - 0.435875, - 0.433346, - 0.43051, - 0.428031, - 0.381162, - 0.437956, - 0.440029, - 0.441657 - ], - "train_epoch_time": 4.842427968978882, - "train_loss": 2.459283773663054, - "train_score": 0.2681234307405425, - "val_loss": 2.5116698925311747, - "val_score": 0.2564850029044803 - }, - { - "epoch": 6, - "grad_norm": 0.9734485745429993, - "learning_rate": 0.464, - "model_norm": 87.46509552001953, - "step_logs": { - "grad_norm": { - "324": 0.7187559604644775, - "325": 0.8011908531188965, - "326": 0.9322064518928528, - "327": 0.9514461755752563, - "328": 0.8893263339996338, - "329": 0.9108681082725525, - "330": 0.925350546836853, - "331": 0.8453577756881714, - "332": 1.0446093082427979, - "333": 0.8957911729812622, - "334": 0.7204957008361816, - "335": 0.7012603282928467, - "336": 0.8112923502922058, - "337": 0.947418212890625, - "338": 1.1726282835006714, - "339": 1.1130179166793823, - "340": 0.866492748260498, - "341": 0.8699214458465576, - "342": 1.004923701286316, - "343": 0.9799953103065491, - "344": 0.8774306178092957, - "345": 1.2152811288833618, - "346": 0.771652340888977, - "347": 0.7924943566322327, - "348": 0.9351041316986084, - "349": 1.0074272155761719, - "350": 1.3154418468475342, - "351": 0.9169334173202515, - "352": 0.8625748157501221, - "353": 0.8255141973495483, - "354": 0.8864129781723022, - "355": 0.9081965088844299, - "356": 0.9370619654655457, - "357": 0.8638302683830261, - "358": 0.8353853225708008, - "359": 0.8667851686477661, - "360": 0.9097339510917664, - "361": 0.9536703824996948, - "362": 0.9217488765716553, - "363": 0.8648698329925537, - "364": 0.7994353175163269, - "365": 0.7991637587547302, - "366": 0.8861297965049744, - "367": 0.8647029995918274, - "368": 0.8331355452537537, - "369": 0.830400824546814, - "370": 0.8206387758255005, - "371": 0.9508046507835388, - "372": 1.0702282190322876, - "373": 0.9545499682426453, - "374": 0.937156617641449, - "375": 1.494293451309204, - "376": 0.825318455696106, - "377": 0.9734485745429993 - }, - "loss": { - "324": 2.457473039627075, - "325": 2.473825454711914, - "326": 2.4800539016723633, - "327": 2.5082457065582275, - "328": 2.4482905864715576, - "329": 2.454723358154297, - "330": 2.449164867401123, - "331": 2.4679617881774902, - "332": 2.4207777976989746, - "333": 2.496211528778076, - "334": 2.4067816734313965, - "335": 2.3856005668640137, - "336": 2.3871731758117676, - "337": 2.4347410202026367, - "338": 2.452162027359009, - "339": 2.510870933532715, - "340": 2.450366973876953, - "341": 2.4260406494140625, - "342": 2.4311933517456055, - "343": 2.4496262073516846, - "344": 2.428589344024658, - "345": 2.4351205825805664, - "346": 2.487351894378662, - "347": 2.4238204956054688, - "348": 2.4073033332824707, - "349": 2.454934597015381, - "350": 2.4357969760894775, - "351": 2.526148796081543, - "352": 2.3934249877929688, - "353": 2.381464958190918, - "354": 2.4217166900634766, - "355": 2.4232378005981445, - "356": 2.3913872241973877, - "357": 2.4414122104644775, - "358": 2.3776772022247314, - "359": 2.4333183765411377, - "360": 2.39973521232605, - "361": 2.430523157119751, - "362": 2.400848865509033, - "363": 2.4175333976745605, - "364": 2.353456974029541, - "365": 2.40130352973938, - "366": 2.3738744258880615, - "367": 2.410468578338623, - "368": 2.371448040008545, - "369": 2.4066576957702637, - "370": 2.370410919189453, - "371": 2.394441843032837, - "372": 2.408982276916504, - "373": 2.4111380577087402, - "374": 2.356574058532715, - "375": 2.4638688564300537, - "376": 2.455554723739624, - "377": 2.461418867111206 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 0.442423, - 0.437654, - 0.429116, - 0.42815, - 0.43165, - 0.430261, - 0.429188, - 0.434791, - 0.42007, - 0.431797, - 0.441888, - 0.442822, - 0.436104, - 0.427441, - 0.410585, - 0.416344, - 0.433205, - 0.432687, - 0.423215, - 0.425315, - 0.432212, - 0.406765, - 0.439586, - 0.437688, - 0.427937, - 0.423391, - 0.398347, - 0.43074, - 0.432787, - 0.435113, - 0.431518, - 0.430041, - 0.427576, - 0.433277, - 0.434419, - 0.432984, - 0.429625, - 0.426936, - 0.428795, - 0.432924, - 0.4365, - 0.437033, - 0.43093, - 0.43285, - 0.434495, - 0.435079, - 0.435308, - 0.42663, - 0.417902, - 0.426599, - 0.427074, - 0.383391, - 0.435945, - 0.425955 - ], - "train_epoch_time": 4.842965602874756, - "train_loss": 2.447208639538955, - "train_score": 0.26918489961234193, - "val_loss": 2.4942915494351654, - "val_score": 0.26127475561695995 - }, - { - "epoch": 7, - "grad_norm": 0.9931488037109375, - "learning_rate": 0.464, - "model_norm": 87.51716613769531, - "step_logs": { - "grad_norm": { - "378": 0.9711732268333435, - "379": 0.8608596920967102, - "380": 0.8385066390037537, - "381": 0.9775044322013855, - "382": 1.2387624979019165, - "383": 1.1272770166397095, - "384": 1.0324372053146362, - "385": 0.8504167795181274, - "386": 0.8226385712623596, - "387": 0.9326133131980896, - "388": 0.9151419401168823, - "389": 1.0062415599822998, - "390": 0.9342063069343567, - "391": 0.7492815256118774, - "392": 0.7576513290405273, - "393": 0.8583515882492065, - "394": 0.8657914400100708, - "395": 1.0114110708236694, - "396": 0.877298891544342, - "397": 0.6910508871078491, - "398": 0.7735267877578735, - "399": 0.7676224708557129, - "400": 0.8051762580871582, - "401": 0.9203561544418335, - "402": 0.9358859658241272, - "403": 1.0726943016052246, - "404": 0.9116992354393005, - "405": 1.0098536014556885, - "406": 0.8614804148674011, - "407": 0.8529787659645081, - "408": 0.8717355132102966, - "409": 0.8469391465187073, - "410": 0.9287498593330383, - "411": 0.8325091600418091, - "412": 0.887321949005127, - "413": 0.8735884428024292, - "414": 0.8441396355628967, - "415": 0.9033401012420654, - "416": 0.9610435962677002, - "417": 0.9324353337287903, - "418": 0.87081378698349, - "419": 0.8831162452697754, - "420": 0.8046960830688477, - "421": 0.8732675313949585, - "422": 0.9297661185264587, - "423": 0.9147859811782837, - "424": 1.2592324018478394, - "425": 1.0980116128921509, - "426": 1.0170390605926514, - "427": 0.942375898361206, - "428": 0.8641000986099243, - "429": 0.9221069812774658, - "430": 1.0613394975662231, - "431": 0.9931488037109375 - }, - "loss": { - "378": 2.4451165199279785, - "379": 2.3860435485839844, - "380": 2.3800244331359863, - "381": 2.38053035736084, - "382": 2.448452949523926, - "383": 2.4239790439605713, - "384": 2.434828519821167, - "385": 2.3803062438964844, - "386": 2.3709335327148438, - "387": 2.389134407043457, - "388": 2.4044575691223145, - "389": 2.393167495727539, - "390": 2.401137590408325, - "391": 2.3586223125457764, - "392": 2.3603355884552, - "393": 2.3524577617645264, - "394": 2.395683765411377, - "395": 2.3487906455993652, - "396": 2.4343361854553223, - "397": 2.320807933807373, - "398": 2.373628854751587, - "399": 2.3598885536193848, - "400": 2.339709520339966, - "401": 2.364112377166748, - "402": 2.373948335647583, - "403": 2.350099802017212, - "404": 2.371169090270996, - "405": 2.351757049560547, - "406": 2.369741916656494, - "407": 2.328277111053467, - "408": 2.352144718170166, - "409": 2.330667495727539, - "410": 2.3731205463409424, - "411": 2.354344367980957, - "412": 2.3447418212890625, - "413": 2.3495028018951416, - "414": 2.3375487327575684, - "415": 2.3459177017211914, - "416": 2.3344593048095703, - "417": 2.350660800933838, - "418": 2.330465793609619, - "419": 2.3261756896972656, - "420": 2.329648971557617, - "421": 2.300516128540039, - "422": 2.3484814167022705, - "423": 2.339043617248535, - "424": 2.3472890853881836, - "425": 2.4060328006744385, - "426": 2.3729686737060547, - "427": 2.3613250255584717, - "428": 2.3165369033813477, - "429": 2.332780361175537, - "430": 2.3335185050964355, - "431": 2.3769116401672363 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 0.425887, - 0.432813, - 0.434239, - 0.424472, - 0.405098, - 0.413686, - 0.421219, - 0.433447, - 0.435182, - 0.427863, - 0.429309, - 0.422526, - 0.427916, - 0.439717, - 0.439218, - 0.432569, - 0.432597, - 0.421419, - 0.432291, - 0.442859, - 0.438363, - 0.438593, - 0.435974, - 0.42839, - 0.427414, - 0.416669, - 0.429103, - 0.421587, - 0.432571, - 0.432635, - 0.431646, - 0.433077, - 0.427915, - 0.434337, - 0.430465, - 0.431484, - 0.433352, - 0.429351, - 0.424991, - 0.427331, - 0.431431, - 0.430514, - 0.435891, - 0.430864, - 0.427493, - 0.428439, - 0.401133, - 0.415677, - 0.421386, - 0.426764, - 0.431717, - 0.427822, - 0.417269, - 0.423252 - ], - "train_epoch_time": 4.842416763305664, - "train_loss": 2.3158105449320767, - "train_score": 0.3202194673515632, - "val_loss": 2.36853690645588, - "val_score": 0.308472660324193 - }, - { - "epoch": 8, - "grad_norm": 0.7776458859443665, - "learning_rate": 0.464, - "model_norm": 87.57430267333984, - "step_logs": { - "grad_norm": { - "432": 0.8584507703781128, - "433": 0.913176417350769, - "434": 1.148016333580017, - "435": 0.9464519023895264, - "436": 0.7887730002403259, - "437": 0.8173912167549133, - "438": 0.8222514390945435, - "439": 0.8680413365364075, - "440": 0.9511731266975403, - "441": 0.9565020203590393, - "442": 1.0319210290908813, - "443": 0.8324898481369019, - "444": 0.6966243386268616, - "445": 0.7177284955978394, - "446": 0.8587246537208557, - "447": 1.071374773979187, - "448": 1.1695053577423096, - "449": 0.9351375699043274, - "450": 0.9293837547302246, - "451": 0.8108306527137756, - "452": 0.7694725394248962, - "453": 0.7713205218315125, - "454": 0.8990504145622253, - "455": 0.9682612419128418, - "456": 0.8445351719856262, - "457": 0.7650795578956604, - "458": 0.8014143705368042, - "459": 0.8509171009063721, - "460": 0.9707989692687988, - "461": 0.861588716506958, - "462": 0.7892059087753296, - "463": 0.7764274477958679, - "464": 0.7194980978965759, - "465": 0.7126712799072266, - "466": 0.8123999238014221, - "467": 0.9879125356674194, - "468": 0.9892673492431641, - "469": 1.1081101894378662, - "470": 1.7786822319030762, - "471": 1.500467300415039, - "472": 1.124647855758667, - "473": 1.1007084846496582, - "474": 0.7789115905761719, - "475": 1.0154168605804443, - "476": 0.960903525352478, - "477": 0.8702136874198914, - "478": 0.9485010504722595, - "479": 0.8980382680892944, - "480": 0.8163084387779236, - "481": 0.8291359543800354, - "482": 0.7678986191749573, - "483": 0.6684704422950745, - "484": 0.6550145745277405, - "485": 0.7776458859443665 - }, - "loss": { - "432": 2.3186731338500977, - "433": 2.3324971199035645, - "434": 2.342672348022461, - "435": 2.375446319580078, - "436": 2.3103208541870117, - "437": 2.289398670196533, - "438": 2.2947425842285156, - "439": 2.277359962463379, - "440": 2.334648847579956, - "441": 2.3235483169555664, - "442": 2.3202476501464844, - "443": 2.3141045570373535, - "444": 2.2952592372894287, - "445": 2.2474730014801025, - "446": 2.271878719329834, - "447": 2.31207275390625, - "448": 2.340684175491333, - "449": 2.36981201171875, - "450": 2.3036441802978516, - "451": 2.2990965843200684, - "452": 2.3098270893096924, - "453": 2.274724006652832, - "454": 2.2898430824279785, - "455": 2.3211851119995117, - "456": 2.2781529426574707, - "457": 2.2576098442077637, - "458": 2.262582540512085, - "459": 2.285156488418579, - "460": 2.2788801193237305, - "461": 2.314042329788208, - "462": 2.2632551193237305, - "463": 2.260465621948242, - "464": 2.2466373443603516, - "465": 2.2557120323181152, - "466": 2.2535109519958496, - "467": 2.2910313606262207, - "468": 2.3230910301208496, - "469": 2.324202060699463, - "470": 2.337447166442871, - "471": 2.4766297340393066, - "472": 2.410165309906006, - "473": 2.3862979412078857, - "474": 2.286574363708496, - "475": 2.2892537117004395, - "476": 2.3257291316986084, - "477": 2.331841468811035, - "478": 2.2833876609802246, - "479": 2.284867525100708, - "480": 2.2700722217559814, - "481": 2.2664663791656494, - "482": 2.269594192504883, - "483": 2.269068479537964, - "484": 2.2483325004577637, - "485": 2.264895439147949 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 0.432136, - 0.428462, - 0.410431, - 0.426672, - 0.436715, - 0.434577, - 0.434313, - 0.430922, - 0.425725, - 0.425162, - 0.41935, - 0.433855, - 0.442304, - 0.440572, - 0.431506, - 0.416077, - 0.408607, - 0.427409, - 0.426867, - 0.435132, - 0.437955, - 0.437456, - 0.428878, - 0.424246, - 0.43258, - 0.437673, - 0.435331, - 0.432227, - 0.423379, - 0.431859, - 0.436153, - 0.436964, - 0.440454, - 0.440965, - 0.434479, - 0.422267, - 0.422689, - 0.413338, - 0.353118, - 0.383186, - 0.413639, - 0.415105, - 0.437094, - 0.420103, - 0.424867, - 0.43149, - 0.425139, - 0.42888, - 0.434416, - 0.433495, - 0.437622, - 0.443727, - 0.444329, - 0.436934 - ], - "train_epoch_time": 4.84152889251709, - "train_loss": 2.261845543736878, - "train_score": 0.34534388442805714, - "val_loss": 2.3263614760748146, - "val_score": 0.32416493275409175 - }, - { - "epoch": 9, - "grad_norm": 0.7436298131942749, - "learning_rate": 0.464, - "model_norm": 87.63212585449219, - "step_logs": { - "grad_norm": { - "486": 0.824658989906311, - "487": 0.7818910479545593, - "488": 0.8712555170059204, - "489": 1.0522475242614746, - "490": 0.9231565594673157, - "491": 0.8687759041786194, - "492": 0.9941099286079407, - "493": 1.0223734378814697, - "494": 0.9244717955589294, - "495": 1.058251976966858, - "496": 1.0190001726150513, - "497": 0.8731686472892761, - "498": 0.8970601558685303, - "499": 0.8075329661369324, - "500": 0.7713789939880371, - "501": 0.7968210577964783, - "502": 0.7654058933258057, - "503": 0.7904702425003052, - "504": 0.8240793943405151, - "505": 0.8736516833305359, - "506": 0.8800214529037476, - "507": 0.828270435333252, - "508": 0.7866398096084595, - "509": 0.7940694689750671, - "510": 0.8814888000488281, - "511": 0.9283365607261658, - "512": 0.8684666156768799, - "513": 0.7865447998046875, - "514": 0.7620531916618347, - "515": 0.7946272492408752, - "516": 0.8509487509727478, - "517": 1.0813733339309692, - "518": 1.0491727590560913, - "519": 0.7492812871932983, - "520": 0.7069373726844788, - "521": 0.781988263130188, - "522": 0.8679794669151306, - "523": 0.9445253610610962, - "524": 0.9281182289123535, - "525": 0.7825267314910889, - "526": 0.71170574426651, - "527": 0.7191166877746582, - "528": 0.8151636719703674, - "529": 0.8615965247154236, - "530": 0.8744245171546936, - "531": 0.8416059017181396, - "532": 0.897475004196167, - "533": 0.8868516087532043, - "534": 0.7889246344566345, - "535": 0.8108001947402954, - "536": 0.8731734752655029, - "537": 0.9442831873893738, - "538": 0.8500063419342041, - "539": 0.7436298131942749 - }, - "loss": { - "486": 2.2751922607421875, - "487": 2.2577123641967773, - "488": 2.268313407897949, - "489": 2.294137477874756, - "490": 2.2868940830230713, - "491": 2.2579355239868164, - "492": 2.2897439002990723, - "493": 2.3227405548095703, - "494": 2.272470474243164, - "495": 2.2700045108795166, - "496": 2.3085427284240723, - "497": 2.265313148498535, - "498": 2.2471089363098145, - "499": 2.2609357833862305, - "500": 2.253617525100708, - "501": 2.242844343185425, - "502": 2.2181596755981445, - "503": 2.243516206741333, - "504": 2.2470316886901855, - "505": 2.2407383918762207, - "506": 2.276848554611206, - "507": 2.2264628410339355, - "508": 2.244060516357422, - "509": 2.234928607940674, - "510": 2.2360825538635254, - "511": 2.2703027725219727, - "512": 2.2440080642700195, - "513": 2.2522902488708496, - "514": 2.2549145221710205, - "515": 2.2166194915771484, - "516": 2.2307488918304443, - "517": 2.222540855407715, - "518": 2.305981159210205, - "519": 2.2309727668762207, - "520": 2.2235610485076904, - "521": 2.1981966495513916, - "522": 2.212188243865967, - "523": 2.2313284873962402, - "524": 2.2742764949798584, - "525": 2.25014328956604, - "526": 2.212052345275879, - "527": 2.200212001800537, - "528": 2.240995407104492, - "529": 2.2436797618865967, - "530": 2.2521815299987793, - "531": 2.2249138355255127, - "532": 2.215155601501465, - "533": 2.2271718978881836, - "534": 2.231881618499756, - "535": 2.214655876159668, - "536": 2.2488224506378174, - "537": 2.2581586837768555, - "538": 2.2572288513183594, - "539": 2.2004570960998535 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 0.43391, - 0.436574, - 0.430571, - 0.417277, - 0.427077, - 0.430606, - 0.421768, - 0.420137, - 0.426764, - 0.416347, - 0.420156, - 0.430394, - 0.428407, - 0.434899, - 0.437218, - 0.435404, - 0.43721, - 0.435838, - 0.433598, - 0.430017, - 0.430063, - 0.433044, - 0.436101, - 0.435495, - 0.429384, - 0.426444, - 0.430436, - 0.436203, - 0.43784, - 0.435236, - 0.431504, - 0.413523, - 0.417737, - 0.438405, - 0.441004, - 0.435869, - 0.430024, - 0.424614, - 0.426521, - 0.436445, - 0.440594, - 0.440007, - 0.434135, - 0.430922, - 0.430122, - 0.432087, - 0.427903, - 0.428864, - 0.435805, - 0.434105, - 0.430165, - 0.425061, - 0.431925, - 0.438438 - ], - "train_epoch_time": 4.841368198394775, - "train_loss": 2.2177160761106647, - "train_score": 0.3403089131652197, - "val_loss": 2.2879242376947238, - "val_score": 0.32227235279729255 - }, - { - "epoch": 10, - "grad_norm": 0.9012609124183655, - "learning_rate": 0.464, - "model_norm": 87.6935043334961, - "step_logs": { - "grad_norm": { - "540": 0.779034435749054, - "541": 0.7930557131767273, - "542": 0.8253334164619446, - "543": 0.8134466409683228, - "544": 1.0176053047180176, - "545": 1.0052220821380615, - "546": 0.8841612935066223, - "547": 0.8768597841262817, - "548": 0.9078478217124939, - "549": 0.8520333766937256, - "550": 0.7919204831123352, - "551": 0.816322386264801, - "552": 0.8913934230804443, - "553": 0.9353684782981873, - "554": 0.8901406526565552, - "555": 0.8221054673194885, - "556": 0.8405773639678955, - "557": 0.7872676849365234, - "558": 0.7312039732933044, - "559": 0.7674193382263184, - "560": 0.7997778058052063, - "561": 0.859046459197998, - "562": 0.7318307161331177, - "563": 0.6345550417900085, - "564": 0.6962268352508545, - "565": 0.7579600811004639, - "566": 0.8382430076599121, - "567": 0.8840183615684509, - "568": 0.9224840998649597, - "569": 0.8858940601348877, - "570": 0.8380024433135986, - "571": 0.8245536088943481, - "572": 0.7209427356719971, - "573": 0.7269752025604248, - "574": 0.8401072025299072, - "575": 0.8850416541099548, - "576": 0.7944228053092957, - "577": 0.8254134058952332, - "578": 0.8889157772064209, - "579": 0.9426918625831604, - "580": 0.902326762676239, - "581": 0.8255059123039246, - "582": 0.7830792665481567, - "583": 0.7566782236099243, - "584": 0.8512044548988342, - "585": 0.9580790400505066, - "586": 0.870844841003418, - "587": 0.7903308272361755, - "588": 0.826323390007019, - "589": 0.8290290832519531, - "590": 0.9161742329597473, - "591": 0.8972206115722656, - "592": 0.9315125346183777, - "593": 0.9012609124183655 - }, - "loss": { - "540": 2.220630645751953, - "541": 2.203184127807617, - "542": 2.2084784507751465, - "543": 2.2190728187561035, - "544": 2.2008109092712402, - "545": 2.273651599884033, - "546": 2.23386549949646, - "547": 2.2281219959259033, - "548": 2.2046709060668945, - "549": 2.226468086242676, - "550": 2.203355073928833, - "551": 2.2306888103485107, - "552": 2.1902008056640625, - "553": 2.241281270980835, - "554": 2.234280586242676, - "555": 2.20111083984375, - "556": 2.2291407585144043, - "557": 2.213456392288208, - "558": 2.163346290588379, - "559": 2.178955078125, - "560": 2.203125238418579, - "561": 2.1998467445373535, - "562": 2.2070279121398926, - "563": 2.1700072288513184, - "564": 2.1725716590881348, - "565": 2.180734872817993, - "566": 2.1932973861694336, - "567": 2.220599412918091, - "568": 2.222261905670166, - "569": 2.229309558868408, - "570": 2.2023744583129883, - "571": 2.1994171142578125, - "572": 2.19333553314209, - "573": 2.154064655303955, - "574": 2.1894516944885254, - "575": 2.2035470008850098, - "576": 2.2258341312408447, - "577": 2.195603847503662, - "578": 2.2182822227478027, - "579": 2.222468137741089, - "580": 2.2159554958343506, - "581": 2.220186471939087, - "582": 2.186936616897583, - "583": 2.149834394454956, - "584": 2.2068819999694824, - "585": 2.204127788543701, - "586": 2.253419876098633, - "587": 2.150207042694092, - "588": 2.1659839153289795, - "589": 2.204482078552246, - "590": 2.189301013946533, - "591": 2.208019256591797, - "592": 2.1686458587646484, - "593": 2.2045767307281494 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 0.436334, - 0.435179, - 0.433015, - 0.433978, - 0.418335, - 0.42063, - 0.429157, - 0.429606, - 0.426969, - 0.431369, - 0.435258, - 0.433926, - 0.427978, - 0.425468, - 0.428727, - 0.433144, - 0.432216, - 0.435696, - 0.438838, - 0.436621, - 0.434718, - 0.430496, - 0.43927, - 0.44485, - 0.441164, - 0.437274, - 0.431899, - 0.428975, - 0.426141, - 0.428965, - 0.43204, - 0.43295, - 0.43982, - 0.439011, - 0.431714, - 0.428649, - 0.435362, - 0.43284, - 0.428582, - 0.42461, - 0.427554, - 0.433155, - 0.435659, - 0.436999, - 0.431159, - 0.423119, - 0.430396, - 0.434703, - 0.432377, - 0.432702, - 0.426099, - 0.427814, - 0.424587, - 0.427461 - ], - "train_epoch_time": 4.840528964996338, - "train_loss": 2.1896640532670095, - "train_score": 0.3543243364504454, - "val_loss": 2.26704915355459, - "val_score": 0.33408976829558373 - }, - { - "epoch": 11, - "grad_norm": 1.0583347082138062, - "learning_rate": 0.464, - "model_norm": 87.7582778930664, - "step_logs": { - "grad_norm": { - "594": 0.9341737031936646, - "595": 0.9632622003555298, - "596": 1.0863580703735352, - "597": 0.6909806132316589, - "598": 0.6440359950065613, - "599": 0.7869378328323364, - "600": 0.8006590008735657, - "601": 0.8540022373199463, - "602": 0.9666603803634644, - "603": 0.9440022706985474, - "604": 0.947803258895874, - "605": 1.02663254737854, - "606": 0.9005405902862549, - "607": 0.7349113821983337, - "608": 0.7511271834373474, - "609": 0.7522056698799133, - "610": 0.7426261901855469, - "611": 0.871614396572113, - "612": 0.8691319227218628, - "613": 0.8341525793075562, - "614": 0.7974764704704285, - "615": 0.803653359413147, - "616": 0.8657097220420837, - "617": 0.8455263376235962, - "618": 0.8263103365898132, - "619": 0.9229281544685364, - "620": 0.9237298369407654, - "621": 0.843511700630188, - "622": 0.7880929708480835, - "623": 0.8206955790519714, - "624": 0.8767843246459961, - "625": 0.9239310622215271, - "626": 1.0672721862792969, - "627": 1.097472906112671, - "628": 1.1559778451919556, - "629": 0.9792971014976501, - "630": 0.861211359500885, - "631": 0.9408342838287354, - "632": 0.870436429977417, - "633": 0.7878254055976868, - "634": 0.872065007686615, - "635": 0.9755440354347229, - "636": 1.0096006393432617, - "637": 0.8474461436271667, - "638": 0.885677695274353, - "639": 0.8158318996429443, - "640": 0.7676178812980652, - "641": 0.7629214525222778, - "642": 0.793358564376831, - "643": 0.7681413888931274, - "644": 0.8716553449630737, - "645": 0.9326581954956055, - "646": 1.056408405303955, - "647": 1.0583347082138062 - }, - "loss": { - "594": 2.216533660888672, - "595": 2.190443515777588, - "596": 2.199070453643799, - "597": 2.197890043258667, - "598": 2.1279449462890625, - "599": 2.1496286392211914, - "600": 2.1944611072540283, - "601": 2.1710777282714844, - "602": 2.2079710960388184, - "603": 2.204212188720703, - "604": 2.178293466567993, - "605": 2.2084767818450928, - "606": 2.221353054046631, - "607": 2.1685538291931152, - "608": 2.1588807106018066, - "609": 2.1699604988098145, - "610": 2.1372690200805664, - "611": 2.1742005348205566, - "612": 2.1818742752075195, - "613": 2.171539068222046, - "614": 2.1581578254699707, - "615": 2.121051549911499, - "616": 2.1752583980560303, - "617": 2.1521987915039062, - "618": 2.1538634300231934, - "619": 2.1955318450927734, - "620": 2.180851459503174, - "621": 2.1731820106506348, - "622": 2.160604476928711, - "623": 2.165349006652832, - "624": 2.1680428981781006, - "625": 2.1506142616271973, - "626": 2.1739940643310547, - "627": 2.222702980041504, - "628": 2.184702157974243, - "629": 2.184560775756836, - "630": 2.1318483352661133, - "631": 2.163470506668091, - "632": 2.174398422241211, - "633": 2.155637502670288, - "634": 2.1470487117767334, - "635": 2.175360679626465, - "636": 2.1443963050842285, - "637": 2.1520023345947266, - "638": 2.1553986072540283, - "639": 2.1636781692504883, - "640": 2.1627466678619385, - "641": 2.1270852088928223, - "642": 2.1557087898254395, - "643": 2.103940010070801, - "644": 2.1279478073120117, - "645": 2.1773481369018555, - "646": 2.149991035461426, - "647": 2.1986966133117676 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 0.425165, - 0.422481, - 0.412625, - 0.441737, - 0.443925, - 0.434931, - 0.434549, - 0.430453, - 0.422516, - 0.424211, - 0.423482, - 0.417747, - 0.427769, - 0.438654, - 0.437476, - 0.437532, - 0.437792, - 0.429206, - 0.429502, - 0.431894, - 0.434308, - 0.433384, - 0.429657, - 0.4308, - 0.432213, - 0.425685, - 0.425387, - 0.431244, - 0.43499, - 0.432769, - 0.428731, - 0.424874, - 0.413711, - 0.412182, - 0.406339, - 0.421111, - 0.429346, - 0.423775, - 0.429296, - 0.434946, - 0.428766, - 0.421245, - 0.417914, - 0.430657, - 0.427873, - 0.433092, - 0.436415, - 0.436302, - 0.434563, - 0.435655, - 0.428505, - 0.424642, - 0.414129, - 0.414958 - ], - "train_epoch_time": 4.841063022613525, - "train_loss": 2.1805734154825744, - "train_score": 0.357954851130673, - "val_loss": 2.2797273450822972, - "val_score": 0.3361482849487892 - }, - { - "epoch": 12, - "grad_norm": 0.5975656509399414, - "learning_rate": 0.464, - "model_norm": 87.81676483154297, - "step_logs": { - "grad_norm": { - "648": 1.1492356061935425, - "649": 1.0196889638900757, - "650": 0.8637744784355164, - "651": 0.8311303853988647, - "652": 0.7575616240501404, - "653": 0.711988091468811, - "654": 0.7553699612617493, - "655": 0.7828322649002075, - "656": 0.8395745754241943, - "657": 0.9479196667671204, - "658": 0.8363834023475647, - "659": 0.7534735202789307, - "660": 0.6846778988838196, - "661": 0.6148439645767212, - "662": 0.5814868211746216, - "663": 0.5759593844413757, - "664": 0.572715163230896, - "665": 0.6244068741798401, - "666": 0.6298314332962036, - "667": 0.5988413691520691, - "668": 0.571150004863739, - "669": 0.5969657897949219, - "670": 0.5965138673782349, - "671": 0.675101637840271, - "672": 0.7497316598892212, - "673": 0.687667965888977, - "674": 0.671347975730896, - "675": 0.6587310433387756, - "676": 0.6218955516815186, - "677": 0.6500345468521118, - "678": 0.7552685141563416, - "679": 0.8805782198905945, - "680": 0.9224926829338074, - "681": 0.7951744198799133, - "682": 0.7153221368789673, - "683": 0.7064132690429688, - "684": 0.6845642924308777, - "685": 0.6810371279716492, - "686": 0.6073359847068787, - "687": 0.555525004863739, - "688": 0.5570313930511475, - "689": 0.5753442049026489, - "690": 0.5745162963867188, - "691": 0.5818527936935425, - "692": 0.6150049567222595, - "693": 0.6485708951950073, - "694": 0.6962539553642273, - "695": 0.6749153733253479, - "696": 0.610314667224884, - "697": 0.5902243256568909, - "698": 0.618800938129425, - "699": 0.6315382122993469, - "700": 0.5988143086433411, - "701": 0.5975656509399414 - }, - "loss": { - "648": 2.201709508895874, - "649": 2.218527317047119, - "650": 2.1624703407287598, - "651": 2.16119384765625, - "652": 2.113049268722534, - "653": 2.1079556941986084, - "654": 2.107941150665283, - "655": 2.132331609725952, - "656": 2.1268858909606934, - "657": 2.1456098556518555, - "658": 2.158702850341797, - "659": 2.108194351196289, - "660": 2.1002771854400635, - "661": 2.093564033508301, - "662": 2.075716018676758, - "663": 2.065596103668213, - "664": 2.084507942199707, - "665": 2.0992631912231445, - "666": 2.0900769233703613, - "667": 2.1014933586120605, - "668": 2.06412410736084, - "669": 2.06667160987854, - "670": 2.093775987625122, - "671": 2.0799057483673096, - "672": 2.0605130195617676, - "673": 2.0615901947021484, - "674": 2.071089029312134, - "675": 2.075998544692993, - "676": 2.07181453704834, - "677": 2.0697274208068848, - "678": 2.1032066345214844, - "679": 2.063380479812622, - "680": 2.0770063400268555, - "681": 2.104832887649536, - "682": 2.113340377807617, - "683": 2.077195882797241, - "684": 2.0580098628997803, - "685": 2.0724377632141113, - "686": 2.067328691482544, - "687": 2.057271957397461, - "688": 2.0643768310546875, - "689": 2.065443515777588, - "690": 2.012594223022461, - "691": 2.035238742828369, - "692": 2.057743787765503, - "693": 2.062107563018799, - "694": 2.066854953765869, - "695": 2.0930724143981934, - "696": 2.0564115047454834, - "697": 2.0373926162719727, - "698": 2.0564770698547363, - "699": 2.0385019779205322, - "700": 2.0522985458374023, - "701": 2.055959701538086 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 0.407314, - 0.416165, - 0.424696, - 0.424511, - 0.426342, - 0.426612, - 0.421336, - 0.417327, - 0.411043, - 0.40139, - 0.406671, - 0.408694, - 0.409973, - 0.410932, - 0.409754, - 0.407268, - 0.404853, - 0.399886, - 0.396909, - 0.39575, - 0.394052, - 0.390263, - 0.387775, - 0.381483, - 0.37504, - 0.375496, - 0.373692, - 0.371648, - 0.370529, - 0.36668, - 0.359635, - 0.350499, - 0.345956, - 0.350055, - 0.351037, - 0.348525, - 0.346646, - 0.344234, - 0.344253, - 0.343227, - 0.340491, - 0.337203, - 0.334282, - 0.331444, - 0.327794, - 0.324036, - 0.319787, - 0.317986, - 0.31714, - 0.31494, - 0.311487, - 0.308336, - 0.306622, - 0.303957 - ], - "train_epoch_time": 4.840806722640991, - "train_loss": 2.0371761134570434, - "train_score": 0.3957204986507958, - "val_loss": 2.1470213062317036, - "val_score": 0.3678333100987904 - }, - { - "epoch": 13, - "grad_norm": 0.39106497168540955, - "learning_rate": 0.3093333333333334, - "model_norm": 87.85234069824219, - "step_logs": { - "grad_norm": { - "702": 0.5970368385314941, - "703": 0.6340122222900391, - "704": 0.6445062160491943, - "705": 0.6437075138092041, - "706": 0.6375112533569336, - "707": 0.5934171080589294, - "708": 0.5600729584693909, - "709": 0.5130089521408081, - "710": 0.5271528363227844, - "711": 0.5570048689842224, - "712": 0.5029557943344116, - "713": 0.48491641879081726, - "714": 0.4905167818069458, - "715": 0.5370640158653259, - "716": 0.5350328087806702, - "717": 0.5381401181221008, - "718": 0.519120454788208, - "719": 0.5183349251747131, - "720": 0.5376592874526978, - "721": 0.542233407497406, - "722": 0.5692716240882874, - "723": 0.5099456906318665, - "724": 0.4888961911201477, - "725": 0.4807478189468384, - "726": 0.48631423711776733, - "727": 0.5174635648727417, - "728": 0.5809503793716431, - "729": 0.6511600017547607, - "730": 0.6437714099884033, - "731": 0.5654249787330627, - "732": 0.5159339904785156, - "733": 0.4789518415927887, - "734": 0.436004638671875, - "735": 0.42933663725852966, - "736": 0.411679744720459, - "737": 0.4152348041534424, - "738": 0.4593541920185089, - "739": 0.45132145285606384, - "740": 0.42840060591697693, - "741": 0.4757876694202423, - "742": 0.44800931215286255, - "743": 0.4203818142414093, - "744": 0.4935603141784668, - "745": 0.47921067476272583, - "746": 0.4374750554561615, - "747": 0.4020148515701294, - "748": 0.38653868436813354, - "749": 0.4267551600933075, - "750": 0.404039591550827, - "751": 0.4053076207637787, - "752": 0.41479888558387756, - "753": 0.42037707567214966, - "754": 0.3903408348560333, - "755": 0.39106497168540955 - }, - "loss": { - "702": 2.047452926635742, - "703": 2.0433740615844727, - "704": 2.03933048248291, - "705": 2.0338144302368164, - "706": 2.038938522338867, - "707": 2.0089528560638428, - "708": 2.026998996734619, - "709": 2.0096888542175293, - "710": 2.027291774749756, - "711": 2.0200047492980957, - "712": 1.9866796731948853, - "713": 2.01995849609375, - "714": 2.03725266456604, - "715": 2.01417875289917, - "716": 2.0321667194366455, - "717": 2.0208499431610107, - "718": 2.0232436656951904, - "719": 2.0460236072540283, - "720": 2.026439666748047, - "721": 1.9988811016082764, - "722": 2.0238771438598633, - "723": 2.021852731704712, - "724": 2.0371651649475098, - "725": 1.994450330734253, - "726": 2.005601406097412, - "727": 2.0040032863616943, - "728": 2.0220701694488525, - "729": 2.0117228031158447, - "730": 2.02345609664917, - "731": 2.0317282676696777, - "732": 2.012515068054199, - "733": 1.9958631992340088, - "734": 2.0060486793518066, - "735": 2.0161843299865723, - "736": 1.993544578552246, - "737": 1.9995037317276, - "738": 1.9948532581329346, - "739": 2.022557020187378, - "740": 1.9958159923553467, - "741": 1.9894587993621826, - "742": 2.0125489234924316, - "743": 1.9957914352416992, - "744": 2.0075745582580566, - "745": 1.9775092601776123, - "746": 2.0013368129730225, - "747": 1.9752076864242554, - "748": 1.9865528345108032, - "749": 1.9957020282745361, - "750": 2.006993293762207, - "751": 1.9674067497253418, - "752": 2.0028648376464844, - "753": 1.984318733215332, - "754": 1.9704029560089111, - "755": 1.9741883277893066 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 0.301222, - 0.297501, - 0.294499, - 0.291801, - 0.289288, - 0.287577, - 0.28569, - 0.283906, - 0.280905, - 0.277513, - 0.275763, - 0.273406, - 0.27057, - 0.266899, - 0.264224, - 0.261382, - 0.258962, - 0.256257, - 0.253124, - 0.250222, - 0.247064, - 0.245255, - 0.242812, - 0.24007, - 0.237227, - 0.234012, - 0.230349, - 0.226463, - 0.223882, - 0.222314, - 0.220155, - 0.217783, - 0.215457, - 0.212726, - 0.210058, - 0.207218, - 0.203997, - 0.201297, - 0.198662, - 0.195432, - 0.192892, - 0.190285, - 0.186894, - 0.184182, - 0.181716, - 0.179122, - 0.176402, - 0.173334, - 0.170659, - 0.167804, - 0.164946, - 0.162081, - 0.159406, - 0.156576 - ], - "train_epoch_time": 4.84127140045166, - "train_loss": 1.9806178782557484, - "train_score": 0.4138080613770478, - "val_loss": 2.09881235482909, - "val_score": 0.38297843717543323 - }, - { - "epoch": 14, - "grad_norm": 0.33874234557151794, - "learning_rate": 0.1546666666666667, - "model_norm": 87.8645248413086, - "step_logs": { - "grad_norm": { - "756": 0.37414446473121643, - "757": 0.3537454307079315, - "758": 0.3970438241958618, - "759": 0.38566383719444275, - "760": 0.40795400738716125, - "761": 0.39117708802223206, - "762": 0.3797253370285034, - "763": 0.42360520362854004, - "764": 0.41331180930137634, - "765": 0.36566537618637085, - "766": 0.38493847846984863, - "767": 0.37495824694633484, - "768": 0.3491826057434082, - "769": 0.35057926177978516, - "770": 0.36475133895874023, - "771": 0.3751797378063202, - "772": 0.3661869466304779, - "773": 0.3664822578430176, - "774": 0.3681018352508545, - "775": 0.39027464389801025, - "776": 0.3661656975746155, - "777": 0.3489469289779663, - "778": 0.3828277289867401, - "779": 0.4010617733001709, - "780": 0.3795221149921417, - "781": 0.3674989938735962, - "782": 0.3609263002872467, - "783": 0.365070641040802, - "784": 0.37838661670684814, - "785": 0.3415158987045288, - "786": 0.3888396918773651, - "787": 0.33122503757476807, - "788": 0.3711661398410797, - "789": 0.3774604797363281, - "790": 0.35745200514793396, - "791": 0.41088002920150757, - "792": 0.3404195308685303, - "793": 0.3303207457065582, - "794": 0.37241995334625244, - "795": 0.3590289354324341, - "796": 0.35071736574172974, - "797": 0.3599846363067627, - "798": 0.33293843269348145, - "799": 0.3308292031288147, - "800": 0.3526424467563629, - "801": 0.34812334179878235, - "802": 0.37489041686058044, - "803": 0.33675456047058105, - "804": 0.36126208305358887, - "805": 0.34167638421058655, - "806": 0.33782336115837097, - "807": 0.35379254817962646, - "808": 0.33602678775787354, - "809": 0.33874234557151794 - }, - "loss": { - "756": 1.9569520950317383, - "757": 1.9686423540115356, - "758": 1.9982889890670776, - "759": 1.9629350900650024, - "760": 1.954088807106018, - "761": 1.9893782138824463, - "762": 1.9996222257614136, - "763": 1.9854071140289307, - "764": 1.9949249029159546, - "765": 1.9680728912353516, - "766": 1.9819000959396362, - "767": 1.9962029457092285, - "768": 1.9658184051513672, - "769": 1.9386069774627686, - "770": 2.0059897899627686, - "771": 1.9722788333892822, - "772": 1.9910156726837158, - "773": 1.9721592664718628, - "774": 1.9776965379714966, - "775": 1.984902262687683, - "776": 1.976353645324707, - "777": 1.9893345832824707, - "778": 1.9513378143310547, - "779": 1.9796507358551025, - "780": 1.9403984546661377, - "781": 1.9532275199890137, - "782": 1.9857637882232666, - "783": 1.9697333574295044, - "784": 2.002728223800659, - "785": 1.9567406177520752, - "786": 1.9737015962600708, - "787": 1.9744305610656738, - "788": 2.006575584411621, - "789": 1.9719974994659424, - "790": 1.9621679782867432, - "791": 1.9796128273010254, - "792": 1.9757723808288574, - "793": 1.971252679824829, - "794": 1.960754156112671, - "795": 1.98601496219635, - "796": 1.9481157064437866, - "797": 1.9799139499664307, - "798": 1.9645318984985352, - "799": 1.9503973722457886, - "800": 1.9789137840270996, - "801": 1.9745160341262817, - "802": 1.949885606765747, - "803": 1.945481300354004, - "804": 1.959337830543518, - "805": 1.9622700214385986, - "806": 1.973149299621582, - "807": 1.9708741903305054, - "808": 1.9373981952667236, - "809": 1.9534707069396973 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 0.153816, - 0.151074, - 0.148068, - 0.14527, - 0.142342, - 0.139592, - 0.136803, - 0.133803, - 0.131014, - 0.128327, - 0.125434, - 0.122629, - 0.119849, - 0.116997, - 0.114134, - 0.11126, - 0.108442, - 0.105594, - 0.102748, - 0.0998628, - 0.0970621, - 0.0942459, - 0.0913399, - 0.088471, - 0.0856528, - 0.0828239, - 0.0799871, - 0.0771315, - 0.0742714, - 0.0714525, - 0.0685602, - 0.0657562, - 0.0628763, - 0.0600177, - 0.0571773, - 0.0542938, - 0.0514777, - 0.0486258, - 0.045753, - 0.0429031, - 0.0400481, - 0.0371892, - 0.0343371, - 0.0314783, - 0.0286162, - 0.0257574, - 0.0228947, - 0.0200377, - 0.0171754, - 0.0143149, - 0.011453, - 0.00859025, - 0.00572744, - 0.00286396 - ], - "train_epoch_time": 4.8408122062683105, - "train_loss": 1.9635653591908546, - "train_score": 0.4184025287970238, - "val_loss": 2.086306357356355, - "val_score": 0.38613124337179927 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:33:50.676876", - "final_model_norm": 87.8645248413086, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:32:09.107956", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 10.867344856262207, - "learning_rate": 4.64e-11, - "model_norm": 87.38897705078125, - "step_logs": { - "grad_norm": { - "0": 22.7664794921875, - "1": 23.4499454498291, - "2": 6.445989608764648, - "3": 8.906142234802246, - "4": 17.763187408447266, - "5": 6.188925266265869, - "6": 4.13126277923584, - "7": 5.662048816680908, - "8": 4.080653667449951, - "9": 5.714040756225586, - "10": 3.9929022789001465, - "11": 7.0095601081848145, - "12": 8.02617073059082, - "13": 12.596332550048828, - "14": 6.581796646118164, - "15": 4.435499668121338, - "16": 17.17401695251465, - "17": 5.262341022491455, - "18": 4.078556060791016, - "19": 44.627567291259766, - "20": 4.673335552215576, - "21": 7.911524772644043, - "22": 5.466825485229492, - "23": 8.20952320098877, - "24": 21.045005798339844, - "25": 6.467375755310059, - "26": 6.189671993255615, - "27": 12.949029922485352, - "28": 8.57295036315918, - "29": 5.5378007888793945, - "30": 14.023530960083008, - "31": 6.726585865020752, - "32": 7.064029693603516, - "33": 7.079834938049316, - "34": 5.3993988037109375, - "35": 11.558791160583496, - "36": 8.70158863067627, - "37": 4.486657619476318, - "38": 17.66220474243164, - "39": 5.690054416656494, - "40": 7.140784740447998, - "41": 5.444159507751465, - "42": 10.740113258361816, - "43": 4.331324100494385, - "44": 10.871084213256836, - "45": 6.3053879737854, - "46": 5.462993144989014, - "47": 4.7902679443359375, - "48": 3.2392709255218506, - "49": 2.816694974899292, - "50": 11.274415016174316, - "51": 7.449441909790039, - "52": 4.374978542327881, - "53": 10.867344856262207 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.8003854751586914, - "3": 3.804929733276367, - "4": 4.3205342292785645, - "5": 4.274775505065918, - "6": 3.550309181213379, - "7": 3.7156550884246826, - "8": 3.9138545989990234, - "9": 3.5210957527160645, - "10": 3.758052349090576, - "11": 3.648374557495117, - "12": 6.392759323120117, - "13": 4.713771820068359, - "14": 4.208435535430908, - "15": 3.9465599060058594, - "16": 5.199435710906982, - "17": 4.328524589538574, - "18": 3.651263475418091, - "19": 5.360636234283447, - "20": 3.920323371887207, - "21": 4.008439064025879, - "22": 4.396872043609619, - "23": 3.7561728954315186, - "24": 5.591197967529297, - "25": 3.7877416610717773, - "26": 4.384615898132324, - "27": 4.973392009735107, - "28": 5.224878311157227, - "29": 5.193818092346191, - "30": 4.656402111053467, - "31": 4.017825603485107, - "32": 3.922196388244629, - "33": 4.70242977142334, - "34": 3.6501121520996094, - "35": 4.688882827758789, - "36": 4.468406677246094, - "37": 4.085740089416504, - "38": 6.512267112731934, - "39": 4.839545249938965, - "40": 4.314859390258789, - "41": 4.304404258728027, - "42": 5.375008583068848, - "43": 4.207009315490723, - "44": 6.017759799957275, - "45": 4.3909454345703125, - "46": 3.86588716506958, - "47": 4.739500045776367, - "48": 3.9381890296936035, - "49": 3.6182003021240234, - "50": 4.826262474060059, - "51": 4.624513626098633, - "52": 5.69394588470459, - "53": 5.001783847808838 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "step_size_list": [ - 4.64e-11, - 0.00593737, - 0.0168503, - 0.0215783, - 0.0157592, - 0.0384145, - 0.0491077, - 0.0507405, - 0.0641144, - 0.0602063, - 0.077537, - 0.0604964, - 0.0713351, - 0.03981, - 0.0778582, - 0.103344, - 0.0284914, - 0.104849, - 0.120999, - 0.00522371, - 0.122348, - 0.0772862, - 0.12053, - 0.0732249, - 0.0226777, - 0.101712, - 0.117461, - 0.0479651, - 0.091897, - 0.149968, - 0.040471, - 0.109807, - 0.102788, - 0.116347, - 0.139609, - 0.0577171, - 0.0872155, - 0.186017, - 0.0373316, - 0.163718, - 0.116242, - 0.164714, - 0.0752112, - 0.211163, - 0.0815104, - 0.144469, - 0.161224, - 0.212155, - 0.279551, - 0.303442, - 0.0652571, - 0.122622, - 0.260692, - 0.0716287 - ], - "train_epoch_time": 4.84460973739624, - "train_loss": 3.801257535682689, - "train_score": 0.13558890781494945, - "val_loss": 3.8098775253777664, - "val_score": 0.1370012920123154 - }, - { - "epoch": 1, - "grad_norm": 2.0714566707611084, - "learning_rate": 0.464, - "model_norm": 87.30049133300781, - "step_logs": { - "grad_norm": { - "54": 3.2989003658294678, - "55": 4.222681999206543, - "56": 7.397814750671387, - "57": 3.8926427364349365, - "58": 14.16972827911377, - "59": 6.420956134796143, - "60": 4.9557952880859375, - "61": 13.485730171203613, - "62": 4.930192470550537, - "63": 3.3796579837799072, - "64": 4.4158830642700195, - "65": 5.228013515472412, - "66": 8.382903099060059, - "67": 7.993526458740234, - "68": 6.3797197341918945, - "69": 11.84712028503418, - "70": 3.5176312923431396, - "71": 5.429211616516113, - "72": 3.579590082168579, - "73": 5.0470170974731445, - "74": 7.79220724105835, - "75": 5.338777542114258, - "76": 4.107653617858887, - "77": 12.636404991149902, - "78": 3.68166184425354, - "79": 2.7825984954833984, - "80": 3.896320343017578, - "81": 7.72820520401001, - "82": 2.3804285526275635, - "83": 13.873343467712402, - "84": 6.812201023101807, - "85": 3.397881507873535, - "86": 4.052323818206787, - "87": 3.012146472930908, - "88": 2.5745558738708496, - "89": 2.8760454654693604, - "90": 3.4528188705444336, - "91": 4.478063106536865, - "92": 2.996354103088379, - "93": 5.239041328430176, - "94": 3.2973387241363525, - "95": 3.3594624996185303, - "96": 3.310357093811035, - "97": 4.669572830200195, - "98": 3.144587278366089, - "99": 2.272308588027954, - "100": 5.3997416496276855, - "101": 2.291703939437866, - "102": 1.769398808479309, - "103": 2.8895630836486816, - "104": 2.5099306106567383, - "105": 5.2935075759887695, - "106": 2.0428225994110107, - "107": 2.0714566707611084 - }, - "loss": { - "54": 3.7783703804016113, - "55": 3.9213552474975586, - "56": 4.5158867835998535, - "57": 3.687530517578125, - "58": 6.772468566894531, - "59": 5.479622840881348, - "60": 4.148347854614258, - "61": 6.270169258117676, - "62": 4.35581111907959, - "63": 3.503696918487549, - "64": 3.747910976409912, - "65": 4.157194137573242, - "66": 4.164535999298096, - "67": 4.440500259399414, - "68": 3.896484851837158, - "69": 5.742057800292969, - "70": 3.98677396774292, - "71": 4.361114501953125, - "72": 4.1221923828125, - "73": 4.0096588134765625, - "74": 5.064618110656738, - "75": 5.140599250793457, - "76": 3.7918624877929688, - "77": 6.432853698730469, - "78": 4.206114768981934, - "79": 3.278982639312744, - "80": 3.826313018798828, - "81": 4.816197872161865, - "82": 3.3911356925964355, - "83": 4.876935005187988, - "84": 3.8991923332214355, - "85": 4.186374187469482, - "86": 3.430126428604126, - "87": 3.3390538692474365, - "88": 3.622817039489746, - "89": 3.3547282218933105, - "90": 3.3335962295532227, - "91": 3.782844066619873, - "92": 3.63641357421875, - "93": 4.060509204864502, - "94": 3.517932653427124, - "95": 3.6322638988494873, - "96": 3.4169673919677734, - "97": 3.781601905822754, - "98": 3.8285770416259766, - "99": 3.433558702468872, - "100": 3.5310988426208496, - "101": 3.4940805435180664, - "102": 2.991687297821045, - "103": 3.139760732650757, - "104": 3.3575704097747803, - "105": 3.537698745727539, - "106": 3.453799247741699, - "107": 3.001152753829956 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "step_size_list": [ - 0.27814, - 0.225797, - 0.121734, - 0.237544, - 0.058898, - 0.169, - 0.195489, - 0.0600328, - 0.202211, - 0.264188, - 0.210233, - 0.183739, - 0.0944086, - 0.106953, - 0.135539, - 0.0695566, - 0.269758, - 0.180681, - 0.269587, - 0.187563, - 0.122706, - 0.202944, - 0.228308, - 0.0686513, - 0.2655, - 0.299774, - 0.241606, - 0.11968, - 0.334375, - 0.0456875, - 0.123367, - 0.282956, - 0.219835, - 0.284593, - 0.325735, - 0.295159, - 0.253593, - 0.208086, - 0.295016, - 0.180669, - 0.270237, - 0.269633, - 0.266049, - 0.198484, - 0.290144, - 0.343989, - 0.159139, - 0.344031, - 0.373355, - 0.286959, - 0.323278, - 0.163518, - 0.36241, - 0.348426 - ], - "train_epoch_time": 4.842065095901489, - "train_loss": 3.0175992001762006, - "train_score": 0.20004595585242235, - "val_loss": 3.032195486239533, - "val_score": 0.19873887817156163 - }, - { - "epoch": 2, - "grad_norm": 0.7995576858520508, - "learning_rate": 0.464, - "model_norm": 87.32730865478516, - "step_logs": { - "grad_norm": { - "108": 1.7453795671463013, - "109": 2.3346517086029053, - "110": 1.7789217233657837, - "111": 1.5735795497894287, - "112": 1.9343189001083374, - "113": 2.1023476123809814, - "114": 1.7054225206375122, - "115": 1.8063548803329468, - "116": 1.4714947938919067, - "117": 1.2690017223358154, - "118": 1.2429780960083008, - "119": 1.3882229328155518, - "120": 1.2908350229263306, - "121": 1.7685282230377197, - "122": 1.3473387956619263, - "123": 1.6238688230514526, - "124": 1.3509222269058228, - "125": 1.2665413618087769, - "126": 1.188738465309143, - "127": 1.5948922634124756, - "128": 1.218187928199768, - "129": 0.8909094929695129, - "130": 1.313584327697754, - "131": 1.4468961954116821, - "132": 1.267411708831787, - "133": 1.5071102380752563, - "134": 1.4385671615600586, - "135": 1.036034345626831, - "136": 1.005440592765808, - "137": 1.2462759017944336, - "138": 1.0342200994491577, - "139": 0.9531611800193787, - "140": 1.0670197010040283, - "141": 1.552345633506775, - "142": 1.0417265892028809, - "143": 0.9641990661621094, - "144": 1.0335783958435059, - "145": 1.43638014793396, - "146": 1.1249327659606934, - "147": 0.7396571636199951, - "148": 0.7599216103553772, - "149": 1.0708014965057373, - "150": 1.2021658420562744, - "151": 1.2595103979110718, - "152": 1.0128695964813232, - "153": 0.7846184372901917, - "154": 0.9114413857460022, - "155": 1.3085514307022095, - "156": 1.051295518875122, - "157": 1.017424464225769, - "158": 1.0869580507278442, - "159": 1.28909170627594, - "160": 0.999721884727478, - "161": 0.7995576858520508 - }, - "loss": { - "108": 3.0271198749542236, - "109": 2.991933822631836, - "110": 3.1839370727539062, - "111": 2.8543524742126465, - "112": 3.0024099349975586, - "113": 2.972348690032959, - "114": 3.1099231243133545, - "115": 2.882803440093994, - "116": 3.00272274017334, - "117": 2.8202266693115234, - "118": 2.73990797996521, - "119": 2.8246216773986816, - "120": 2.8119752407073975, - "121": 2.8203301429748535, - "122": 2.9565765857696533, - "123": 2.7843191623687744, - "124": 2.9501309394836426, - "125": 2.731534242630005, - "126": 2.781210422515869, - "127": 2.719125747680664, - "128": 2.923259735107422, - "129": 2.7054128646850586, - "130": 2.6743741035461426, - "131": 2.852283477783203, - "132": 2.741102457046509, - "133": 2.7418837547302246, - "134": 2.8744490146636963, - "135": 2.71822190284729, - "136": 2.6969385147094727, - "137": 2.6778035163879395, - "138": 2.777622699737549, - "139": 2.650146484375, - "140": 2.680689811706543, - "141": 2.7095980644226074, - "142": 2.790527105331421, - "143": 2.6293399333953857, - "144": 2.6897921562194824, - "145": 2.6595771312713623, - "146": 2.801690101623535, - "147": 2.603154182434082, - "148": 2.5941762924194336, - "149": 2.620685338973999, - "150": 2.717017889022827, - "151": 2.687593936920166, - "152": 2.7148540019989014, - "153": 2.5804102420806885, - "154": 2.61409068107605, - "155": 2.657287120819092, - "156": 2.745023727416992, - "157": 2.630979537963867, - "158": 2.68674373626709, - "159": 2.6528375148773193, - "160": 2.747378349304199, - "161": 2.5929226875305176 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "step_size_list": [ - 0.376173, - 0.326152, - 0.377055, - 0.386261, - 0.359936, - 0.344986, - 0.381274, - 0.367498, - 0.397499, - 0.409723, - 0.410321, - 0.400591, - 0.407922, - 0.36905, - 0.406146, - 0.380415, - 0.405765, - 0.408363, - 0.415073, - 0.381256, - 0.415111, - 0.434431, - 0.403588, - 0.396485, - 0.408467, - 0.3892, - 0.397591, - 0.42506, - 0.426878, - 0.408967, - 0.425946, - 0.429815, - 0.422381, - 0.384638, - 0.425602, - 0.428823, - 0.424853, - 0.393228, - 0.419989, - 0.442428, - 0.441214, - 0.421241, - 0.413031, - 0.408113, - 0.4266, - 0.439665, - 0.43214, - 0.403655, - 0.424361, - 0.425189, - 0.421045, - 0.405125, - 0.427887, - 0.438895 - ], - "train_epoch_time": 4.841308832168579, - "train_loss": 2.5992508830777883, - "train_score": 0.2544487535953522, - "val_loss": 2.622389969404201, - "val_score": 0.24980266955087707 - }, - { - "epoch": 3, - "grad_norm": 0.9289785027503967, - "learning_rate": 0.464, - "model_norm": 87.36541748046875, - "step_logs": { - "grad_norm": { - "162": 0.7847865223884583, - "163": 0.88823401927948, - "164": 1.0743495225906372, - "165": 1.1417078971862793, - "166": 1.0487600564956665, - "167": 1.1039804220199585, - "168": 1.0553600788116455, - "169": 1.234736442565918, - "170": 1.0465025901794434, - "171": 0.7626662850379944, - "172": 0.8245648741722107, - "173": 1.2805877923965454, - "174": 1.0924804210662842, - "175": 0.6467738151550293, - "176": 0.5612294673919678, - "177": 0.8982486128807068, - "178": 1.001540184020996, - "179": 0.9854945540428162, - "180": 0.9586362242698669, - "181": 0.9121066331863403, - "182": 0.9199315905570984, - "183": 0.9640321135520935, - "184": 0.8909121751785278, - "185": 0.7867289185523987, - "186": 0.8402086496353149, - "187": 0.886196494102478, - "188": 0.8527680039405823, - "189": 0.8706535696983337, - "190": 0.8681197762489319, - "191": 0.9133608937263489, - "192": 1.1165709495544434, - "193": 1.1397994756698608, - "194": 1.094490885734558, - "195": 0.888022243976593, - "196": 0.9068925380706787, - "197": 1.2189306020736694, - "198": 1.0212775468826294, - "199": 0.8594958186149597, - "200": 0.9148173928260803, - "201": 1.2075608968734741, - "202": 1.0625265836715698, - "203": 0.7372179627418518, - "204": 0.707021951675415, - "205": 0.881438672542572, - "206": 0.9729852676391602, - "207": 0.9525948762893677, - "208": 0.851600170135498, - "209": 0.7714646458625793, - "210": 0.9278709292411804, - "211": 1.1792516708374023, - "212": 1.0500853061676025, - "213": 0.8017606139183044, - "214": 0.8323712944984436, - "215": 0.9289785027503967 - }, - "loss": { - "162": 2.5928449630737305, - "163": 2.6062660217285156, - "164": 2.6345601081848145, - "165": 2.6397294998168945, - "166": 2.6643354892730713, - "167": 2.648961067199707, - "168": 2.6670284271240234, - "169": 2.615281581878662, - "170": 2.736809730529785, - "171": 2.5692615509033203, - "172": 2.597013235092163, - "173": 2.5934057235717773, - "174": 2.754359006881714, - "175": 2.560157299041748, - "176": 2.519620656967163, - "177": 2.563488006591797, - "178": 2.651240348815918, - "179": 2.5946297645568848, - "180": 2.62282133102417, - "181": 2.5967390537261963, - "182": 2.612783432006836, - "183": 2.586153984069824, - "184": 2.632042407989502, - "185": 2.5606541633605957, - "186": 2.597174644470215, - "187": 2.5834105014801025, - "188": 2.614773988723755, - "189": 2.5667665004730225, - "190": 2.6083247661590576, - "191": 2.5700886249542236, - "192": 2.647434711456299, - "193": 2.6329309940338135, - "194": 2.6459603309631348, - "195": 2.605299949645996, - "196": 2.5579757690429688, - "197": 2.619462251663208, - "198": 2.6579835414886475, - "199": 2.5811972618103027, - "200": 2.58602237701416, - "201": 2.5991976261138916, - "202": 2.685232639312744, - "203": 2.5744965076446533, - "204": 2.529045820236206, - "205": 2.5680088996887207, - "206": 2.608891010284424, - "207": 2.5749635696411133, - "208": 2.5732386112213135, - "209": 2.5692808628082275, - "210": 2.5827243328094482, - "211": 2.58756160736084, - "212": 2.6649670600891113, - "213": 2.5771422386169434, - "214": 2.578545093536377, - "215": 2.5754103660583496 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "step_size_list": [ - 0.439765, - 0.433552, - 0.42119, - 0.416307, - 0.423445, - 0.419249, - 0.423016, - 0.408723, - 0.424583, - 0.440846, - 0.437431, - 0.404639, - 0.421615, - 0.447053, - 0.450922, - 0.432424, - 0.426558, - 0.426926, - 0.429118, - 0.431898, - 0.43157, - 0.428293, - 0.43366, - 0.439362, - 0.436475, - 0.433431, - 0.435876, - 0.434247, - 0.434851, - 0.431505, - 0.418299, - 0.41634, - 0.419897, - 0.433555, - 0.431791, - 0.410041, - 0.425283, - 0.43511, - 0.431596, - 0.410562, - 0.422763, - 0.442336, - 0.443656, - 0.433568, - 0.42797, - 0.428931, - 0.435523, - 0.440336, - 0.430692, - 0.41256, - 0.42336, - 0.438618, - 0.436773, - 0.43053 - ], - "train_epoch_time": 4.841070890426636, - "train_loss": 2.5811669449553087, - "train_score": 0.2214860563255964, - "val_loss": 2.615156807116608, - "val_score": 0.21387055117624088 - }, - { - "epoch": 4, - "grad_norm": 0.889293372631073, - "learning_rate": 0.464, - "model_norm": 87.40538787841797, - "step_logs": { - "grad_norm": { - "216": 0.754064679145813, - "217": 0.8650315999984741, - "218": 0.8457391858100891, - "219": 0.8708078861236572, - "220": 0.8217610120773315, - "221": 0.710178792476654, - "222": 0.7253114581108093, - "223": 0.9253487586975098, - "224": 0.8692904710769653, - "225": 0.715171754360199, - "226": 0.7517708539962769, - "227": 0.8607370257377625, - "228": 0.8758278489112854, - "229": 0.894248902797699, - "230": 0.9296528100967407, - "231": 0.8673824071884155, - "232": 0.8326968550682068, - "233": 0.8765765428543091, - "234": 0.8940305113792419, - "235": 0.924471914768219, - "236": 0.9716203808784485, - "237": 0.9140962362289429, - "238": 0.9629188776016235, - "239": 0.9863651990890503, - "240": 1.0309852361679077, - "241": 0.8867030143737793, - "242": 0.8277438282966614, - "243": 0.8383505940437317, - "244": 0.8075698614120483, - "245": 0.7585670948028564, - "246": 0.7546662092208862, - "247": 0.7675836086273193, - "248": 0.8228592276573181, - "249": 0.8094436526298523, - "250": 0.7157390117645264, - "251": 0.7271272540092468, - "252": 0.772714376449585, - "253": 0.7835621237754822, - "254": 0.8827233910560608, - "255": 0.803135335445404, - "256": 0.6097615361213684, - "257": 0.6437383890151978, - "258": 0.7866104245185852, - "259": 0.8246266841888428, - "260": 0.818315863609314, - "261": 0.7815305590629578, - "262": 0.7399082779884338, - "263": 0.8718881011009216, - "264": 0.8604130148887634, - "265": 0.8218329548835754, - "266": 0.8615092635154724, - "267": 0.8987654447555542, - "268": 0.9400328397750854, - "269": 0.889293372631073 - }, - "loss": { - "216": 2.5780444145202637, - "217": 2.5299715995788574, - "218": 2.5789334774017334, - "219": 2.5701842308044434, - "220": 2.604111671447754, - "221": 2.527941942214966, - "222": 2.5688040256500244, - "223": 2.5431976318359375, - "224": 2.6267378330230713, - "225": 2.5190181732177734, - "226": 2.551858901977539, - "227": 2.545146942138672, - "228": 2.585883140563965, - "229": 2.5538382530212402, - "230": 2.578336238861084, - "231": 2.572866439819336, - "232": 2.5440096855163574, - "233": 2.5524444580078125, - "234": 2.5869879722595215, - "235": 2.55580997467041, - "236": 2.6068756580352783, - "237": 2.57442307472229, - "238": 2.5645904541015625, - "239": 2.582531452178955, - "240": 2.5895957946777344, - "241": 2.5937376022338867, - "242": 2.5293283462524414, - "243": 2.5690677165985107, - "244": 2.528202533721924, - "245": 2.5501508712768555, - "246": 2.509793758392334, - "247": 2.5440261363983154, - "248": 2.550060272216797, - "249": 2.557403564453125, - "250": 2.5279016494750977, - "251": 2.538327693939209, - "252": 2.553427219390869, - "253": 2.545814037322998, - "254": 2.527787685394287, - "255": 2.5648326873779297, - "256": 2.501943826675415, - "257": 2.509753465652466, - "258": 2.517652988433838, - "259": 2.5426149368286133, - "260": 2.5298633575439453, - "261": 2.549063205718994, - "262": 2.505011558532715, - "263": 2.5539746284484863, - "264": 2.563380241394043, - "265": 2.5415501594543457, - "266": 2.5311694145202637, - "267": 2.549185276031494, - "268": 2.542485237121582, - "269": 2.560265064239502 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "step_size_list": [ - 0.441413, - 0.434206, - 0.435949, - 0.434274, - 0.437669, - 0.443473, - 0.442954, - 0.430382, - 0.434969, - 0.443126, - 0.441324, - 0.434647, - 0.434123, - 0.432575, - 0.43052, - 0.434522, - 0.436405, - 0.433709, - 0.432965, - 0.430595, - 0.428038, - 0.431508, - 0.428092, - 0.426705, - 0.423656, - 0.433513, - 0.436564, - 0.436308, - 0.437799, - 0.440918, - 0.440794, - 0.44034, - 0.437076, - 0.437968, - 0.443165, - 0.442611, - 0.440123, - 0.439414, - 0.433032, - 0.43842, - 0.448536, - 0.446881, - 0.438971, - 0.436892, - 0.437155, - 0.439564, - 0.441609, - 0.434028, - 0.434863, - 0.437054, - 0.434446, - 0.432225, - 0.429378, - 0.432972 - ], - "train_epoch_time": 4.841597557067871, - "train_loss": 2.5323087305044343, - "train_score": 0.2518965207863401, - "val_loss": 2.5780961787248997, - "val_score": 0.24644804832308492 - }, - { - "epoch": 5, - "grad_norm": 0.9988219738006592, - "learning_rate": 0.464, - "model_norm": 87.45085144042969, - "step_logs": { - "grad_norm": { - "270": 0.8288596272468567, - "271": 0.7934896349906921, - "272": 0.9915941953659058, - "273": 0.9043965935707092, - "274": 0.7631224989891052, - "275": 0.7688599228858948, - "276": 0.8321759700775146, - "277": 0.8154940605163574, - "278": 0.7812998294830322, - "279": 0.7776891589164734, - "280": 0.7468566298484802, - "281": 0.7364938259124756, - "282": 0.7987938523292542, - "283": 0.7861807942390442, - "284": 0.8148996233940125, - "285": 0.9629576206207275, - "286": 0.9761831760406494, - "287": 0.8699951171875, - "288": 0.7334563136100769, - "289": 0.7432525157928467, - "290": 0.7942057847976685, - "291": 0.7675772905349731, - "292": 0.7070748209953308, - "293": 0.7475346922874451, - "294": 0.7945957779884338, - "295": 0.8732880353927612, - "296": 0.9207392930984497, - "297": 0.9889922142028809, - "298": 0.8606439828872681, - "299": 0.7848327159881592, - "300": 0.7060503959655762, - "301": 0.7030293345451355, - "302": 0.7826439738273621, - "303": 1.0410118103027344, - "304": 0.8113794922828674, - "305": 0.7012067437171936, - "306": 0.8564667701721191, - "307": 1.0028573274612427, - "308": 1.307279348373413, - "309": 0.8510302305221558, - "310": 0.825967013835907, - "311": 0.8582208156585693, - "312": 0.9986169934272766, - "313": 0.9275185465812683, - "314": 1.0599502325057983, - "315": 0.8711197376251221, - "316": 0.7934519648551941, - "317": 0.8633790016174316, - "318": 0.8468979597091675, - "319": 0.9627843499183655, - "320": 0.8951519131660461, - "321": 0.772579550743103, - "322": 0.9886743426322937, - "323": 0.9988219738006592 - }, - "loss": { - "270": 2.534425735473633, - "271": 2.538745403289795, - "272": 2.5303900241851807, - "273": 2.5948383808135986, - "274": 2.5028274059295654, - "275": 2.516233205795288, - "276": 2.5069284439086914, - "277": 2.554931163787842, - "278": 2.4920616149902344, - "279": 2.54748272895813, - "280": 2.495222568511963, - "281": 2.5200893878936768, - "282": 2.5054917335510254, - "283": 2.554323196411133, - "284": 2.4950976371765137, - "285": 2.5486481189727783, - "286": 2.5618343353271484, - "287": 2.5696475505828857, - "288": 2.506333351135254, - "289": 2.505603790283203, - "290": 2.510037422180176, - "291": 2.5071001052856445, - "292": 2.482905626296997, - "293": 2.4872779846191406, - "294": 2.506176471710205, - "295": 2.492616653442383, - "296": 2.5271568298339844, - "297": 2.518397808074951, - "298": 2.532978057861328, - "299": 2.4740850925445557, - "300": 2.4835891723632812, - "301": 2.4555060863494873, - "302": 2.495685338973999, - "303": 2.461479902267456, - "304": 2.5400919914245605, - "305": 2.476271152496338, - "306": 2.484402656555176, - "307": 2.465885639190674, - "308": 2.5619664192199707, - "309": 2.5466525554656982, - "310": 2.5172061920166016, - "311": 2.4887819290161133, - "312": 2.4919095039367676, - "313": 2.487433910369873, - "314": 2.4931745529174805, - "315": 2.4929707050323486, - "316": 2.437417507171631, - "317": 2.458775043487549, - "318": 2.454230546951294, - "319": 2.427515983581543, - "320": 2.5097384452819824, - "321": 2.421292304992676, - "322": 2.4354043006896973, - "323": 2.4980649948120117 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "step_size_list": [ - 0.436546, - 0.438755, - 0.425629, - 0.43238, - 0.440235, - 0.440017, - 0.436054, - 0.437576, - 0.43905, - 0.439777, - 0.441122, - 0.441932, - 0.438115, - 0.439337, - 0.437016, - 0.427883, - 0.427139, - 0.43432, - 0.44199, - 0.441421, - 0.438439, - 0.44001, - 0.443292, - 0.441013, - 0.438378, - 0.433247, - 0.430496, - 0.425647, - 0.434521, - 0.438663, - 0.443354, - 0.443299, - 0.439003, - 0.420999, - 0.437682, - 0.443567, - 0.434254, - 0.423891, - 0.401816, - 0.43528, - 0.436551, - 0.434189, - 0.42458, - 0.429535, - 0.420082, - 0.433394, - 0.437767, - 0.433509, - 0.434538, - 0.426239, - 0.432001, - 0.438899, - 0.424475, - 0.424654 - ], - "train_epoch_time": 4.841272354125977, - "train_loss": 2.4665567812652807, - "train_score": 0.25491503755023526, - "val_loss": 2.514818378211984, - "val_score": 0.24674852926339677 - }, - { - "epoch": 6, - "grad_norm": 1.0370677709579468, - "learning_rate": 0.464, - "model_norm": 87.49996948242188, - "step_logs": { - "grad_norm": { - "324": 0.9800245761871338, - "325": 0.8283821940422058, - "326": 0.7775773406028748, - "327": 0.9417735934257507, - "328": 0.9661932587623596, - "329": 0.9594317674636841, - "330": 0.7793853878974915, - "331": 0.7245911359786987, - "332": 0.7517696619033813, - "333": 1.074756383895874, - "334": 0.9611711502075195, - "335": 0.6978328227996826, - "336": 0.6949235200881958, - "337": 1.037560224533081, - "338": 0.7125256657600403, - "339": 0.6865198016166687, - "340": 0.8098397254943848, - "341": 1.0357433557510376, - "342": 1.1265802383422852, - "343": 1.0206893682479858, - "344": 1.0810078382492065, - "345": 1.1776796579360962, - "346": 0.9603952169418335, - "347": 0.9163500070571899, - "348": 0.8210528492927551, - "349": 0.8515852689743042, - "350": 0.7847344875335693, - "351": 0.7387920022010803, - "352": 0.7622573375701904, - "353": 0.800679087638855, - "354": 0.88532954454422, - "355": 0.9865078926086426, - "356": 0.8688163161277771, - "357": 0.7257600426673889, - "358": 0.7686408162117004, - "359": 0.9300809502601624, - "360": 0.9252261519432068, - "361": 0.9970901608467102, - "362": 1.9082632064819336, - "363": 0.6677086353302002, - "364": 0.6691631078720093, - "365": 0.8937729597091675, - "366": 1.0749813318252563, - "367": 1.1392680406570435, - "368": 1.0505801439285278, - "369": 0.7789050340652466, - "370": 0.8209730386734009, - "371": 0.954608678817749, - "372": 1.3039089441299438, - "373": 1.0377908945083618, - "374": 1.0113333463668823, - "375": 1.0593420267105103, - "376": 0.9562830924987793, - "377": 1.0370677709579468 - }, - "loss": { - "324": 2.4723334312438965, - "325": 2.4934020042419434, - "326": 2.4199047088623047, - "327": 2.4279255867004395, - "328": 2.481714963912964, - "329": 2.4320318698883057, - "330": 2.4659900665283203, - "331": 2.3892579078674316, - "332": 2.4013938903808594, - "333": 2.403709650039673, - "334": 2.476989269256592, - "335": 2.420353889465332, - "336": 2.3951759338378906, - "337": 2.396409749984741, - "338": 2.4970617294311523, - "339": 2.4162540435791016, - "340": 2.39434814453125, - "341": 2.4294943809509277, - "342": 2.464895725250244, - "343": 2.429109811782837, - "344": 2.44795560836792, - "345": 2.437321186065674, - "346": 2.442086696624756, - "347": 2.3903326988220215, - "348": 2.4357619285583496, - "349": 2.3904590606689453, - "350": 2.4162936210632324, - "351": 2.374342441558838, - "352": 2.397109270095825, - "353": 2.3708555698394775, - "354": 2.4121124744415283, - "355": 2.407874584197998, - "356": 2.4555654525756836, - "357": 2.377549171447754, - "358": 2.380791187286377, - "359": 2.3692097663879395, - "360": 2.393126964569092, - "361": 2.386563301086426, - "362": 2.4485650062561035, - "363": 2.4321084022521973, - "364": 2.3986318111419678, - "365": 2.432197332382202, - "366": 2.448928117752075, - "367": 2.4770030975341797, - "368": 2.4764983654022217, - "369": 2.415029287338257, - "370": 2.39186954498291, - "371": 2.3887853622436523, - "372": 2.417573928833008, - "373": 2.451000928878784, - "374": 2.41477632522583, - "375": 2.3854353427886963, - "376": 2.432030200958252, - "377": 2.398467540740967 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "step_size_list": [ - 0.425638, - 0.436152, - 0.438577, - 0.427748, - 0.426757, - 0.426545, - 0.438917, - 0.441492, - 0.439977, - 0.417459, - 0.427048, - 0.443307, - 0.443266, - 0.420206, - 0.443099, - 0.443911, - 0.436276, - 0.420884, - 0.414486, - 0.42201, - 0.417736, - 0.409888, - 0.426618, - 0.429034, - 0.436005, - 0.43349, - 0.438097, - 0.440507, - 0.439296, - 0.43661, - 0.431472, - 0.424222, - 0.433112, - 0.441317, - 0.438741, - 0.427765, - 0.428444, - 0.423108, - 0.344975, - 0.445072, - 0.444738, - 0.431147, - 0.418216, - 0.413707, - 0.420519, - 0.438446, - 0.435528, - 0.426273, - 0.398915, - 0.421074, - 0.422484, - 0.418341, - 0.426771, - 0.420278 - ], - "train_epoch_time": 4.8418288230896, - "train_loss": 2.4159175653197673, - "train_score": 0.2785699874718528, - "val_loss": 2.45024912584526, - "val_score": 0.2707466270975623 - }, - { - "epoch": 7, - "grad_norm": 0.7485067844390869, - "learning_rate": 0.464, - "model_norm": 87.55538940429688, - "step_logs": { - "grad_norm": { - "378": 1.0064396858215332, - "379": 0.8539838194847107, - "380": 0.8043100237846375, - "381": 0.8470326066017151, - "382": 0.9387885928153992, - "383": 0.8522586822509766, - "384": 0.8815553784370422, - "385": 1.0263776779174805, - "386": 0.9056224822998047, - "387": 0.7453776597976685, - "388": 0.7296508550643921, - "389": 0.7386884689331055, - "390": 0.805571436882019, - "391": 0.9203474521636963, - "392": 1.0028228759765625, - "393": 1.0470596551895142, - "394": 0.9864926338195801, - "395": 0.9148935675621033, - "396": 0.8554189801216125, - "397": 0.8848603367805481, - "398": 1.2553482055664062, - "399": 0.6481537818908691, - "400": 0.6834568381309509, - "401": 0.8915372490882874, - "402": 0.9963279366493225, - "403": 1.0326377153396606, - "404": 0.892877995967865, - "405": 0.7649993896484375, - "406": 0.7975972294807434, - "407": 0.9212661385536194, - "408": 0.8549370169639587, - "409": 0.8188075423240662, - "410": 0.8381356596946716, - "411": 0.825498104095459, - "412": 0.8052555918693542, - "413": 0.8285270929336548, - "414": 0.8110234141349792, - "415": 0.8597236275672913, - "416": 0.8731857538223267, - "417": 0.8154451251029968, - "418": 0.7808809280395508, - "419": 0.7859033942222595, - "420": 0.8809411525726318, - "421": 0.9184620380401611, - "422": 0.9998373985290527, - "423": 0.887656033039093, - "424": 0.8951314091682434, - "425": 0.9491535425186157, - "426": 0.8798481822013855, - "427": 0.8304247856140137, - "428": 0.8761394023895264, - "429": 0.8243850469589233, - "430": 0.7738993167877197, - "431": 0.7485067844390869 - }, - "loss": { - "378": 2.4386940002441406, - "379": 2.374080181121826, - "380": 2.3617091178894043, - "381": 2.342466115951538, - "382": 2.3666210174560547, - "383": 2.37374210357666, - "384": 2.3594861030578613, - "385": 2.3664932250976562, - "386": 2.384056568145752, - "387": 2.3169920444488525, - "388": 2.2913923263549805, - "389": 2.3237414360046387, - "390": 2.3744289875030518, - "391": 2.3328261375427246, - "392": 2.36293888092041, - "393": 2.3453316688537598, - "394": 2.382493495941162, - "395": 2.3690600395202637, - "396": 2.3495705127716064, - "397": 2.355351686477661, - "398": 2.3587958812713623, - "399": 2.383967399597168, - "400": 2.3315072059631348, - "401": 2.342471122741699, - "402": 2.3810830116271973, - "403": 2.379814386367798, - "404": 2.3917574882507324, - "405": 2.304711103439331, - "406": 2.324188232421875, - "407": 2.3327572345733643, - "408": 2.3713417053222656, - "409": 2.2914185523986816, - "410": 2.3370871543884277, - "411": 2.310056209564209, - "412": 2.3397903442382812, - "413": 2.302206039428711, - "414": 2.331683874130249, - "415": 2.313957691192627, - "416": 2.3480639457702637, - "417": 2.3266613483428955, - "418": 2.2904434204101562, - "419": 2.3172760009765625, - "420": 2.3151981830596924, - "421": 2.3076066970825195, - "422": 2.3341920375823975, - "423": 2.3350610733032227, - "424": 2.3181991577148438, - "425": 2.3311357498168945, - "426": 2.3104472160339355, - "427": 2.308957099914551, - "428": 2.308932065963745, - "429": 2.3289713859558105, - "430": 2.302090883255005, - "431": 2.2854814529418945 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "step_size_list": [ - 0.423218, - 0.433132, - 0.436275, - 0.433216, - 0.4271, - 0.433244, - 0.431061, - 0.420566, - 0.429705, - 0.439548, - 0.440268, - 0.440028, - 0.436333, - 0.42795, - 0.422303, - 0.418603, - 0.423836, - 0.428848, - 0.432734, - 0.430777, - 0.401732, - 0.445775, - 0.443391, - 0.430139, - 0.42308, - 0.420307, - 0.430694, - 0.438186, - 0.436295, - 0.427883, - 0.433034, - 0.434505, - 0.433753, - 0.434279, - 0.435969, - 0.433979, - 0.435498, - 0.431987, - 0.431494, - 0.435148, - 0.437008, - 0.436979, - 0.43052, - 0.427725, - 0.422064, - 0.430313, - 0.429555, - 0.425821, - 0.430533, - 0.433933, - 0.430774, - 0.434579, - 0.437588, - 0.439031 - ], - "train_epoch_time": 4.842438220977783, - "train_loss": 2.279833248595425, - "train_score": 0.32523090032970203, - "val_loss": 2.333723345525503, - "val_score": 0.3123923646869944 - }, - { - "epoch": 8, - "grad_norm": 0.8299069404602051, - "learning_rate": 0.464, - "model_norm": 87.61180114746094, - "step_logs": { - "grad_norm": { - "432": 0.7126715779304504, - "433": 0.8567155599594116, - "434": 0.9202989935874939, - "435": 0.9785818457603455, - "436": 0.9496986269950867, - "437": 0.9030150175094604, - "438": 1.1402196884155273, - "439": 0.7947887778282166, - "440": 0.787836492061615, - "441": 0.9827735424041748, - "442": 0.924113929271698, - "443": 0.9095634818077087, - "444": 1.1255983114242554, - "445": 1.1245964765548706, - "446": 0.9567276239395142, - "447": 1.1173712015151978, - "448": 0.9507923722267151, - "449": 0.801628053188324, - "450": 0.7259226441383362, - "451": 0.812200665473938, - "452": 0.9129747152328491, - "453": 0.8578234910964966, - "454": 0.7506998181343079, - "455": 0.7694582939147949, - "456": 0.8032817840576172, - "457": 0.8745932579040527, - "458": 0.8218798637390137, - "459": 0.7167028784751892, - "460": 0.7230066061019897, - "461": 0.780096173286438, - "462": 0.742834210395813, - "463": 0.8151125311851501, - "464": 0.9034141302108765, - "465": 0.8522658348083496, - "466": 0.8728897571563721, - "467": 0.8497021794319153, - "468": 0.7926927804946899, - "469": 0.8608328700065613, - "470": 0.8690261840820312, - "471": 1.0343111753463745, - "472": 0.9771400690078735, - "473": 0.8763394951820374, - "474": 0.8738444447517395, - "475": 0.902094304561615, - "476": 0.9636691808700562, - "477": 1.0210340023040771, - "478": 0.9713259339332581, - "479": 1.132158875465393, - "480": 0.8729812502861023, - "481": 0.7968984246253967, - "482": 0.7133024334907532, - "483": 0.6922010183334351, - "484": 0.7628591060638428, - "485": 0.8299069404602051 - }, - "loss": { - "432": 2.3060684204101562, - "433": 2.276191234588623, - "434": 2.3359580039978027, - "435": 2.3350090980529785, - "436": 2.3345181941986084, - "437": 2.312978744506836, - "438": 2.2810492515563965, - "439": 2.3284752368927, - "440": 2.285083293914795, - "441": 2.281200885772705, - "442": 2.3516182899475098, - "443": 2.284452199935913, - "444": 2.3611257076263428, - "445": 2.3429629802703857, - "446": 2.3390960693359375, - "447": 2.313493251800537, - "448": 2.366291046142578, - "449": 2.295682430267334, - "450": 2.271575450897217, - "451": 2.2555222511291504, - "452": 2.3030929565429688, - "453": 2.2810072898864746, - "454": 2.2763078212738037, - "455": 2.27951717376709, - "456": 2.2690677642822266, - "457": 2.291797637939453, - "458": 2.2770628929138184, - "459": 2.255760669708252, - "460": 2.235276699066162, - "461": 2.276254653930664, - "462": 2.2803478240966797, - "463": 2.2522382736206055, - "464": 2.285918712615967, - "465": 2.2736093997955322, - "466": 2.265974998474121, - "467": 2.2974672317504883, - "468": 2.276423454284668, - "469": 2.271206855773926, - "470": 2.3198113441467285, - "471": 2.278186798095703, - "472": 2.319728374481201, - "473": 2.2563881874084473, - "474": 2.278583288192749, - "475": 2.288527488708496, - "476": 2.2797842025756836, - "477": 2.272386074066162, - "478": 2.3173093795776367, - "479": 2.2954373359680176, - "480": 2.322462320327759, - "481": 2.232104778289795, - "482": 2.26352596282959, - "483": 2.218979597091675, - "484": 2.2360379695892334, - "485": 2.2544596195220947 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "step_size_list": [ - 0.441444, - 0.431705, - 0.427998, - 0.423688, - 0.425832, - 0.428918, - 0.409811, - 0.436526, - 0.436493, - 0.422499, - 0.427945, - 0.428037, - 0.412631, - 0.412359, - 0.425382, - 0.41237, - 0.426223, - 0.435705, - 0.440303, - 0.434517, - 0.428058, - 0.431691, - 0.438797, - 0.437629, - 0.435282, - 0.430653, - 0.434123, - 0.440717, - 0.440121, - 0.436901, - 0.439336, - 0.434278, - 0.428506, - 0.431982, - 0.430423, - 0.43247, - 0.436074, - 0.431349, - 0.431416, - 0.418416, - 0.423554, - 0.430043, - 0.430527, - 0.428639, - 0.423936, - 0.419365, - 0.423955, - 0.410783, - 0.431175, - 0.43527, - 0.441002, - 0.441865, - 0.437579, - 0.43329 - ], - "train_epoch_time": 4.842189073562622, - "train_loss": 2.2583873221316675, - "train_score": 0.3253160867920223, - "val_loss": 2.321609194325251, - "val_score": 0.3118586760574586 - }, - { - "epoch": 9, - "grad_norm": 0.68621426820755, - "learning_rate": 0.464, - "model_norm": 87.66968536376953, - "step_logs": { - "grad_norm": { - "486": 0.7682287096977234, - "487": 0.7649548053741455, - "488": 0.9660560488700867, - "489": 0.9740469455718994, - "490": 0.8074227571487427, - "491": 0.7553938031196594, - "492": 0.8166458010673523, - "493": 0.840368390083313, - "494": 0.8939388990402222, - "495": 0.8838664889335632, - "496": 0.7769646644592285, - "497": 0.7230124473571777, - "498": 0.7600592970848083, - "499": 0.7785629034042358, - "500": 0.7993154525756836, - "501": 0.802345335483551, - "502": 0.8185387253761292, - "503": 0.8377872705459595, - "504": 0.791024386882782, - "505": 0.8677616715431213, - "506": 0.7530810832977295, - "507": 0.7036998867988586, - "508": 0.8211548328399658, - "509": 0.9169280529022217, - "510": 0.886559784412384, - "511": 0.7795705795288086, - "512": 0.7227236032485962, - "513": 0.74482661485672, - "514": 0.8771432638168335, - "515": 0.8894820809364319, - "516": 0.8720706105232239, - "517": 0.9437795281410217, - "518": 0.8314654231071472, - "519": 0.774798572063446, - "520": 0.871120035648346, - "521": 0.8756012916564941, - "522": 0.8168379068374634, - "523": 0.8266562819480896, - "524": 0.8706150650978088, - "525": 0.8751426339149475, - "526": 1.0410782098770142, - "527": 0.9852874875068665, - "528": 0.8789790272712708, - "529": 0.8169657588005066, - "530": 0.7922195196151733, - "531": 0.8600579500198364, - "532": 0.8866491913795471, - "533": 0.8692024350166321, - "534": 0.8261463642120361, - "535": 0.8096601963043213, - "536": 0.8385648131370544, - "537": 0.7517908215522766, - "538": 0.6367648243904114, - "539": 0.68621426820755 - }, - "loss": { - "486": 2.2780823707580566, - "487": 2.2468371391296387, - "488": 2.2590010166168213, - "489": 2.281876802444458, - "490": 2.2365760803222656, - "491": 2.248771905899048, - "492": 2.2549967765808105, - "493": 2.2779555320739746, - "494": 2.2223997116088867, - "495": 2.2605366706848145, - "496": 2.2568793296813965, - "497": 2.226069450378418, - "498": 2.236762046813965, - "499": 2.2230029106140137, - "500": 2.2332019805908203, - "501": 2.2457990646362305, - "502": 2.2245359420776367, - "503": 2.2295455932617188, - "504": 2.2105393409729004, - "505": 2.2362282276153564, - "506": 2.2436485290527344, - "507": 2.1828665733337402, - "508": 2.2447609901428223, - "509": 2.2604458332061768, - "510": 2.238154172897339, - "511": 2.2404041290283203, - "512": 2.2351233959198, - "513": 2.2271037101745605, - "514": 2.2268412113189697, - "515": 2.2441372871398926, - "516": 2.2347819805145264, - "517": 2.246114492416382, - "518": 2.2599329948425293, - "519": 2.2221996784210205, - "520": 2.2236242294311523, - "521": 2.2674546241760254, - "522": 2.2368814945220947, - "523": 2.2382960319519043, - "524": 2.2562508583068848, - "525": 2.2370681762695312, - "526": 2.255112648010254, - "527": 2.2622761726379395, - "528": 2.208037853240967, - "529": 2.2299203872680664, - "530": 2.2126784324645996, - "531": 2.2205216884613037, - "532": 2.234477996826172, - "533": 2.234245777130127, - "534": 2.235602617263794, - "535": 2.2378177642822266, - "536": 2.2401041984558105, - "537": 2.212348461151123, - "538": 2.173534393310547, - "539": 2.2230114936828613 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "step_size_list": [ - 0.437693, - 0.437562, - 0.423417, - 0.423179, - 0.43461, - 0.438203, - 0.434207, - 0.432866, - 0.428273, - 0.429559, - 0.436889, - 0.440027, - 0.437769, - 0.436393, - 0.435119, - 0.435067, - 0.433695, - 0.432418, - 0.435407, - 0.430378, - 0.438297, - 0.440801, - 0.433771, - 0.427142, - 0.429044, - 0.436528, - 0.440137, - 0.43865, - 0.429567, - 0.428918, - 0.430047, - 0.424908, - 0.433252, - 0.436635, - 0.429958, - 0.430249, - 0.433969, - 0.433309, - 0.430451, - 0.429858, - 0.417453, - 0.421988, - 0.429161, - 0.433872, - 0.435352, - 0.430713, - 0.428985, - 0.430247, - 0.433309, - 0.434472, - 0.432502, - 0.438038, - 0.444751, - 0.442266 - ], - "train_epoch_time": 4.842597723007202, - "train_loss": 2.2036793600709057, - "train_score": 0.3462719690337927, - "val_loss": 2.2865703012858414, - "val_score": 0.32615617796966867 - }, - { - "epoch": 10, - "grad_norm": 0.9096331000328064, - "learning_rate": 0.464, - "model_norm": 87.72895812988281, - "step_logs": { - "grad_norm": { - "540": 0.8921065330505371, - "541": 0.8062778115272522, - "542": 0.7187851667404175, - "543": 0.8345754146575928, - "544": 0.9551219344139099, - "545": 1.0174036026000977, - "546": 0.868165910243988, - "547": 0.7207449078559875, - "548": 0.7266085147857666, - "549": 0.6963417530059814, - "550": 0.6848155856132507, - "551": 0.797979474067688, - "552": 1.1448265314102173, - "553": 0.9235154986381531, - "554": 0.7863271832466125, - "555": 0.7994105815887451, - "556": 0.8655866384506226, - "557": 0.8106824159622192, - "558": 0.7803632020950317, - "559": 0.8532289266586304, - "560": 0.8216025829315186, - "561": 0.7853909134864807, - "562": 0.7817880511283875, - "563": 0.7839528322219849, - "564": 0.8452964425086975, - "565": 0.8417201638221741, - "566": 0.8234250545501709, - "567": 0.8137179017066956, - "568": 0.8520249128341675, - "569": 0.8314141035079956, - "570": 0.8129189014434814, - "571": 0.7779190540313721, - "572": 0.7898862957954407, - "573": 0.8024963140487671, - "574": 0.7748833298683167, - "575": 0.7861606478691101, - "576": 0.8677897453308105, - "577": 0.8517161011695862, - "578": 0.7643398642539978, - "579": 0.7023939490318298, - "580": 0.7665775418281555, - "581": 0.8610114455223083, - "582": 0.8333024382591248, - "583": 0.7674388289451599, - "584": 0.7553243041038513, - "585": 0.7182978987693787, - "586": 0.7470502853393555, - "587": 0.8406779766082764, - "588": 0.9378892779350281, - "589": 0.9103984832763672, - "590": 0.8171938061714172, - "591": 0.9231045842170715, - "592": 1.0083247423171997, - "593": 0.9096331000328064 - }, - "loss": { - "540": 2.215850830078125, - "541": 2.236928701400757, - "542": 2.1841204166412354, - "543": 2.200261116027832, - "544": 2.235758066177368, - "545": 2.247260093688965, - "546": 2.2625303268432617, - "547": 2.1999077796936035, - "548": 2.1832923889160156, - "549": 2.182635545730591, - "550": 2.1730289459228516, - "551": 2.1816012859344482, - "552": 2.22847843170166, - "553": 2.2593977451324463, - "554": 2.1985721588134766, - "555": 2.213987350463867, - "556": 2.2262539863586426, - "557": 2.20068359375, - "558": 2.1766653060913086, - "559": 2.1952016353607178, - "560": 2.213383913040161, - "561": 2.168879508972168, - "562": 2.222299814224243, - "563": 2.2107787132263184, - "564": 2.206465482711792, - "565": 2.192739963531494, - "566": 2.220984697341919, - "567": 2.1941514015197754, - "568": 2.1865310668945312, - "569": 2.213221549987793, - "570": 2.2188873291015625, - "571": 2.20096492767334, - "572": 2.185303211212158, - "573": 2.1692090034484863, - "574": 2.1808671951293945, - "575": 2.18877911567688, - "576": 2.164050579071045, - "577": 2.194611072540283, - "578": 2.1801581382751465, - "579": 2.1817259788513184, - "580": 2.1732735633850098, - "581": 2.1978366374969482, - "582": 2.1710891723632812, - "583": 2.1611733436584473, - "584": 2.1839187145233154, - "585": 2.165365219116211, - "586": 2.172858238220215, - "587": 2.2187869548797607, - "588": 2.2081055641174316, - "589": 2.2036852836608887, - "590": 2.1941847801208496, - "591": 2.179983139038086, - "592": 2.2141075134277344, - "593": 2.193830728530884 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "step_size_list": [ - 0.428311, - 0.434692, - 0.439861, - 0.432254, - 0.423875, - 0.419203, - 0.430712, - 0.439901, - 0.439352, - 0.441257, - 0.441876, - 0.434572, - 0.408291, - 0.426637, - 0.43558, - 0.434878, - 0.430395, - 0.433935, - 0.435719, - 0.430851, - 0.433339, - 0.435279, - 0.43617, - 0.435888, - 0.431576, - 0.431643, - 0.43331, - 0.43364, - 0.430816, - 0.43265, - 0.434012, - 0.436177, - 0.435175, - 0.434101, - 0.436141, - 0.435472, - 0.429338, - 0.430952, - 0.436842, - 0.440871, - 0.436611, - 0.430325, - 0.431949, - 0.436408, - 0.437486, - 0.439694, - 0.437906, - 0.432071, - 0.424745, - 0.426762, - 0.433398, - 0.425421, - 0.419327, - 0.426666 - ], - "train_epoch_time": 4.842779159545898, - "train_loss": 2.188983239402388, - "train_score": 0.3554407281034133, - "val_loss": 2.275259516953601, - "val_score": 0.3314302889405655 - }, - { - "epoch": 11, - "grad_norm": 0.8140822052955627, - "learning_rate": 0.464, - "model_norm": 87.79326629638672, - "step_logs": { - "grad_norm": { - "594": 0.8143988251686096, - "595": 0.7691192626953125, - "596": 0.7511328458786011, - "597": 0.7437258362770081, - "598": 0.8332107663154602, - "599": 0.9590739011764526, - "600": 0.9414939880371094, - "601": 0.9596734046936035, - "602": 0.9427944421768188, - "603": 0.8789117336273193, - "604": 0.7952477335929871, - "605": 0.767628014087677, - "606": 0.7885223627090454, - "607": 0.7696501612663269, - "608": 0.8021920919418335, - "609": 0.7617115378379822, - "610": 0.7050363421440125, - "611": 0.6947162747383118, - "612": 0.7675658464431763, - "613": 0.9178090691566467, - "614": 0.8634825348854065, - "615": 0.7828942537307739, - "616": 0.7477189898490906, - "617": 0.8095752000808716, - "618": 0.7939623594284058, - "619": 0.7833624482154846, - "620": 0.810734212398529, - "621": 0.8283319473266602, - "622": 0.9187922477722168, - "623": 0.874248206615448, - "624": 0.9026694297790527, - "625": 1.0013917684555054, - "626": 0.8714126348495483, - "627": 0.7520427107810974, - "628": 0.795939564704895, - "629": 0.9054486155509949, - "630": 0.9085151553153992, - "631": 0.8748542666435242, - "632": 0.9008886218070984, - "633": 0.9165480732917786, - "634": 1.0872836112976074, - "635": 1.1195107698440552, - "636": 1.0432250499725342, - "637": 0.8284159898757935, - "638": 0.8854562640190125, - "639": 0.9075348973274231, - "640": 0.8460088968276978, - "641": 0.7267642617225647, - "642": 0.7285876870155334, - "643": 0.7849075198173523, - "644": 0.8879084587097168, - "645": 0.9245346784591675, - "646": 0.8939499258995056, - "647": 0.8140822052955627 - }, - "loss": { - "594": 2.1735947132110596, - "595": 2.1577436923980713, - "596": 2.18422794342041, - "597": 2.1580567359924316, - "598": 2.178335189819336, - "599": 2.138554096221924, - "600": 2.2060482501983643, - "601": 2.1847915649414062, - "602": 2.20259952545166, - "603": 2.1774821281433105, - "604": 2.175421714782715, - "605": 2.175705909729004, - "606": 2.178267478942871, - "607": 2.1804356575012207, - "608": 2.171926259994507, - "609": 2.132654905319214, - "610": 2.132364273071289, - "611": 2.1559953689575195, - "612": 2.1362977027893066, - "613": 2.184828996658325, - "614": 2.176003932952881, - "615": 2.1745426654815674, - "616": 2.186737060546875, - "617": 2.146594524383545, - "618": 2.184572219848633, - "619": 2.143556833267212, - "620": 2.152655601501465, - "621": 2.1449923515319824, - "622": 2.170114755630493, - "623": 2.163323402404785, - "624": 2.193357229232788, - "625": 2.1667516231536865, - "626": 2.188030958175659, - "627": 2.1220176219940186, - "628": 2.1484503746032715, - "629": 2.1130330562591553, - "630": 2.171513319015503, - "631": 2.1274163722991943, - "632": 2.1388068199157715, - "633": 2.182231903076172, - "634": 2.1721105575561523, - "635": 2.195622444152832, - "636": 2.188385009765625, - "637": 2.1716933250427246, - "638": 2.168369770050049, - "639": 2.15797758102417, - "640": 2.1595335006713867, - "641": 2.1329140663146973, - "642": 2.1231374740600586, - "643": 2.1575169563293457, - "644": 2.146681070327759, - "645": 2.1851820945739746, - "646": 2.167185068130493, - "647": 2.164590358734131 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "step_size_list": [ - 0.433324, - 0.436253, - 0.437766, - 0.437957, - 0.432054, - 0.4219, - 0.424434, - 0.422665, - 0.424277, - 0.428715, - 0.434683, - 0.436569, - 0.435181, - 0.436489, - 0.434157, - 0.436452, - 0.440194, - 0.441092, - 0.436098, - 0.425903, - 0.429831, - 0.43552, - 0.438019, - 0.433306, - 0.434886, - 0.435102, - 0.433305, - 0.431945, - 0.425591, - 0.428849, - 0.427183, - 0.41901, - 0.429424, - 0.43698, - 0.43429, - 0.425683, - 0.426398, - 0.428255, - 0.426457, - 0.425958, - 0.41198, - 0.409738, - 0.416003, - 0.432306, - 0.428089, - 0.426257, - 0.43087, - 0.438791, - 0.438561, - 0.435171, - 0.42757, - 0.425395, - 0.427433, - 0.433227 - ], - "train_epoch_time": 4.844959735870361, - "train_loss": 2.1331555023768027, - "train_score": 0.36913782276074203, - "val_loss": 2.2340438579445454, - "val_score": 0.343391217265693 - }, - { - "epoch": 12, - "grad_norm": 0.6578726768493652, - "learning_rate": 0.464, - "model_norm": 87.8521728515625, - "step_logs": { - "grad_norm": { - "648": 0.7942488789558411, - "649": 0.8315700888633728, - "650": 0.8246243596076965, - "651": 0.8345829248428345, - "652": 0.84620600938797, - "653": 0.8950802087783813, - "654": 0.8761196732521057, - "655": 0.760361909866333, - "656": 0.713737964630127, - "657": 0.6862717866897583, - "658": 0.7494129538536072, - "659": 0.7932485342025757, - "660": 0.8148427605628967, - "661": 0.8828828930854797, - "662": 0.8043906688690186, - "663": 0.7468248009681702, - "664": 0.7115612626075745, - "665": 0.7042707204818726, - "666": 0.7152650952339172, - "667": 0.6333206295967102, - "668": 0.6261699199676514, - "669": 0.7369368672370911, - "670": 0.8285579681396484, - "671": 0.8881261348724365, - "672": 0.8377741575241089, - "673": 0.7106428742408752, - "674": 0.6459449529647827, - "675": 0.6162481904029846, - "676": 0.6551740169525146, - "677": 0.68690425157547, - "678": 0.6702134013175964, - "679": 0.6396130323410034, - "680": 0.6492288708686829, - "681": 0.6239981055259705, - "682": 0.6616118550300598, - "683": 0.6591967940330505, - "684": 0.6587465405464172, - "685": 0.6365826725959778, - "686": 0.5938937067985535, - "687": 0.5714289546012878, - "688": 0.6066100001335144, - "689": 0.5469507575035095, - "690": 0.5249015092849731, - "691": 0.5263426303863525, - "692": 0.5630566477775574, - "693": 0.6263416409492493, - "694": 0.6880822777748108, - "695": 0.6980996131896973, - "696": 0.7213163375854492, - "697": 0.7061492204666138, - "698": 0.6284188032150269, - "699": 0.662869393825531, - "700": 0.6534774303436279, - "701": 0.6578726768493652 - }, - "loss": { - "648": 2.1203856468200684, - "649": 2.1409246921539307, - "650": 2.1532139778137207, - "651": 2.1269664764404297, - "652": 2.1562998294830322, - "653": 2.141098976135254, - "654": 2.134981870651245, - "655": 2.119570255279541, - "656": 2.1185030937194824, - "657": 2.1137454509735107, - "658": 2.108039617538452, - "659": 2.1088359355926514, - "660": 2.1146793365478516, - "661": 2.1406309604644775, - "662": 2.1560144424438477, - "663": 2.1203246116638184, - "664": 2.110269069671631, - "665": 2.1034622192382812, - "666": 2.107478380203247, - "667": 2.075822353363037, - "668": 2.073240280151367, - "669": 2.109856128692627, - "670": 2.1094839572906494, - "671": 2.093839406967163, - "672": 2.1225485801696777, - "673": 2.1203927993774414, - "674": 2.0864572525024414, - "675": 2.0666136741638184, - "676": 2.106548309326172, - "677": 2.082718849182129, - "678": 2.086256980895996, - "679": 2.103593587875366, - "680": 2.080355167388916, - "681": 2.0590333938598633, - "682": 2.083422899246216, - "683": 2.099015712738037, - "684": 2.0764753818511963, - "685": 2.0707638263702393, - "686": 2.0588200092315674, - "687": 2.0492873191833496, - "688": 2.0794143676757812, - "689": 2.0440845489501953, - "690": 2.0369601249694824, - "691": 2.064953327178955, - "692": 2.0237653255462646, - "693": 2.0306992530822754, - "694": 2.0534825325012207, - "695": 2.049145221710205, - "696": 2.0784759521484375, - "697": 2.0491557121276855, - "698": 2.047065496444702, - "699": 2.035550594329834, - "700": 2.069148063659668, - "701": 2.0469658374786377 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "step_size_list": [ - 0.434042, - 0.429174, - 0.427347, - 0.423805, - 0.420915, - 0.414783, - 0.413594, - 0.418605, - 0.418873, - 0.417824, - 0.411494, - 0.406279, - 0.402483, - 0.395996, - 0.39855, - 0.398945, - 0.398197, - 0.395922, - 0.392781, - 0.39399, - 0.391654, - 0.383899, - 0.376426, - 0.370351, - 0.371013, - 0.374878, - 0.374928, - 0.373401, - 0.369359, - 0.365181, - 0.363288, - 0.362002, - 0.358811, - 0.357009, - 0.353021, - 0.350554, - 0.347774, - 0.345907, - 0.344675, - 0.342678, - 0.338952, - 0.337996, - 0.335896, - 0.333221, - 0.329259, - 0.324604, - 0.319991, - 0.316968, - 0.313698, - 0.311392, - 0.311162, - 0.307379, - 0.305141, - 0.302223 - ], - "train_epoch_time": 4.845282554626465, - "train_loss": 2.0502974032671584, - "train_score": 0.3936233411560442, - "val_loss": 2.1602321713991746, - "val_score": 0.3634292483329773 - }, - { - "epoch": 13, - "grad_norm": 0.4436917304992676, - "learning_rate": 0.3093333333333334, - "model_norm": 87.8878402709961, - "step_logs": { - "grad_norm": { - "702": 0.6699285507202148, - "703": 0.5805220007896423, - "704": 0.6074411869049072, - "705": 0.6303211450576782, - "706": 0.5985572338104248, - "707": 0.608280599117279, - "708": 0.6303935050964355, - "709": 0.6220338344573975, - "710": 0.6174399256706238, - "711": 0.5413736701011658, - "712": 0.5112146735191345, - "713": 0.4871719181537628, - "714": 0.4399351477622986, - "715": 0.44969791173934937, - "716": 0.4524420499801636, - "717": 0.46417248249053955, - "718": 0.48964205384254456, - "719": 0.5409447550773621, - "720": 0.5520158410072327, - "721": 0.5363030433654785, - "722": 0.5482470393180847, - "723": 0.5800043940544128, - "724": 0.546682596206665, - "725": 0.5169199705123901, - "726": 0.514824390411377, - "727": 0.4663446843624115, - "728": 0.45961812138557434, - "729": 0.45055440068244934, - "730": 0.46595969796180725, - "731": 0.46011409163475037, - "732": 0.44026613235473633, - "733": 0.43594837188720703, - "734": 0.4653131663799286, - "735": 0.4784716069698334, - "736": 0.42221033573150635, - "737": 0.4199584424495697, - "738": 0.43594419956207275, - "739": 0.4137166738510132, - "740": 0.42047253251075745, - "741": 0.4431529641151428, - "742": 0.4429382085800171, - "743": 0.4476552903652191, - "744": 0.4506986737251282, - "745": 0.41717639565467834, - "746": 0.44431763887405396, - "747": 0.4076555073261261, - "748": 0.43802496790885925, - "749": 0.4421563446521759, - "750": 0.44402697682380676, - "751": 0.42554566264152527, - "752": 0.40179625153541565, - "753": 0.404940128326416, - "754": 0.43356096744537354, - "755": 0.4436917304992676 - }, - "loss": { - "702": 2.0448737144470215, - "703": 2.046647787094116, - "704": 2.041076898574829, - "705": 2.0410609245300293, - "706": 2.0298800468444824, - "707": 2.032756805419922, - "708": 2.0318081378936768, - "709": 2.058128833770752, - "710": 2.0420947074890137, - "711": 2.0357046127319336, - "712": 2.0125179290771484, - "713": 2.0334067344665527, - "714": 2.0085813999176025, - "715": 2.0451345443725586, - "716": 1.9992128610610962, - "717": 1.9843368530273438, - "718": 2.0078001022338867, - "719": 2.046980619430542, - "720": 1.9874895811080933, - "721": 1.9991458654403687, - "722": 2.0271124839782715, - "723": 2.005398988723755, - "724": 2.0288848876953125, - "725": 2.0452847480773926, - "726": 2.0108683109283447, - "727": 2.010511875152588, - "728": 2.0094542503356934, - "729": 2.0190353393554688, - "730": 1.988690733909607, - "731": 2.029412269592285, - "732": 1.9948115348815918, - "733": 2.005605697631836, - "734": 2.004103899002075, - "735": 2.025423049926758, - "736": 1.9900341033935547, - "737": 1.9794466495513916, - "738": 2.007734775543213, - "739": 2.0177109241485596, - "740": 1.9895695447921753, - "741": 2.0379581451416016, - "742": 1.989262342453003, - "743": 1.984004259109497, - "744": 1.9994091987609863, - "745": 1.9834046363830566, - "746": 2.0179667472839355, - "747": 2.0069119930267334, - "748": 1.9827308654785156, - "749": 1.969693660736084, - "750": 2.015787363052368, - "751": 1.982442855834961, - "752": 2.0177993774414062, - "753": 2.00639009475708, - "754": 2.002668857574463, - "755": 2.0029540061950684 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "step_size_list": [ - 0.299177, - 0.298927, - 0.295496, - 0.292188, - 0.290247, - 0.287299, - 0.284033, - 0.281626, - 0.278962, - 0.277883, - 0.275667, - 0.273394, - 0.271368, - 0.268487, - 0.265574, - 0.262573, - 0.259425, - 0.255875, - 0.252782, - 0.250323, - 0.247426, - 0.244084, - 0.241932, - 0.239646, - 0.236837, - 0.234711, - 0.232, - 0.229325, - 0.226305, - 0.223632, - 0.221009, - 0.218263, - 0.215149, - 0.212238, - 0.209957, - 0.207157, - 0.204229, - 0.201619, - 0.198724, - 0.195766, - 0.192913, - 0.190059, - 0.187239, - 0.184665, - 0.18168, - 0.179106, - 0.176067, - 0.173214, - 0.17042, - 0.167693, - 0.165027, - 0.162177, - 0.159197, - 0.156321 - ], - "train_epoch_time": 4.845399618148804, - "train_loss": 1.9869464773700773, - "train_score": 0.41210993530931567, - "val_loss": 2.0999501641116924, - "val_score": 0.38056562175433206 - }, - { - "epoch": 14, - "grad_norm": 0.3497675955295563, - "learning_rate": 0.1546666666666667, - "model_norm": 87.89995574951172, - "step_logs": { - "grad_norm": { - "756": 0.3825933039188385, - "757": 0.4019700288772583, - "758": 0.43566250801086426, - "759": 0.4353422224521637, - "760": 0.39662641286849976, - "761": 0.40551745891571045, - "762": 0.38110724091529846, - "763": 0.3760072886943817, - "764": 0.42468348145484924, - "765": 0.39241504669189453, - "766": 0.3947967290878296, - "767": 0.3975926339626312, - "768": 0.37576502561569214, - "769": 0.40324899554252625, - "770": 0.4067135155200958, - "771": 0.37126943469047546, - "772": 0.360101580619812, - "773": 0.3835076689720154, - "774": 0.3931007981300354, - "775": 0.4015660881996155, - "776": 0.3725912868976593, - "777": 0.36091774702072144, - "778": 0.3680638074874878, - "779": 0.3820505440235138, - "780": 0.367154985666275, - "781": 0.37965452671051025, - "782": 0.37628594040870667, - "783": 0.3540761470794678, - "784": 0.36645638942718506, - "785": 0.3694341480731964, - "786": 0.3758610785007477, - "787": 0.38044625520706177, - "788": 0.3860872685909271, - "789": 0.3585388660430908, - "790": 0.37094998359680176, - "791": 0.3838367462158203, - "792": 0.36435091495513916, - "793": 0.3566299080848694, - "794": 0.3682991862297058, - "795": 0.3411950170993805, - "796": 0.3374997675418854, - "797": 0.35412564873695374, - "798": 0.3519652783870697, - "799": 0.35775846242904663, - "800": 0.37194088101387024, - "801": 0.38294243812561035, - "802": 0.33733099699020386, - "803": 0.3885990381240845, - "804": 0.3471088707447052, - "805": 0.35841116309165955, - "806": 0.3394443392753601, - "807": 0.38250747323036194, - "808": 0.3515566885471344, - "809": 0.3497675955295563 - }, - "loss": { - "756": 2.0030295848846436, - "757": 2.0008554458618164, - "758": 1.9976857900619507, - "759": 2.0156116485595703, - "760": 1.9824223518371582, - "761": 1.9933888912200928, - "762": 1.9906694889068604, - "763": 2.0045247077941895, - "764": 1.9395992755889893, - "765": 1.9741698503494263, - "766": 1.9911028146743774, - "767": 1.9915744066238403, - "768": 2.0097122192382812, - "769": 1.9591145515441895, - "770": 2.0089528560638428, - "771": 1.9596589803695679, - "772": 1.991560697555542, - "773": 1.9729987382888794, - "774": 1.9763469696044922, - "775": 1.968724012374878, - "776": 1.9851090908050537, - "777": 1.9672082662582397, - "778": 1.9833989143371582, - "779": 1.9909121990203857, - "780": 1.9740742444992065, - "781": 1.9451031684875488, - "782": 1.9947526454925537, - "783": 1.9418306350708008, - "784": 1.9938074350357056, - "785": 1.9893717765808105, - "786": 1.9707704782485962, - "787": 1.992068886756897, - "788": 1.9773051738739014, - "789": 1.9256902933120728, - "790": 1.9413520097732544, - "791": 1.994657278060913, - "792": 2.005741596221924, - "793": 1.9567921161651611, - "794": 1.9880865812301636, - "795": 1.9676276445388794, - "796": 1.9862022399902344, - "797": 1.9289112091064453, - "798": 1.963991403579712, - "799": 1.990053653717041, - "800": 1.955742597579956, - "801": 1.9384512901306152, - "802": 1.9901102781295776, - "803": 1.974186658859253, - "804": 1.9994986057281494, - "805": 1.9842350482940674, - "806": 1.9754936695098877, - "807": 1.979050636291504, - "808": 1.9623892307281494, - "809": 1.9532264471054077 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "step_size_list": [ - 0.153797, - 0.150878, - 0.147892, - 0.145078, - 0.142401, - 0.139538, - 0.136795, - 0.133981, - 0.130951, - 0.128244, - 0.125406, - 0.122561, - 0.11979, - 0.116863, - 0.11403, - 0.111267, - 0.108455, - 0.105558, - 0.102697, - 0.099837, - 0.0970522, - 0.0942237, - 0.0913683, - 0.0885021, - 0.0856746, - 0.0828069, - 0.0799699, - 0.0771407, - 0.0742828, - 0.0714295, - 0.0685718, - 0.0657193, - 0.062863, - 0.0600276, - 0.0571679, - 0.0543106, - 0.0514677, - 0.0486144, - 0.0457556, - 0.0429084, - 0.0400527, - 0.0371896, - 0.0343332, - 0.0314743, - 0.028613, - 0.0257527, - 0.0228986, - 0.020034, - 0.0171763, - 0.0143144, - 0.011453, - 0.00858986, - 0.00572736, - 0.00286394 - ], - "train_epoch_time": 4.84417200088501, - "train_loss": 1.9697177921169287, - "train_score": 0.417135939707544, - "val_loss": 2.0870154908549226, - "val_score": 0.3851490741687582 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:35:32.427588", - "final_model_norm": 87.89995574951172, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:33:50.841040", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 10.551793098449707, - "learning_rate": 1e-10, - "model_norm": 87.31610870361328, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.277790546417236, - "3": 7.638795375823975, - "4": 11.775469779968262, - "5": 4.806578636169434, - "6": 9.401572227478027, - "7": 3.420166492462158, - "8": 4.973513126373291, - "9": 5.9297661781311035, - "10": 4.668890953063965, - "11": 5.522274494171143, - "12": 19.95566177368164, - "13": 14.449785232543945, - "14": 5.967987060546875, - "15": 21.7133731842041, - "16": 9.069622039794922, - "17": 9.446586608886719, - "18": 17.0661563873291, - "19": 17.564598083496094, - "20": 3.7146430015563965, - "21": 6.430539608001709, - "22": 5.4503865242004395, - "23": 14.734153747558594, - "24": 9.41879940032959, - "25": 6.050946235656738, - "26": 16.967670440673828, - "27": 14.951860427856445, - "28": 7.332396984100342, - "29": 10.911014556884766, - "30": 4.50460958480835, - "31": 12.328603744506836, - "32": 6.336200714111328, - "33": 5.86729621887207, - "34": 5.471032619476318, - "35": 8.557619094848633, - "36": 5.094140529632568, - "37": 5.427487850189209, - "38": 13.277605056762695, - "39": 5.008471965789795, - "40": 10.797125816345215, - "41": 1.5552220344543457, - "42": 3.259514570236206, - "43": 2.6860194206237793, - "44": 2.179973602294922, - "45": 5.520071983337402, - "46": 2.507417678833008, - "47": 3.0781381130218506, - "48": 1.5036157369613647, - "49": 18.435609817504883, - "50": 4.062704563140869, - "51": 0.755198061466217, - "52": 2.889233350753784, - "53": 10.551793098449707 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.803471088409424, - "3": 4.067673206329346, - "4": 4.131890296936035, - "5": 4.486215114593506, - "6": 3.857367753982544, - "7": 4.027968406677246, - "8": 4.340281963348389, - "9": 3.8410356044769287, - "10": 4.843392848968506, - "11": 3.9144158363342285, - "12": 5.213651657104492, - "13": 7.589272975921631, - "14": 6.168381690979004, - "15": 4.244146347045898, - "16": 6.628342628479004, - "17": 5.097823619842529, - "18": 3.928354263305664, - "19": 4.672650337219238, - "20": 4.380564212799072, - "21": 4.482689380645752, - "22": 4.928394317626953, - "23": 6.558607578277588, - "24": 6.239792823791504, - "25": 5.458795070648193, - "26": 8.716230392456055, - "27": 7.661637306213379, - "28": 6.927092552185059, - "29": 6.035472393035889, - "30": 6.779300689697266, - "31": 6.210404396057129, - "32": 6.136882781982422, - "33": 6.734975814819336, - "34": 4.608994483947754, - "35": 5.8745293617248535, - "36": 7.247208595275879, - "37": 5.258144378662109, - "38": 7.626327991485596, - "39": 5.682893753051758, - "40": 5.561727523803711, - "41": 4.468381881713867, - "42": 3.837730884552002, - "43": 5.029836654663086, - "44": 4.081175327301025, - "45": 3.951021194458008, - "46": 4.729595184326172, - "47": 4.054864883422852, - "48": 3.86513090133667, - "49": 8.084235191345215, - "50": 4.109991550445557, - "51": 3.730285167694092, - "52": 3.627291202545166, - "53": 5.988303184509277 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 1e-10, - 0.00923366, - 0.0331336, - 0.0419477, - 0.0341536, - 0.0795234, - 0.0505291, - 0.116348, - 0.109895, - 0.0986902, - 0.137925, - 0.118473, - 0.0236085, - 0.0568112, - 0.154835, - 0.0169846, - 0.107181, - 0.0855159, - 0.0250951, - 0.0280549, - 0.2454, - 0.142993, - 0.189159, - 0.0534065, - 0.10879, - 0.186788, - 0.0542348, - 0.0608224, - 0.176478, - 0.086306, - 0.316131, - 0.0722022, - 0.20689, - 0.245649, - 0.211966, - 0.13052, - 0.314539, - 0.240819, - 0.0776754, - 0.286607, - 0.0852488, - 0.671069, - 0.388397, - 0.53192, - 0.581874, - 0.20132, - 0.570901, - 0.447994, - 0.749549, - 0.0453699, - 0.332449, - 0.928984, - 0.46497, - 0.0971205 - ], - "train_epoch_time": 4.84472393989563, - "train_loss": 3.850578463060442, - "train_score": 0.08525600790378866, - "val_loss": 3.862292156427517, - "val_score": 0.08233621570225014 - }, - { - "epoch": 1, - "grad_norm": 3.6986000537872314, - "learning_rate": 1.0, - "model_norm": 87.37684631347656, - "step_logs": { - "grad_norm": { - "54": 1.182322382926941, - "55": 16.677125930786133, - "56": 1.5781805515289307, - "57": 3.6878888607025146, - "58": 1.1136995553970337, - "59": 16.71405601501465, - "60": 2.185511350631714, - "61": 3.47221040725708, - "62": 1.9137296676635742, - "63": 4.228087425231934, - "64": 1.695902943611145, - "65": 5.307374477386475, - "66": 8.767341613769531, - "67": 1.54763925075531, - "68": 3.743389844894409, - "69": 2.758080005645752, - "70": 3.3279528617858887, - "71": 7.328800201416016, - "72": 1.728035569190979, - "73": 3.4959888458251953, - "74": 8.857303619384766, - "75": 0.954953670501709, - "76": 1.0097728967666626, - "77": 1.0383492708206177, - "78": 5.7253594398498535, - "79": 1.7573403120040894, - "80": 3.0066046714782715, - "81": 2.5207180976867676, - "82": 2.5117576122283936, - "83": 5.960236549377441, - "84": 1.522796869277954, - "85": 1.59621262550354, - "86": 0.4780735671520233, - "87": 0.3492223918437958, - "88": 1.115275263786316, - "89": 8.024332046508789, - "90": 1.662909984588623, - "91": 5.380059719085693, - "92": 1.5348799228668213, - "93": 1.0621405839920044, - "94": 1.9930458068847656, - "95": 0.8633806705474854, - "96": 1.5750083923339844, - "97": 3.6639809608459473, - "98": 2.683743476867676, - "99": 1.2608025074005127, - "100": 8.151193618774414, - "101": 1.1206763982772827, - "102": 3.7155747413635254, - "103": 1.0862257480621338, - "104": 8.485272407531738, - "105": 1.4894580841064453, - "106": 1.1201480627059937, - "107": 3.6986000537872314 - }, - "loss": { - "54": 3.861490249633789, - "55": 8.558002471923828, - "56": 3.711120128631592, - "57": 3.637338399887085, - "58": 3.6370863914489746, - "59": 8.956304550170898, - "60": 3.992795467376709, - "61": 4.36627721786499, - "62": 3.619739294052124, - "63": 4.753203392028809, - "64": 3.838421583175659, - "65": 3.8743603229522705, - "66": 5.395791053771973, - "67": 3.989959716796875, - "68": 3.738399028778076, - "69": 4.704212188720703, - "70": 4.339101314544678, - "71": 4.819789409637451, - "72": 4.133965492248535, - "73": 3.702777862548828, - "74": 4.930734634399414, - "75": 3.6834280490875244, - "76": 3.4976930618286133, - "77": 3.5633816719055176, - "78": 4.216736316680908, - "79": 3.9362308979034424, - "80": 3.5475428104400635, - "81": 4.4532270431518555, - "82": 3.6925930976867676, - "83": 4.420823097229004, - "84": 3.861217975616455, - "85": 3.8212168216705322, - "86": 3.4784512519836426, - "87": 3.365506172180176, - "88": 3.4324467182159424, - "89": 5.209386825561523, - "90": 3.713132619857788, - "91": 4.07759952545166, - "92": 3.924821615219116, - "93": 3.474189519882202, - "94": 3.6700921058654785, - "95": 3.408604145050049, - "96": 3.536109447479248, - "97": 3.993644952774048, - "98": 4.522773742675781, - "99": 3.428499698638916, - "100": 5.0790486335754395, - "101": 3.4795517921447754, - "102": 3.686734437942505, - "103": 3.5189476013183594, - "104": 4.491830825805664, - "105": 3.652750015258789, - "106": 3.355496883392334, - "107": 3.7562854290008545 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.846738, - 0.0579727, - 0.748746, - 0.348484, - 0.854328, - 0.0602566, - 0.625729, - 0.420061, - 0.66406, - 0.347163, - 0.727461, - 0.21574, - 0.12311, - 0.769141, - 0.347924, - 0.552935, - 0.439325, - 0.152162, - 0.734663, - 0.377305, - 0.111665, - 0.889847, - 0.872784, - 0.868595, - 0.20463, - 0.718244, - 0.439739, - 0.583629, - 0.539297, - 0.199289, - 0.769064, - 0.749969, - 0.968192, - 0.982204, - 0.846605, - 0.139272, - 0.72867, - 0.219815, - 0.769158, - 0.860318, - 0.648861, - 0.901433, - 0.740324, - 0.373028, - 0.556717, - 0.811804, - 0.132612, - 0.84712, - 0.348151, - 0.856423, - 0.110932, - 0.767063, - 0.842484, - 0.354497 - ], - "train_epoch_time": 4.842365980148315, - "train_loss": 3.5994698837805683, - "train_score": 0.12767777074785794, - "val_loss": 3.6159108990779836, - "val_score": 0.1264799799936921 - }, - { - "epoch": 2, - "grad_norm": 0.936374843120575, - "learning_rate": 1.0, - "model_norm": 87.47958374023438, - "step_logs": { - "grad_norm": { - "108": 1.0130778551101685, - "109": 0.7867873311042786, - "110": 1.87189781665802, - "111": 1.7924425601959229, - "112": 0.9850931167602539, - "113": 1.0074862241744995, - "114": 2.594512939453125, - "115": 1.3440529108047485, - "116": 8.823569297790527, - "117": 1.658522605895996, - "118": 0.7072761058807373, - "119": 1.7998427152633667, - "120": 1.2994506359100342, - "121": 1.7640235424041748, - "122": 1.523087978363037, - "123": 2.234794855117798, - "124": 1.9781229496002197, - "125": 1.7689653635025024, - "126": 1.3036879301071167, - "127": 1.8978241682052612, - "128": 0.9382278323173523, - "129": 3.1003024578094482, - "130": 1.2334017753601074, - "131": 6.025845527648926, - "132": 1.1387182474136353, - "133": 0.6895479559898376, - "134": 2.426396131515503, - "135": 1.4569425582885742, - "136": 7.109930992126465, - "137": 0.9290447235107422, - "138": 0.8100121021270752, - "139": 1.3584754467010498, - "140": 0.9901715517044067, - "141": 2.779709577560425, - "142": 1.0672603845596313, - "143": 3.458022117614746, - "144": 2.07422137260437, - "145": 0.9171708822250366, - "146": 3.3331644535064697, - "147": 1.2579619884490967, - "148": 1.2128691673278809, - "149": 1.8638805150985718, - "150": 1.8696259260177612, - "151": 1.37785804271698, - "152": 0.5921233892440796, - "153": 0.9394212365150452, - "154": 4.332217216491699, - "155": 1.2041782140731812, - "156": 1.0545597076416016, - "157": 1.414461612701416, - "158": 0.6525693535804749, - "159": 1.5009300708770752, - "160": 1.1042121648788452, - "161": 0.936374843120575 - }, - "loss": { - "108": 3.5776641368865967, - "109": 3.255084991455078, - "110": 3.3824307918548584, - "111": 3.5914969444274902, - "112": 3.3415751457214355, - "113": 3.4208984375, - "114": 3.4317774772644043, - "115": 3.507892370223999, - "116": 4.4293718338012695, - "117": 3.582699775695801, - "118": 3.2451202869415283, - "119": 3.324535846710205, - "120": 3.5358033180236816, - "121": 3.269261598587036, - "122": 3.635152578353882, - "123": 3.265742063522339, - "124": 3.8618967533111572, - "125": 3.4195780754089355, - "126": 3.3901875019073486, - "127": 3.3553900718688965, - "128": 3.3553123474121094, - "129": 3.3570055961608887, - "130": 3.3605549335479736, - "131": 3.921452045440674, - "132": 3.466475486755371, - "133": 3.0382943153381348, - "134": 3.324812889099121, - "135": 3.254194498062134, - "136": 4.093168258666992, - "137": 3.3552699089050293, - "138": 3.134474754333496, - "139": 3.121914863586426, - "140": 3.2171969413757324, - "141": 3.2393674850463867, - "142": 3.1525063514709473, - "143": 3.4248101711273193, - "144": 3.5027499198913574, - "145": 3.2340564727783203, - "146": 3.378838300704956, - "147": 3.461606502532959, - "148": 3.19130802154541, - "149": 3.1711950302124023, - "150": 3.3687026500701904, - "151": 3.482800006866455, - "152": 2.9598186016082764, - "153": 2.9557132720947266, - "154": 3.7361207008361816, - "155": 3.1387643814086914, - "156": 3.023664951324463, - "157": 3.1793694496154785, - "158": 3.0000743865966797, - "159": 2.9984164237976074, - "160": 3.201073408126831, - "161": 2.98128604888916 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 0.874558, - 0.913169, - 0.658774, - 0.690949, - 0.873208, - 0.870809, - 0.504857, - 0.795236, - 0.10216, - 0.722603, - 0.92844, - 0.672404, - 0.807245, - 0.677546, - 0.758105, - 0.566684, - 0.663741, - 0.686084, - 0.799575, - 0.650741, - 0.884036, - 0.411249, - 0.815432, - 0.177627, - 0.842438, - 0.927431, - 0.530399, - 0.754065, - 0.139372, - 0.886036, - 0.905254, - 0.771864, - 0.867773, - 0.456072, - 0.846986, - 0.364195, - 0.619523, - 0.884914, - 0.378207, - 0.813952, - 0.812692, - 0.646099, - 0.658406, - 0.785822, - 0.944084, - 0.870103, - 0.284762, - 0.812354, - 0.844667, - 0.760666, - 0.933731, - 0.726922, - 0.840019, - 0.871801 - ], - "train_epoch_time": 4.841893672943115, - "train_loss": 3.183151743846439, - "train_score": 0.19856191717056154, - "val_loss": 3.1889810083106793, - "val_score": 0.19537528726591993 - }, - { - "epoch": 3, - "grad_norm": 0.6600807905197144, - "learning_rate": 1.0, - "model_norm": 87.58049774169922, - "step_logs": { - "grad_norm": { - "162": 2.349169969558716, - "163": 0.9088624715805054, - "164": 6.740917682647705, - "165": 1.0101063251495361, - "166": 1.0578159093856812, - "167": 0.6850082874298096, - "168": 1.4120283126831055, - "169": 0.728011965751648, - "170": 0.613506555557251, - "171": 0.7622566223144531, - "172": 1.65699303150177, - "173": 0.810117244720459, - "174": 2.547241449356079, - "175": 1.0112279653549194, - "176": 4.82560920715332, - "177": 0.9750214219093323, - "178": 0.9130164980888367, - "179": 1.0064336061477661, - "180": 1.4664839506149292, - "181": 0.9779031872749329, - "182": 1.3843203783035278, - "183": 1.1455594301223755, - "184": 0.661656379699707, - "185": 0.6008827686309814, - "186": 1.5262645483016968, - "187": 0.733101487159729, - "188": 0.4733677804470062, - "189": 0.690822958946228, - "190": 1.532776951789856, - "191": 0.8726674318313599, - "192": 0.9893300533294678, - "193": 0.7634308934211731, - "194": 0.5948365330696106, - "195": 0.5662253499031067, - "196": 0.6097018718719482, - "197": 1.115746021270752, - "198": 0.9085699319839478, - "199": 0.6395397782325745, - "200": 0.5406814217567444, - "201": 0.5202569365501404, - "202": 0.5106058716773987, - "203": 0.6065346002578735, - "204": 0.5745587944984436, - "205": 0.6677202582359314, - "206": 0.7044236063957214, - "207": 0.8624392747879028, - "208": 0.8338705897331238, - "209": 0.6331600546836853, - "210": 0.6282129883766174, - "211": 0.7349923253059387, - "212": 0.7543413639068604, - "213": 0.6560853719711304, - "214": 0.6241986751556396, - "215": 0.6600807905197144 - }, - "loss": { - "162": 3.18389630317688, - "163": 2.9645843505859375, - "164": 3.8013947010040283, - "165": 3.05190110206604, - "166": 2.959916114807129, - "167": 2.9502933025360107, - "168": 2.9609286785125732, - "169": 3.033219575881958, - "170": 2.8299102783203125, - "171": 2.863287925720215, - "172": 2.9857888221740723, - "173": 2.996337413787842, - "174": 3.021493911743164, - "175": 2.930809259414673, - "176": 3.494567394256592, - "177": 3.101743221282959, - "178": 2.8797178268432617, - "179": 2.9090380668640137, - "180": 2.994272470474243, - "181": 3.0475385189056396, - "182": 2.8672924041748047, - "183": 3.0865540504455566, - "184": 2.8795323371887207, - "185": 2.78538179397583, - "186": 2.892137050628662, - "187": 3.0214877128601074, - "188": 2.7861251831054688, - "189": 2.757634401321411, - "190": 2.958156108856201, - "191": 2.9685380458831787, - "192": 2.796172618865967, - "193": 2.960533857345581, - "194": 2.7535245418548584, - "195": 2.758512496948242, - "196": 2.7589588165283203, - "197": 2.8154330253601074, - "198": 2.928706407546997, - "199": 2.850471019744873, - "200": 2.761180877685547, - "201": 2.740119695663452, - "202": 2.7113795280456543, - "203": 2.7149839401245117, - "204": 2.748411178588867, - "205": 2.731147289276123, - "206": 2.7936136722564697, - "207": 2.7666540145874023, - "208": 2.8514158725738525, - "209": 2.7581472396850586, - "210": 2.7208895683288574, - "211": 2.7478692531585693, - "212": 2.8006601333618164, - "213": 2.7553091049194336, - "214": 2.7745625972747803, - "215": 2.702153444290161 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 0.535721, - 0.877719, - 0.143333, - 0.85678, - 0.841028, - 0.926335, - 0.748117, - 0.919653, - 0.937645, - 0.907883, - 0.685034, - 0.901294, - 0.482227, - 0.85146, - 0.23085, - 0.867117, - 0.873564, - 0.851718, - 0.735773, - 0.864382, - 0.749528, - 0.824685, - 0.929353, - 0.939132, - 0.712897, - 0.918328, - 0.961342, - 0.920361, - 0.715765, - 0.886313, - 0.851049, - 0.910388, - 0.939628, - 0.945079, - 0.936883, - 0.818945, - 0.876476, - 0.933058, - 0.949725, - 0.952935, - 0.954127, - 0.936548, - 0.943346, - 0.924536, - 0.918432, - 0.881506, - 0.891322, - 0.93225, - 0.932381, - 0.910501, - 0.90778, - 0.927547, - 0.934393, - 0.925393 - ], - "train_epoch_time": 4.842341899871826, - "train_loss": 2.7814284674235363, - "train_score": 0.21141947627153082, - "val_loss": 2.808646624178618, - "val_score": 0.20431346881677034 - }, - { - "epoch": 4, - "grad_norm": 0.6852704882621765, - "learning_rate": 1.0, - "model_norm": 87.67063903808594, - "step_logs": { - "grad_norm": { - "216": 0.6668159365653992, - "217": 0.526739776134491, - "218": 0.5584539175033569, - "219": 0.7145861983299255, - "220": 0.8104191422462463, - "221": 0.4488040804862976, - "222": 0.48834919929504395, - "223": 0.7027773261070251, - "224": 0.7009124755859375, - "225": 0.606742262840271, - "226": 0.7202913761138916, - "227": 0.7018812894821167, - "228": 0.689257800579071, - "229": 0.6530488133430481, - "230": 0.6661697030067444, - "231": 0.7815045118331909, - "232": 0.6919880509376526, - "233": 0.5697280168533325, - "234": 0.6913193464279175, - "235": 0.7303526401519775, - "236": 0.6265333890914917, - "237": 0.603069007396698, - "238": 0.7266407012939453, - "239": 0.6770203113555908, - "240": 0.6387922763824463, - "241": 0.6717566251754761, - "242": 0.6977189779281616, - "243": 0.651513934135437, - "244": 0.6178772449493408, - "245": 0.6837935447692871, - "246": 0.6591500639915466, - "247": 0.5391072630882263, - "248": 0.6287344098091125, - "249": 0.7252734899520874, - "250": 0.691916823387146, - "251": 0.5559298992156982, - "252": 0.5315951704978943, - "253": 0.5950450897216797, - "254": 0.6224647760391235, - "255": 0.5911554098129272, - "256": 0.6148871779441833, - "257": 0.6896852254867554, - "258": 0.7248898148536682, - "259": 0.6099466681480408, - "260": 0.6298787593841553, - "261": 0.6692044138908386, - "262": 0.6525567173957825, - "263": 0.5846357941627502, - "264": 0.6014005541801453, - "265": 0.6742257475852966, - "266": 0.6430836915969849, - "267": 0.6430049538612366, - "268": 0.690177321434021, - "269": 0.6852704882621765 - }, - "loss": { - "216": 2.7904422283172607, - "217": 2.7123732566833496, - "218": 2.7137954235076904, - "219": 2.693697452545166, - "220": 2.818570852279663, - "221": 2.714264154434204, - "222": 2.667057991027832, - "223": 2.7210018634796143, - "224": 2.7749829292297363, - "225": 2.70729398727417, - "226": 2.74062442779541, - "227": 2.730461835861206, - "228": 2.759340524673462, - "229": 2.7050209045410156, - "230": 2.7378931045532227, - "231": 2.7252767086029053, - "232": 2.7979393005371094, - "233": 2.6583619117736816, - "234": 2.753272533416748, - "235": 2.7250847816467285, - "236": 2.7271876335144043, - "237": 2.686765670776367, - "238": 2.7256574630737305, - "239": 2.713956356048584, - "240": 2.708928108215332, - "241": 2.6793227195739746, - "242": 2.731151580810547, - "243": 2.685105323791504, - "244": 2.705883502960205, - "245": 2.667982578277588, - "246": 2.73250412940979, - "247": 2.6658506393432617, - "248": 2.6772403717041016, - "249": 2.6917781829833984, - "250": 2.736544132232666, - "251": 2.6529805660247803, - "252": 2.6589348316192627, - "253": 2.639176845550537, - "254": 2.6941981315612793, - "255": 2.6540732383728027, - "256": 2.691887855529785, - "257": 2.6696085929870605, - "258": 2.697352409362793, - "259": 2.6578001976013184, - "260": 2.666073799133301, - "261": 2.670083999633789, - "262": 2.7231509685516357, - "263": 2.6504769325256348, - "264": 2.6443305015563965, - "265": 2.6431427001953125, - "266": 2.6884877681732178, - "267": 2.6233601570129395, - "268": 2.683504581451416, - "269": 2.661273717880249 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 0.926207, - 0.951343, - 0.945662, - 0.913423, - 0.895649, - 0.964223, - 0.957204, - 0.916795, - 0.918679, - 0.936339, - 0.913531, - 0.917253, - 0.920738, - 0.92693, - 0.925031, - 0.899238, - 0.921174, - 0.942462, - 0.920139, - 0.910854, - 0.932863, - 0.936608, - 0.911695, - 0.922131, - 0.929959, - 0.92233, - 0.918171, - 0.926748, - 0.934104, - 0.919433, - 0.926353, - 0.948307, - 0.931248, - 0.910988, - 0.919563, - 0.944959, - 0.949541, - 0.937136, - 0.932917, - 0.938231, - 0.934381, - 0.918199, - 0.911242, - 0.934589, - 0.930746, - 0.922627, - 0.927483, - 0.939427, - 0.935989, - 0.920817, - 0.928581, - 0.926954, - 0.918481, - 0.918925 - ], - "train_epoch_time": 4.841772556304932, - "train_loss": 2.6756673847756733, - "train_score": 0.23037123384229422, - "val_loss": 2.6977650867400844, - "val_score": 0.22342763360414383 - }, - { - "epoch": 5, - "grad_norm": 0.6575700044631958, - "learning_rate": 1.0, - "model_norm": 87.76293182373047, - "step_logs": { - "grad_norm": { - "270": 0.6253135800361633, - "271": 0.5745961666107178, - "272": 0.5815318822860718, - "273": 0.6147587299346924, - "274": 0.6545942425727844, - "275": 0.5514642596244812, - "276": 0.5357682108879089, - "277": 0.6064366102218628, - "278": 0.6165779829025269, - "279": 0.5857059955596924, - "280": 0.6177246570587158, - "281": 0.7129228115081787, - "282": 0.7288950085639954, - "283": 0.6149364113807678, - "284": 0.616297721862793, - "285": 0.6054685115814209, - "286": 0.6007561683654785, - "287": 0.6003897786140442, - "288": 0.670651376247406, - "289": 0.7251616716384888, - "290": 0.6183573603630066, - "291": 0.5742987990379333, - "292": 0.6593526601791382, - "293": 0.6468791365623474, - "294": 0.5772930979728699, - "295": 0.6269423961639404, - "296": 0.6214361786842346, - "297": 0.6635381579399109, - "298": 0.8135142922401428, - "299": 0.6042878031730652, - "300": 0.5405102968215942, - "301": 0.6937423348426819, - "302": 0.7899835109710693, - "303": 0.6307160258293152, - "304": 0.526786744594574, - "305": 0.5233730673789978, - "306": 0.5989813804626465, - "307": 0.7113780975341797, - "308": 0.6406801342964172, - "309": 0.5731056928634644, - "310": 0.6452125310897827, - "311": 0.9156473278999329, - "312": 0.7744340896606445, - "313": 0.48323923349380493, - "314": 0.38585710525512695, - "315": 0.4710135757923126, - "316": 0.6377565860748291, - "317": 0.6965519189834595, - "318": 0.6850444078445435, - "319": 0.6682303547859192, - "320": 0.6728943586349487, - "321": 0.7384703755378723, - "322": 0.7777429819107056, - "323": 0.6575700044631958 - }, - "loss": { - "270": 2.669743537902832, - "271": 2.6243810653686523, - "272": 2.6640257835388184, - "273": 2.620114803314209, - "274": 2.6805260181427, - "275": 2.625105142593384, - "276": 2.636050224304199, - "277": 2.6242499351501465, - "278": 2.648617744445801, - "279": 2.6396660804748535, - "280": 2.6394271850585938, - "281": 2.634902000427246, - "282": 2.6951990127563477, - "283": 2.6385412216186523, - "284": 2.6460447311401367, - "285": 2.624196767807007, - "286": 2.632338523864746, - "287": 2.600693702697754, - "288": 2.6380844116210938, - "289": 2.6424827575683594, - "290": 2.668303966522217, - "291": 2.608004570007324, - "292": 2.623488426208496, - "293": 2.63919734954834, - "294": 2.6157071590423584, - "295": 2.616673469543457, - "296": 2.6420910358428955, - "297": 2.591675281524658, - "298": 2.6333885192871094, - "299": 2.6473581790924072, - "300": 2.6024539470672607, - "301": 2.6129446029663086, - "302": 2.6780552864074707, - "303": 2.6315975189208984, - "304": 2.5968313217163086, - "305": 2.5613808631896973, - "306": 2.573054313659668, - "307": 2.5965569019317627, - "308": 2.6412408351898193, - "309": 2.5610084533691406, - "310": 2.596482276916504, - "311": 2.628560781478882, - "312": 2.704296112060547, - "313": 2.5825698375701904, - "314": 2.530683994293213, - "315": 2.5261120796203613, - "316": 2.5743026733398438, - "317": 2.5945258140563965, - "318": 2.591357469558716, - "319": 2.5900416374206543, - "320": 2.5771846771240234, - "321": 2.5963213443756104, - "322": 2.6339271068573, - "323": 2.5780746936798096 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 0.931766, - 0.94082, - 0.940317, - 0.932731, - 0.925988, - 0.945248, - 0.948365, - 0.934518, - 0.933038, - 0.938985, - 0.932588, - 0.912036, - 0.910281, - 0.933133, - 0.933034, - 0.934712, - 0.935845, - 0.935189, - 0.92145, - 0.909503, - 0.933141, - 0.940528, - 0.923483, - 0.926547, - 0.94011, - 0.930141, - 0.931895, - 0.921708, - 0.88837, - 0.935482, - 0.946853, - 0.915671, - 0.895643, - 0.929729, - 0.949279, - 0.949243, - 0.934825, - 0.911205, - 0.927898, - 0.939739, - 0.925784, - 0.862455, - 0.900181, - 0.956745, - 0.971425, - 0.957935, - 0.926785, - 0.914493, - 0.91697, - 0.920639, - 0.919248, - 0.90496, - 0.897001, - 0.922628 - ], - "train_epoch_time": 4.8422532081604, - "train_loss": 2.583251646635693, - "train_score": 0.2353636119039021, - "val_loss": 2.6131799234725306, - "val_score": 0.23102934851848983 - }, - { - "epoch": 6, - "grad_norm": 0.5000882744789124, - "learning_rate": 1.0, - "model_norm": 87.84900665283203, - "step_logs": { - "grad_norm": { - "324": 0.5971531867980957, - "325": 0.6022903323173523, - "326": 0.6419209241867065, - "327": 0.6169332265853882, - "328": 0.6087033152580261, - "329": 0.7084470987319946, - "330": 0.7457408905029297, - "331": 0.5256245136260986, - "332": 0.42459234595298767, - "333": 0.5654624700546265, - "334": 0.6345568895339966, - "335": 0.58400958776474, - "336": 0.6193705201148987, - "337": 0.6653412580490112, - "338": 0.6821584701538086, - "339": 0.6362888216972351, - "340": 0.5809125304222107, - "341": 0.5886433124542236, - "342": 0.7237919569015503, - "343": 0.7224463224411011, - "344": 0.6026124358177185, - "345": 0.5036521553993225, - "346": 0.5330109000205994, - "347": 0.618767261505127, - "348": 0.6761471629142761, - "349": 0.6541870832443237, - "350": 0.6549535989761353, - "351": 0.711733341217041, - "352": 0.6920038461685181, - "353": 0.5846808552742004, - "354": 0.5397324562072754, - "355": 0.4805957078933716, - "356": 0.4639611542224884, - "357": 0.5837191343307495, - "358": 0.68544602394104, - "359": 0.6101338267326355, - "360": 0.5560598373413086, - "361": 0.6159241199493408, - "362": 0.626372218132019, - "363": 0.5856086611747742, - "364": 0.576715350151062, - "365": 0.5504580140113831, - "366": 0.49138495326042175, - "367": 0.49599599838256836, - "368": 0.5381412506103516, - "369": 0.5965025424957275, - "370": 0.5716441869735718, - "371": 0.534985363483429, - "372": 0.6050951480865479, - "373": 0.7219687700271606, - "374": 0.8310479521751404, - "375": 0.7454206347465515, - "376": 0.5600138306617737, - "377": 0.5000882744789124 - }, - "loss": { - "324": 2.5617666244506836, - "325": 2.5578224658966064, - "326": 2.5838518142700195, - "327": 2.5559661388397217, - "328": 2.586496114730835, - "329": 2.5217881202697754, - "330": 2.606095314025879, - "331": 2.5538928508758545, - "332": 2.493557929992676, - "333": 2.5040602684020996, - "334": 2.55924654006958, - "335": 2.508248805999756, - "336": 2.578103542327881, - "337": 2.54435396194458, - "338": 2.5729055404663086, - "339": 2.5638070106506348, - "340": 2.539839267730713, - "341": 2.520904064178467, - "342": 2.5694704055786133, - "343": 2.542829751968384, - "344": 2.5430386066436768, - "345": 2.5030465126037598, - "346": 2.5153956413269043, - "347": 2.52713680267334, - "348": 2.5745956897735596, - "349": 2.550501585006714, - "350": 2.5476882457733154, - "351": 2.5550897121429443, - "352": 2.567355155944824, - "353": 2.5307085514068604, - "354": 2.511206865310669, - "355": 2.4994359016418457, - "356": 2.472632884979248, - "357": 2.4980854988098145, - "358": 2.571336269378662, - "359": 2.5367343425750732, - "360": 2.4759137630462646, - "361": 2.5135397911071777, - "362": 2.518010377883911, - "363": 2.5342636108398438, - "364": 2.465496063232422, - "365": 2.5191147327423096, - "366": 2.491001605987549, - "367": 2.482466697692871, - "368": 2.4892263412475586, - "369": 2.4983930587768555, - "370": 2.4938693046569824, - "371": 2.5119211673736572, - "372": 2.4812660217285156, - "373": 2.529157876968384, - "374": 2.5594825744628906, - "375": 2.5710878372192383, - "376": 2.519064426422119, - "377": 2.4687352180480957 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 0.93493, - 0.933785, - 0.926151, - 0.930705, - 0.933162, - 0.909494, - 0.903589, - 0.948685, - 0.965112, - 0.939986, - 0.927069, - 0.936339, - 0.930752, - 0.91997, - 0.917069, - 0.926821, - 0.937705, - 0.935694, - 0.907489, - 0.906925, - 0.933359, - 0.951772, - 0.946546, - 0.929582, - 0.918454, - 0.922597, - 0.92235, - 0.909812, - 0.914694, - 0.936732, - 0.945178, - 0.955836, - 0.958287, - 0.936156, - 0.916288, - 0.931641, - 0.941228, - 0.929831, - 0.927724, - 0.936628, - 0.936811, - 0.943271, - 0.953774, - 0.952789, - 0.945028, - 0.933525, - 0.938512, - 0.9461, - 0.931289, - 0.906581, - 0.881121, - 0.90248, - 0.941399, - 0.951791 - ], - "train_epoch_time": 4.841815710067749, - "train_loss": 2.483801632187459, - "train_score": 0.2878642843905275, - "val_loss": 2.508345295996945, - "val_score": 0.28070734039239853 - }, - { - "epoch": 7, - "grad_norm": 0.534000039100647, - "learning_rate": 1.0, - "model_norm": 87.9289321899414, - "step_logs": { - "grad_norm": { - "378": 0.6672523021697998, - "379": 0.6729017496109009, - "380": 0.5894371271133423, - "381": 0.6475037336349487, - "382": 0.683190643787384, - "383": 0.6214357614517212, - "384": 0.6065525412559509, - "385": 0.6320510506629944, - "386": 0.6907786726951599, - "387": 0.7481032609939575, - "388": 0.5592978000640869, - "389": 0.4814859628677368, - "390": 0.6105024814605713, - "391": 0.621249794960022, - "392": 0.5701602101325989, - "393": 0.6558672189712524, - "394": 0.8090927004814148, - "395": 0.702446460723877, - "396": 0.5572105050086975, - "397": 0.5946030616760254, - "398": 0.8846548199653625, - "399": 0.8287051916122437, - "400": 0.5312759280204773, - "401": 0.3815591037273407, - "402": 0.34675461053848267, - "403": 0.4594365358352661, - "404": 0.5876360535621643, - "405": 0.6224352717399597, - "406": 0.6259280443191528, - "407": 0.6102624535560608, - "408": 0.6231469511985779, - "409": 0.620484471321106, - "410": 0.6014580726623535, - "411": 0.6007525324821472, - "412": 0.5963590741157532, - "413": 0.5844481587409973, - "414": 0.6238325238227844, - "415": 0.6103806495666504, - "416": 0.5578613877296448, - "417": 0.5720828771591187, - "418": 0.5470981597900391, - "419": 0.5328080654144287, - "420": 0.5312775373458862, - "421": 0.5537199974060059, - "422": 0.59703129529953, - "423": 0.5705642104148865, - "424": 0.5319480299949646, - "425": 0.5432087779045105, - "426": 0.5285447239875793, - "427": 0.538119375705719, - "428": 0.5494590401649475, - "429": 0.5210990309715271, - "430": 0.5022335648536682, - "431": 0.534000039100647 - }, - "loss": { - "378": 2.492499351501465, - "379": 2.518148899078369, - "380": 2.4952762126922607, - "381": 2.4923958778381348, - "382": 2.4948248863220215, - "383": 2.5114943981170654, - "384": 2.4629573822021484, - "385": 2.4895424842834473, - "386": 2.4806649684906006, - "387": 2.518702507019043, - "388": 2.4814648628234863, - "389": 2.4595227241516113, - "390": 2.467555046081543, - "391": 2.4997243881225586, - "392": 2.4422643184661865, - "393": 2.477545738220215, - "394": 2.5091967582702637, - "395": 2.506126642227173, - "396": 2.4630508422851562, - "397": 2.462289571762085, - "398": 2.4936091899871826, - "399": 2.5650851726531982, - "400": 2.4836599826812744, - "401": 2.443777084350586, - "402": 2.4244513511657715, - "403": 2.430521011352539, - "404": 2.4571049213409424, - "405": 2.4842782020568848, - "406": 2.4711008071899414, - "407": 2.4833016395568848, - "408": 2.4345924854278564, - "409": 2.4896140098571777, - "410": 2.4557390213012695, - "411": 2.485828161239624, - "412": 2.4550909996032715, - "413": 2.4628920555114746, - "414": 2.4521701335906982, - "415": 2.491764783859253, - "416": 2.429023265838623, - "417": 2.449615001678467, - "418": 2.419907569885254, - "419": 2.4501054286956787, - "420": 2.416001319885254, - "421": 2.4260528087615967, - "422": 2.434298038482666, - "423": 2.457561492919922, - "424": 2.424787998199463, - "425": 2.4322166442871094, - "426": 2.427424669265747, - "427": 2.4174022674560547, - "428": 2.4062047004699707, - "429": 2.4244260787963867, - "430": 2.4041099548339844, - "431": 2.4113759994506836 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 0.91801, - 0.91751, - 0.934913, - 0.922417, - 0.914458, - 0.928606, - 0.930503, - 0.925726, - 0.91226, - 0.900009, - 0.940707, - 0.954992, - 0.92978, - 0.928334, - 0.937599, - 0.920122, - 0.884606, - 0.910378, - 0.940709, - 0.933016, - 0.864361, - 0.881939, - 0.946233, - 0.971074, - 0.975803, - 0.958384, - 0.934345, - 0.927665, - 0.926549, - 0.930246, - 0.926141, - 0.928228, - 0.931398, - 0.932321, - 0.932462, - 0.935152, - 0.926482, - 0.930441, - 0.939796, - 0.937381, - 0.941757, - 0.945239, - 0.94481, - 0.940565, - 0.931781, - 0.937881, - 0.944868, - 0.942809, - 0.945589, - 0.943491, - 0.940969, - 0.946968, - 0.950155, - 0.944174 - ], - "train_epoch_time": 4.841690540313721, - "train_loss": 2.423061289575213, - "train_score": 0.27203080164992827, - "val_loss": 2.4458374610314, - "val_score": 0.2699438507671882 - }, - { - "epoch": 8, - "grad_norm": 0.5307457447052002, - "learning_rate": 1.0, - "model_norm": 88.01598358154297, - "step_logs": { - "grad_norm": { - "432": 0.6045862436294556, - "433": 0.5658586621284485, - "434": 0.560275137424469, - "435": 0.5634326934814453, - "436": 0.5883510708808899, - "437": 0.573127031326294, - "438": 0.7011948227882385, - "439": 1.0743197202682495, - "440": 0.6005929112434387, - "441": 0.47543981671333313, - "442": 0.4811388850212097, - "443": 0.6034538149833679, - "444": 0.7212246656417847, - "445": 0.6774283647537231, - "446": 0.5909043550491333, - "447": 0.5820765495300293, - "448": 0.6224314570426941, - "449": 0.5919395685195923, - "450": 0.6028775572776794, - "451": 0.5705131888389587, - "452": 0.555813729763031, - "453": 0.5373350381851196, - "454": 0.5404369235038757, - "455": 0.544927179813385, - "456": 0.5368862152099609, - "457": 0.5788453817367554, - "458": 0.5847495198249817, - "459": 0.5616357326507568, - "460": 0.5424827337265015, - "461": 0.5723328590393066, - "462": 0.6037271618843079, - "463": 0.6554265022277832, - "464": 0.6995989680290222, - "465": 0.6474835872650146, - "466": 0.52692711353302, - "467": 0.49869897961616516, - "468": 0.5641749501228333, - "469": 0.6213555932044983, - "470": 0.6797971725463867, - "471": 0.8086366057395935, - "472": 0.664639413356781, - "473": 0.6444742679595947, - "474": 0.477022647857666, - "475": 0.43518105149269104, - "476": 0.4455399215221405, - "477": 0.4941543936729431, - "478": 0.6534720659255981, - "479": 0.6960119605064392, - "480": 0.7720495462417603, - "481": 0.901447594165802, - "482": 0.5485942363739014, - "483": 0.5247522592544556, - "484": 0.5808753967285156, - "485": 0.5307457447052002 - }, - "loss": { - "432": 2.4294066429138184, - "433": 2.4400105476379395, - "434": 2.4061295986175537, - "435": 2.4413838386535645, - "436": 2.415982246398926, - "437": 2.4369966983795166, - "438": 2.4323067665100098, - "439": 2.520164966583252, - "440": 2.5271079540252686, - "441": 2.4397060871124268, - "442": 2.40366268157959, - "443": 2.4279680252075195, - "444": 2.46299409866333, - "445": 2.4677391052246094, - "446": 2.428255081176758, - "447": 2.4093849658966064, - "448": 2.4300594329833984, - "449": 2.4308786392211914, - "450": 2.4155869483947754, - "451": 2.430352210998535, - "452": 2.392518997192383, - "453": 2.4181995391845703, - "454": 2.4137353897094727, - "455": 2.416621685028076, - "456": 2.3817925453186035, - "457": 2.4191548824310303, - "458": 2.4184141159057617, - "459": 2.4197685718536377, - "460": 2.3851068019866943, - "461": 2.4107916355133057, - "462": 2.3969180583953857, - "463": 2.418745994567871, - "464": 2.4208900928497314, - "465": 2.4515137672424316, - "466": 2.401920795440674, - "467": 2.394526481628418, - "468": 2.3786001205444336, - "469": 2.4157373905181885, - "470": 2.402095317840576, - "471": 2.441835880279541, - "472": 2.462407112121582, - "473": 2.4406094551086426, - "474": 2.3807761669158936, - "475": 2.356658458709717, - "476": 2.3443193435668945, - "477": 2.3668155670166016, - "478": 2.3903822898864746, - "479": 2.4566798210144043, - "480": 2.42482852935791, - "481": 2.461195230484009, - "482": 2.420844554901123, - "483": 2.3973467350006104, - "484": 2.397913694381714, - "485": 2.444324493408203 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 0.930034, - 0.938426, - 0.938764, - 0.938953, - 0.93315, - 0.936862, - 0.908206, - 0.813679, - 0.933386, - 0.955725, - 0.954058, - 0.930239, - 0.904489, - 0.914928, - 0.932926, - 0.934308, - 0.926171, - 0.932774, - 0.930031, - 0.93724, - 0.939354, - 0.943664, - 0.94295, - 0.942118, - 0.942942, - 0.935233, - 0.933974, - 0.93881, - 0.941892, - 0.936385, - 0.92934, - 0.91844, - 0.908194, - 0.92123, - 0.94536, - 0.950633, - 0.937288, - 0.926003, - 0.912249, - 0.881917, - 0.917686, - 0.921582, - 0.95439, - 0.961372, - 0.959382, - 0.950945, - 0.918003, - 0.910254, - 0.890545, - 0.858307, - 0.941478, - 0.945688, - 0.934268, - 0.945518 - ], - "train_epoch_time": 4.842552185058594, - "train_loss": 2.3655868504276576, - "train_score": 0.32127645255161325, - "val_loss": 2.405880637886056, - "val_score": 0.3133027771857795 - }, - { - "epoch": 9, - "grad_norm": 0.5573657751083374, - "learning_rate": 1.0, - "model_norm": 88.09248352050781, - "step_logs": { - "grad_norm": { - "486": 0.4535903334617615, - "487": 0.43654677271842957, - "488": 0.4688446521759033, - "489": 0.5269677639007568, - "490": 0.5538724660873413, - "491": 0.5637977719306946, - "492": 0.5148653984069824, - "493": 0.48916780948638916, - "494": 0.4811687767505646, - "495": 0.53957200050354, - "496": 0.551761269569397, - "497": 0.49464598298072815, - "498": 0.5652946829795837, - "499": 0.559206485748291, - "500": 0.5420349836349487, - "501": 0.5247375965118408, - "502": 0.5506398677825928, - "503": 0.5332450866699219, - "504": 0.4832785427570343, - "505": 0.48463431000709534, - "506": 0.5219869613647461, - "507": 0.5007066130638123, - "508": 0.456484317779541, - "509": 0.4804133474826813, - "510": 0.5664798617362976, - "511": 0.5971078276634216, - "512": 0.6086966395378113, - "513": 0.5204153060913086, - "514": 0.44804394245147705, - "515": 0.49400898814201355, - "516": 0.4965521991252899, - "517": 0.5368234515190125, - "518": 0.5534977912902832, - "519": 0.5647637248039246, - "520": 0.5865835547447205, - "521": 0.5835053324699402, - "522": 0.5663397312164307, - "523": 0.5447574853897095, - "524": 0.6109901666641235, - "525": 0.7890527844429016, - "526": 0.808137059211731, - "527": 0.6618877649307251, - "528": 0.5339902639389038, - "529": 0.4922279119491577, - "530": 0.5829690098762512, - "531": 0.5641847252845764, - "532": 0.5310723185539246, - "533": 0.5086075663566589, - "534": 0.483493447303772, - "535": 0.5759891271591187, - "536": 0.6187015771865845, - "537": 0.6298069953918457, - "538": 0.6174057722091675, - "539": 0.5573657751083374 - }, - "loss": { - "486": 2.3655457496643066, - "487": 2.3609251976013184, - "488": 2.3451809883117676, - "489": 2.395559549331665, - "490": 2.3671340942382812, - "491": 2.411898612976074, - "492": 2.3650951385498047, - "493": 2.3698604106903076, - "494": 2.3744654655456543, - "495": 2.3806395530700684, - "496": 2.387601852416992, - "497": 2.38102388381958, - "498": 2.377661943435669, - "499": 2.3691205978393555, - "500": 2.3674073219299316, - "501": 2.40138578414917, - "502": 2.3700547218322754, - "503": 2.397083282470703, - "504": 2.3748817443847656, - "505": 2.341590404510498, - "506": 2.343320369720459, - "507": 2.376145124435425, - "508": 2.334594249725342, - "509": 2.367121696472168, - "510": 2.368175745010376, - "511": 2.3799967765808105, - "512": 2.368922233581543, - "513": 2.4109578132629395, - "514": 2.360182762145996, - "515": 2.359098434448242, - "516": 2.344724416732788, - "517": 2.3594279289245605, - "518": 2.362699508666992, - "519": 2.361393928527832, - "520": 2.37358021736145, - "521": 2.3732872009277344, - "522": 2.376909017562866, - "523": 2.3884658813476562, - "524": 2.3681800365448, - "525": 2.423929214477539, - "526": 2.4424996376037598, - "527": 2.3778748512268066, - "528": 2.364795684814453, - "529": 2.340841770172119, - "530": 2.3853812217712402, - "531": 2.3833248615264893, - "532": 2.34861421585083, - "533": 2.376563787460327, - "534": 2.3223228454589844, - "535": 2.3330297470092773, - "536": 2.366586208343506, - "537": 2.365109443664551, - "538": 2.355536937713623, - "539": 2.3546268939971924 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 0.958325, - 0.961206, - 0.955233, - 0.945215, - 0.939145, - 0.938178, - 0.946933, - 0.951941, - 0.953514, - 0.942377, - 0.940067, - 0.951131, - 0.937031, - 0.938089, - 0.941574, - 0.945777, - 0.93988, - 0.944009, - 0.953132, - 0.952243, - 0.945057, - 0.949889, - 0.957278, - 0.953516, - 0.936547, - 0.930317, - 0.927469, - 0.94682, - 0.959208, - 0.95082, - 0.950048, - 0.942445, - 0.939115, - 0.936737, - 0.932417, - 0.93307, - 0.936794, - 0.94151, - 0.926941, - 0.886188, - 0.882074, - 0.915651, - 0.943138, - 0.950794, - 0.933501, - 0.937403, - 0.943358, - 0.948386, - 0.952082, - 0.933618, - 0.925177, - 0.922632, - 0.925143, - 0.938115 - ], - "train_epoch_time": 4.842437267303467, - "train_loss": 2.3432632475706563, - "train_score": 0.32907550211787395, - "val_loss": 2.383026872792556, - "val_score": 0.3201017148801298 - }, - { - "epoch": 10, - "grad_norm": 0.5111677050590515, - "learning_rate": 1.0, - "model_norm": 88.16957092285156, - "step_logs": { - "grad_norm": { - "540": 0.4596772789955139, - "541": 0.4341847896575928, - "542": 0.5254841446876526, - "543": 0.566222071647644, - "544": 0.5521520972251892, - "545": 0.5631356835365295, - "546": 0.5399010181427002, - "547": 0.5117794275283813, - "548": 0.4850256145000458, - "549": 0.49450379610061646, - "550": 0.5503708124160767, - "551": 0.5276839733123779, - "552": 0.5123575329780579, - "553": 0.5299116373062134, - "554": 0.5598269701004028, - "555": 0.5512658953666687, - "556": 0.5198515057563782, - "557": 0.5541303157806396, - "558": 0.641183614730835, - "559": 0.694770336151123, - "560": 0.5791642665863037, - "561": 0.4883069097995758, - "562": 0.54417484998703, - "563": 0.5214710235595703, - "564": 0.49169886112213135, - "565": 0.5291573405265808, - "566": 0.5903640389442444, - "567": 0.5659109950065613, - "568": 0.5247699618339539, - "569": 0.5331757664680481, - "570": 0.5237985849380493, - "571": 0.5638059377670288, - "572": 0.5260524153709412, - "573": 0.5002376437187195, - "574": 0.557741105556488, - "575": 0.5511950850486755, - "576": 0.544090211391449, - "577": 0.5877402424812317, - "578": 0.6141374111175537, - "579": 0.5222635269165039, - "580": 0.48640304803848267, - "581": 0.49971556663513184, - "582": 0.4673531651496887, - "583": 0.42435377836227417, - "584": 0.4753330647945404, - "585": 0.4752807021141052, - "586": 0.4638010859489441, - "587": 0.502336859703064, - "588": 0.4949812889099121, - "589": 0.4878048598766327, - "590": 0.5196456909179688, - "591": 0.5034701824188232, - "592": 0.4684698283672333, - "593": 0.5111677050590515 - }, - "loss": { - "540": 2.3485372066497803, - "541": 2.327695846557617, - "542": 2.3510007858276367, - "543": 2.397897243499756, - "544": 2.332845687866211, - "545": 2.351045608520508, - "546": 2.3531222343444824, - "547": 2.354424238204956, - "548": 2.3402605056762695, - "549": 2.33109974861145, - "550": 2.356163501739502, - "551": 2.3666553497314453, - "552": 2.3335275650024414, - "553": 2.3365063667297363, - "554": 2.33563232421875, - "555": 2.376941680908203, - "556": 2.335899829864502, - "557": 2.337794065475464, - "558": 2.323544979095459, - "559": 2.4092025756835938, - "560": 2.334195137023926, - "561": 2.356853485107422, - "562": 2.3534493446350098, - "563": 2.354688882827759, - "564": 2.3419101238250732, - "565": 2.314380645751953, - "566": 2.336700916290283, - "567": 2.335231065750122, - "568": 2.347973108291626, - "569": 2.3590574264526367, - "570": 2.3474464416503906, - "571": 2.3411898612976074, - "572": 2.3616764545440674, - "573": 2.349451780319214, - "574": 2.328526258468628, - "575": 2.3436975479125977, - "576": 2.328582763671875, - "577": 2.3568406105041504, - "578": 2.364205837249756, - "579": 2.3688583374023438, - "580": 2.325706720352173, - "581": 2.3392255306243896, - "582": 2.3274292945861816, - "583": 2.3014719486236572, - "584": 2.3278751373291016, - "585": 2.3259596824645996, - "586": 2.2896718978881836, - "587": 2.3136181831359863, - "588": 2.3155267238616943, - "589": 2.2978756427764893, - "590": 2.3300156593322754, - "591": 2.3338265419006348, - "592": 2.3021931648254395, - "593": 2.3216135501861572 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 0.95695, - 0.961082, - 0.944531, - 0.937337, - 0.938664, - 0.936818, - 0.941675, - 0.947308, - 0.952144, - 0.950164, - 0.939602, - 0.944441, - 0.946748, - 0.943315, - 0.937126, - 0.939916, - 0.945317, - 0.938374, - 0.918723, - 0.908943, - 0.932965, - 0.95185, - 0.940811, - 0.945409, - 0.950916, - 0.942958, - 0.930598, - 0.93583, - 0.944606, - 0.943172, - 0.944788, - 0.936428, - 0.944655, - 0.949438, - 0.937386, - 0.93913, - 0.940234, - 0.93172, - 0.926127, - 0.945562, - 0.951598, - 0.949329, - 0.95518, - 0.962351, - 0.953717, - 0.95369, - 0.955133, - 0.948286, - 0.949753, - 0.950772, - 0.945228, - 0.948491, - 0.954504, - 0.946724 - ], - "train_epoch_time": 4.84199595451355, - "train_loss": 2.321312928986515, - "train_score": 0.3224040530162357, - "val_loss": 2.3717165173947743, - "val_score": 0.3129574491222198 - }, - { - "epoch": 11, - "grad_norm": 0.5891222953796387, - "learning_rate": 1.0, - "model_norm": 88.25027465820312, - "step_logs": { - "grad_norm": { - "594": 0.5133320093154907, - "595": 0.482785701751709, - "596": 0.4656589925289154, - "597": 0.4678007662296295, - "598": 0.5461351275444031, - "599": 0.5860890746116638, - "600": 0.5101441144943237, - "601": 0.44936949014663696, - "602": 0.45749667286872864, - "603": 0.525749921798706, - "604": 0.5417052507400513, - "605": 0.49314481019973755, - "606": 0.5035368800163269, - "607": 0.5096091032028198, - "608": 0.5163124203681946, - "609": 0.548633337020874, - "610": 0.514327883720398, - "611": 0.485579252243042, - "612": 0.5117934942245483, - "613": 0.5537772178649902, - "614": 0.5970088243484497, - "615": 0.5690841674804688, - "616": 0.5433055758476257, - "617": 0.5305554866790771, - "618": 0.5305752158164978, - "619": 0.5523653030395508, - "620": 0.5469662547111511, - "621": 0.510524332523346, - "622": 0.489940345287323, - "623": 0.5190476775169373, - "624": 0.5540767312049866, - "625": 0.5712870955467224, - "626": 0.591969907283783, - "627": 0.5713293552398682, - "628": 0.4952087700366974, - "629": 0.5064335465431213, - "630": 0.5567072033882141, - "631": 0.5270132422447205, - "632": 0.44978469610214233, - "633": 0.41745802760124207, - "634": 0.42759495973587036, - "635": 0.46977895498275757, - "636": 0.46791911125183105, - "637": 0.4392073154449463, - "638": 0.4680866003036499, - "639": 0.5004833340644836, - "640": 0.5214317440986633, - "641": 0.5833013653755188, - "642": 0.5800824761390686, - "643": 0.5446174144744873, - "644": 0.5960995554924011, - "645": 0.6510222554206848, - "646": 0.6233162879943848, - "647": 0.5891222953796387 - }, - "loss": { - "594": 2.3173465728759766, - "595": 2.341799020767212, - "596": 2.318256378173828, - "597": 2.2926080226898193, - "598": 2.3009142875671387, - "599": 2.324228286743164, - "600": 2.3181920051574707, - "601": 2.3202199935913086, - "602": 2.307252883911133, - "603": 2.328923463821411, - "604": 2.3133187294006348, - "605": 2.32059383392334, - "606": 2.3075478076934814, - "607": 2.315316677093506, - "608": 2.295219898223877, - "609": 2.3372952938079834, - "610": 2.3392343521118164, - "611": 2.2881274223327637, - "612": 2.3340096473693848, - "613": 2.305269241333008, - "614": 2.354292392730713, - "615": 2.3354878425598145, - "616": 2.3175840377807617, - "617": 2.3408193588256836, - "618": 2.3563551902770996, - "619": 2.3441519737243652, - "620": 2.3236570358276367, - "621": 2.3328073024749756, - "622": 2.2992446422576904, - "623": 2.310436248779297, - "624": 2.3252806663513184, - "625": 2.3196282386779785, - "626": 2.322413921356201, - "627": 2.3667993545532227, - "628": 2.3076698780059814, - "629": 2.298457622528076, - "630": 2.2946012020111084, - "631": 2.3172450065612793, - "632": 2.2990386486053467, - "633": 2.2998156547546387, - "634": 2.2614967823028564, - "635": 2.2691798210144043, - "636": 2.29813814163208, - "637": 2.3086698055267334, - "638": 2.301081418991089, - "639": 2.277754306793213, - "640": 2.286409854888916, - "641": 2.321709156036377, - "642": 2.324002504348755, - "643": 2.3309974670410156, - "644": 2.3036136627197266, - "645": 2.341291904449463, - "646": 2.337948799133301, - "647": 2.319767475128174 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 0.946203, - 0.952594, - 0.955322, - 0.954447, - 0.939131, - 0.931189, - 0.946852, - 0.958299, - 0.95661, - 0.943981, - 0.940358, - 0.95021, - 0.947922, - 0.946895, - 0.945115, - 0.939505, - 0.946483, - 0.951001, - 0.946869, - 0.937633, - 0.929631, - 0.935162, - 0.94013, - 0.943284, - 0.943633, - 0.938898, - 0.939518, - 0.947093, - 0.95039, - 0.944909, - 0.938074, - 0.934274, - 0.929848, - 0.935491, - 0.949547, - 0.947156, - 0.936739, - 0.943459, - 0.957856, - 0.963495, - 0.961147, - 0.953627, - 0.95453, - 0.959897, - 0.954554, - 0.947881, - 0.943879, - 0.931729, - 0.932492, - 0.940183, - 0.928397, - 0.917, - 0.923284, - 0.9304 - ], - "train_epoch_time": 4.841830015182495, - "train_loss": 2.3295666375837505, - "train_score": 0.32586419487444196, - "val_loss": 2.3782753353140795, - "val_score": 0.31583668183787683 - }, - { - "epoch": 12, - "grad_norm": 0.3471137583255768, - "learning_rate": 1.0, - "model_norm": 88.31282043457031, - "step_logs": { - "grad_norm": { - "648": 1.1777749061584473, - "649": 0.5775364637374878, - "650": 0.6844466924667358, - "651": 0.596462070941925, - "652": 0.8689321875572205, - "653": 0.6130152940750122, - "654": 0.4199494421482086, - "655": 0.3861088156700134, - "656": 0.48311862349510193, - "657": 0.48955103754997253, - "658": 0.43584492802619934, - "659": 0.42765292525291443, - "660": 0.45243245363235474, - "661": 0.4535365700721741, - "662": 0.45955464243888855, - "663": 0.45431095361709595, - "664": 0.4133497476577759, - "665": 0.39969709515571594, - "666": 0.41830238699913025, - "667": 0.43993914127349854, - "668": 0.4705626666545868, - "669": 0.4866509735584259, - "670": 0.46342143416404724, - "671": 0.41562455892562866, - "672": 0.39392420649528503, - "673": 0.3934900760650635, - "674": 0.46160849928855896, - "675": 0.4165823757648468, - "676": 0.4142131805419922, - "677": 0.4793565273284912, - "678": 0.5682106614112854, - "679": 0.5044792294502258, - "680": 0.3850363492965698, - "681": 0.370151162147522, - "682": 0.3876659870147705, - "683": 0.44797414541244507, - "684": 0.4645005762577057, - "685": 0.5066595077514648, - "686": 0.4528997540473938, - "687": 0.3925599753856659, - "688": 0.3621465861797333, - "689": 0.34414830803871155, - "690": 0.3602757453918457, - "691": 0.35121628642082214, - "692": 0.3378406763076782, - "693": 0.3606458008289337, - "694": 0.4013032019138336, - "695": 0.43680357933044434, - "696": 0.4419235289096832, - "697": 0.43803393840789795, - "698": 0.45218780636787415, - "699": 0.44148799777030945, - "700": 0.3915144205093384, - "701": 0.3471137583255768 - }, - "loss": { - "648": 2.3302783966064453, - "649": 2.363994598388672, - "650": 2.3300065994262695, - "651": 2.3496322631835938, - "652": 2.373607635498047, - "653": 2.408134937286377, - "654": 2.3167662620544434, - "655": 2.3106939792633057, - "656": 2.300048828125, - "657": 2.3136980533599854, - "658": 2.292933702468872, - "659": 2.297440528869629, - "660": 2.28442645072937, - "661": 2.2821733951568604, - "662": 2.304558753967285, - "663": 2.3276240825653076, - "664": 2.2810564041137695, - "665": 2.2977817058563232, - "666": 2.253488540649414, - "667": 2.261998176574707, - "668": 2.276346206665039, - "669": 2.289158582687378, - "670": 2.2793164253234863, - "671": 2.2820611000061035, - "672": 2.2701451778411865, - "673": 2.2513554096221924, - "674": 2.274477005004883, - "675": 2.2709403038024902, - "676": 2.240373134613037, - "677": 2.2823562622070312, - "678": 2.264913558959961, - "679": 2.2986536026000977, - "680": 2.2662601470947266, - "681": 2.2509055137634277, - "682": 2.249601364135742, - "683": 2.259070873260498, - "684": 2.272066116333008, - "685": 2.2733898162841797, - "686": 2.291858673095703, - "687": 2.296607255935669, - "688": 2.258047580718994, - "689": 2.2477993965148926, - "690": 2.252373218536377, - "691": 2.2413618564605713, - "692": 2.212996482849121, - "693": 2.2121965885162354, - "694": 2.2410457134246826, - "695": 2.268937587738037, - "696": 2.241586446762085, - "697": 2.27162504196167, - "698": 2.2394938468933105, - "699": 2.26474928855896, - "700": 2.2560529708862305, - "701": 2.226684093475342 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 0.770632, - 0.928713, - 0.898449, - 0.913597, - 0.844333, - 0.901005, - 0.928917, - 0.928143, - 0.906876, - 0.900402, - 0.903169, - 0.898755, - 0.889045, - 0.883147, - 0.876874, - 0.872313, - 0.871809, - 0.868052, - 0.859237, - 0.850594, - 0.840702, - 0.832872, - 0.83039, - 0.831037, - 0.827753, - 0.821781, - 0.807742, - 0.807618, - 0.801766, - 0.788405, - 0.770091, - 0.773994, - 0.781945, - 0.777455, - 0.769807, - 0.757572, - 0.750078, - 0.739394, - 0.740082, - 0.740399, - 0.736969, - 0.7325, - 0.725261, - 0.720014, - 0.714965, - 0.707207, - 0.698089, - 0.689303, - 0.682773, - 0.677571, - 0.670205, - 0.665561, - 0.663703, - 0.66081 - ], - "train_epoch_time": 4.842498064041138, - "train_loss": 2.2332572330190255, - "train_score": 0.3521801021639552, - "val_loss": 2.3010796326309886, - "val_score": 0.33304481268202807 - }, - { - "epoch": 13, - "grad_norm": 0.19276770949363708, - "learning_rate": 0.6666666666666667, - "model_norm": 88.34199523925781, - "step_logs": { - "grad_norm": { - "702": 0.3367370367050171, - "703": 0.3300202190876007, - "704": 0.35249218344688416, - "705": 0.37002718448638916, - "706": 0.39258378744125366, - "707": 0.4061930775642395, - "708": 0.3715112507343292, - "709": 0.3333435654640198, - "710": 0.30995285511016846, - "711": 0.3120267689228058, - "712": 0.32090482115745544, - "713": 0.34134599566459656, - "714": 0.34763240814208984, - "715": 0.34040433168411255, - "716": 0.34016522765159607, - "717": 0.32474440336227417, - "718": 0.2861678898334503, - "719": 0.2720697820186615, - "720": 0.26680541038513184, - "721": 0.2540389597415924, - "722": 0.24680061638355255, - "723": 0.27230924367904663, - "724": 0.27121758460998535, - "725": 0.3167182505130768, - "726": 0.304227352142334, - "727": 0.27463096380233765, - "728": 0.2517527937889099, - "729": 0.2406729757785797, - "730": 0.2385374903678894, - "731": 0.2672624886035919, - "732": 0.2643625736236572, - "733": 0.2615050971508026, - "734": 0.2672218382358551, - "735": 0.2772354185581207, - "736": 0.28493732213974, - "737": 0.30446380376815796, - "738": 0.2752019762992859, - "739": 0.2577574849128723, - "740": 0.22526749968528748, - "741": 0.21702386438846588, - "742": 0.1880822777748108, - "743": 0.20799009501934052, - "744": 0.22810305655002594, - "745": 0.2072276622056961, - "746": 0.20557722449302673, - "747": 0.2135082185268402, - "748": 0.24156875908374786, - "749": 0.2122519463300705, - "750": 0.21947483718395233, - "751": 0.2319253534078598, - "752": 0.2278103083372116, - "753": 0.21703630685806274, - "754": 0.22236396372318268, - "755": 0.19276770949363708 - }, - "loss": { - "702": 2.2213964462280273, - "703": 2.2200703620910645, - "704": 2.2125444412231445, - "705": 2.215944290161133, - "706": 2.2394566535949707, - "707": 2.2375903129577637, - "708": 2.247518539428711, - "709": 2.228762149810791, - "710": 2.2198896408081055, - "711": 2.2327170372009277, - "712": 2.2222037315368652, - "713": 2.2404026985168457, - "714": 2.215725898742676, - "715": 2.2390024662017822, - "716": 2.2179675102233887, - "717": 2.248314142227173, - "718": 2.21696400642395, - "719": 2.2123448848724365, - "720": 2.203108787536621, - "721": 2.219628095626831, - "722": 2.2158589363098145, - "723": 2.2054171562194824, - "724": 2.203853130340576, - "725": 2.2004506587982178, - "726": 2.2177019119262695, - "727": 2.1910629272460938, - "728": 2.2068214416503906, - "729": 2.21448016166687, - "730": 2.216590642929077, - "731": 2.1941914558410645, - "732": 2.2182273864746094, - "733": 2.2038538455963135, - "734": 2.1972885131835938, - "735": 2.1980886459350586, - "736": 2.2136335372924805, - "737": 2.1935997009277344, - "738": 2.193983554840088, - "739": 2.2163987159729004, - "740": 2.2128028869628906, - "741": 2.184307813644409, - "742": 2.2243218421936035, - "743": 2.2232890129089355, - "744": 2.20253849029541, - "745": 2.2184159755706787, - "746": 2.1896615028381348, - "747": 2.211479663848877, - "748": 2.196040630340576, - "749": 2.158949613571167, - "750": 2.183164119720459, - "751": 2.1787428855895996, - "752": 2.1821770668029785, - "753": 2.183241367340088, - "754": 2.17989444732666, - "755": 2.1832149028778076 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 0.655513, - 0.649964, - 0.642516, - 0.635424, - 0.6281, - 0.62124, - 0.617688, - 0.613916, - 0.609147, - 0.603076, - 0.596576, - 0.589586, - 0.583168, - 0.577654, - 0.571595, - 0.566448, - 0.562007, - 0.556499, - 0.550614, - 0.54503, - 0.539184, - 0.532232, - 0.526202, - 0.518491, - 0.512968, - 0.507867, - 0.50252, - 0.496752, - 0.490717, - 0.483814, - 0.477857, - 0.471829, - 0.465587, - 0.459246, - 0.452995, - 0.446367, - 0.441061, - 0.435411, - 0.429968, - 0.423979, - 0.418357, - 0.411923, - 0.405456, - 0.399682, - 0.393561, - 0.387336, - 0.38078, - 0.37507, - 0.368863, - 0.362568, - 0.356507, - 0.350521, - 0.344329, - 0.338528 - ], - "train_epoch_time": 4.843467473983765, - "train_loss": 2.187423188990808, - "train_score": 0.3627488342641587, - "val_loss": 2.261908656557173, - "val_score": 0.34299207058757647 - }, - { - "epoch": 14, - "grad_norm": 0.1918720155954361, - "learning_rate": 0.33333333333333337, - "model_norm": 88.35152435302734, - "step_logs": { - "grad_norm": { - "756": 0.2120075225830078, - "757": 0.18507209420204163, - "758": 0.18970555067062378, - "759": 0.2302817851305008, - "760": 0.2818502187728882, - "761": 0.22272615134716034, - "762": 0.19985193014144897, - "763": 0.18835432827472687, - "764": 0.22415927052497864, - "765": 0.21204973757266998, - "766": 0.21221452951431274, - "767": 0.19515444338321686, - "768": 0.20536620914936066, - "769": 0.18919330835342407, - "770": 0.22594144940376282, - "771": 0.21108511090278625, - "772": 0.18369100987911224, - "773": 0.22985200583934784, - "774": 0.2018551528453827, - "775": 0.20708796381950378, - "776": 0.204896941781044, - "777": 0.20150117576122284, - "778": 0.20800389349460602, - "779": 0.19345587491989136, - "780": 0.1838405430316925, - "781": 0.18990516662597656, - "782": 0.19190816581249237, - "783": 0.20387667417526245, - "784": 0.1956193745136261, - "785": 0.1914726197719574, - "786": 0.1904798001050949, - "787": 0.20598207414150238, - "788": 0.2333458513021469, - "789": 0.20045232772827148, - "790": 0.1941618174314499, - "791": 0.18953397870063782, - "792": 0.18890880048274994, - "793": 0.1807178109884262, - "794": 0.18581968545913696, - "795": 0.18459971249103546, - "796": 0.19288456439971924, - "797": 0.17857621610164642, - "798": 0.1772775799036026, - "799": 0.18140459060668945, - "800": 0.15560737252235413, - "801": 0.186567023396492, - "802": 0.20617219805717468, - "803": 0.18182601034641266, - "804": 0.1799536645412445, - "805": 0.21246781945228577, - "806": 0.17745335400104523, - "807": 0.18768306076526642, - "808": 0.19940459728240967, - "809": 0.1918720155954361 - }, - "loss": { - "756": 2.1920931339263916, - "757": 2.1925957202911377, - "758": 2.1922717094421387, - "759": 2.1849136352539062, - "760": 2.167494773864746, - "761": 2.1916823387145996, - "762": 2.196556568145752, - "763": 2.166910171508789, - "764": 2.1777358055114746, - "765": 2.18515682220459, - "766": 2.2083334922790527, - "767": 2.1872642040252686, - "768": 2.2003586292266846, - "769": 2.1752116680145264, - "770": 2.1866278648376465, - "771": 2.1671388149261475, - "772": 2.189948081970215, - "773": 2.1878371238708496, - "774": 2.202288866043091, - "775": 2.184051275253296, - "776": 2.169708251953125, - "777": 2.1713132858276367, - "778": 2.1911211013793945, - "779": 2.1960315704345703, - "780": 2.193258047103882, - "781": 2.172991991043091, - "782": 2.1540231704711914, - "783": 2.1922898292541504, - "784": 2.1756553649902344, - "785": 2.1882693767547607, - "786": 2.159757614135742, - "787": 2.1898012161254883, - "788": 2.179352283477783, - "789": 2.194922924041748, - "790": 2.160473346710205, - "791": 2.19187068939209, - "792": 2.175341844558716, - "793": 2.1823716163635254, - "794": 2.200077533721924, - "795": 2.177055835723877, - "796": 2.1717052459716797, - "797": 2.1864142417907715, - "798": 2.1723575592041016, - "799": 2.1695990562438965, - "800": 2.1579627990722656, - "801": 2.1700949668884277, - "802": 2.1710829734802246, - "803": 2.1577982902526855, - "804": 2.158672332763672, - "805": 2.1799466609954834, - "806": 2.1822350025177, - "807": 2.158999443054199, - "808": 2.1973447799682617, - "809": 2.1833131313323975 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 0.332198, - 0.326327, - 0.320144, - 0.313617, - 0.306906, - 0.301437, - 0.2955, - 0.289436, - 0.283023, - 0.276986, - 0.270855, - 0.26482, - 0.258617, - 0.25256, - 0.246204, - 0.240146, - 0.234145, - 0.227767, - 0.221766, - 0.215592, - 0.209451, - 0.203316, - 0.197146, - 0.191047, - 0.184921, - 0.178747, - 0.172585, - 0.166404, - 0.160268, - 0.154122, - 0.147964, - 0.14178, - 0.135572, - 0.129476, - 0.123324, - 0.117171, - 0.11101, - 0.104856, - 0.098689, - 0.0925255, - 0.0863558, - 0.0802, - 0.0740344, - 0.0678663, - 0.061707, - 0.0555308, - 0.0493589, - 0.0431956, - 0.0370268, - 0.0308543, - 0.024687, - 0.0185157, - 0.0123443, - 0.00617252 - ], - "train_epoch_time": 4.843153238296509, - "train_loss": 2.174925650619879, - "train_score": 0.3647933105228621, - "val_loss": 2.250647404984957, - "val_score": 0.3442209032797239 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:37:14.167890", - "final_model_norm": 88.35152435302734, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:35:32.596938", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.9831418991088867, - "learning_rate": 1e-10, - "model_norm": 87.28739166259766, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.520443916320801, - "3": 8.1116943359375, - "4": 8.071544647216797, - "5": 5.051503658294678, - "6": 10.454682350158691, - "7": 3.2941524982452393, - "8": 7.0451741218566895, - "9": 3.3513622283935547, - "10": 7.286918640136719, - "11": 4.471482276916504, - "12": 17.447614669799805, - "13": 5.5175323486328125, - "14": 40.61209487915039, - "15": 6.867544174194336, - "16": 10.986544609069824, - "17": 9.684115409851074, - "18": 12.968350410461426, - "19": 4.472919464111328, - "20": 7.644439220428467, - "21": 5.183309555053711, - "22": 6.06050968170166, - "23": 13.362408638000488, - "24": 11.589578628540039, - "25": 5.267686367034912, - "26": 5.404995918273926, - "27": 14.979524612426758, - "28": 13.429210662841797, - "29": 6.378403663635254, - "30": 12.583794593811035, - "31": 5.356675148010254, - "32": 12.356690406799316, - "33": 10.635289192199707, - "34": 13.440580368041992, - "35": 16.028547286987305, - "36": 8.718912124633789, - "37": 5.2349419593811035, - "38": 16.089889526367188, - "39": 3.958259105682373, - "40": 15.132730484008789, - "41": 7.108010292053223, - "42": 8.280714988708496, - "43": 15.54705810546875, - "44": 7.04470157623291, - "45": 7.8422393798828125, - "46": 15.904803276062012, - "47": 3.8307065963745117, - "48": 8.149847030639648, - "49": 9.206463813781738, - "50": 4.509592533111572, - "51": 13.518691062927246, - "52": 3.18174409866333, - "53": 1.9831418991088867 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.7987375259399414, - "3": 4.1109819412231445, - "4": 3.9307804107666016, - "5": 4.457658290863037, - "6": 3.9617631435394287, - "7": 3.9944658279418945, - "8": 4.307804107666016, - "9": 3.8398241996765137, - "10": 4.925821304321289, - "11": 4.240236282348633, - "12": 5.873170852661133, - "13": 6.514164924621582, - "14": 5.1177263259887695, - "15": 6.173100471496582, - "16": 3.955044984817505, - "17": 4.3252363204956055, - "18": 4.531990051269531, - "19": 4.689167022705078, - "20": 4.176158428192139, - "21": 5.519330024719238, - "22": 4.509194374084473, - "23": 5.555792331695557, - "24": 4.245941162109375, - "25": 3.864851951599121, - "26": 4.760091781616211, - "27": 6.997500419616699, - "28": 6.962763786315918, - "29": 6.041469097137451, - "30": 6.256098747253418, - "31": 6.706310272216797, - "32": 6.257039546966553, - "33": 6.636319160461426, - "34": 6.713165760040283, - "35": 7.423327922821045, - "36": 5.201717376708984, - "37": 4.588018417358398, - "38": 5.380327224731445, - "39": 4.172258377075195, - "40": 7.587206840515137, - "41": 7.414318084716797, - "42": 5.928915023803711, - "43": 8.181875228881836, - "44": 7.675028324127197, - "45": 6.547153949737549, - "46": 8.30854320526123, - "47": 6.58784294128418, - "48": 5.120253562927246, - "49": 5.55564546585083, - "50": 5.101111888885498, - "51": 7.125539779663086, - "52": 5.335868835449219, - "53": 4.103789806365967 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 1e-10, - 0.0093311, - 0.0326839, - 0.0405358, - 0.0481067, - 0.077747, - 0.0451921, - 0.117631, - 0.083257, - 0.142489, - 0.0962475, - 0.144862, - 0.0332416, - 0.161738, - 0.00607122, - 0.139794, - 0.0543936, - 0.0725561, - 0.0468772, - 0.209868, - 0.105301, - 0.207692, - 0.157592, - 0.0548153, - 0.0558641, - 0.178896, - 0.200332, - 0.0559123, - 0.0678596, - 0.196417, - 0.0698204, - 0.266508, - 0.0726545, - 0.0996299, - 0.0669997, - 0.0533815, - 0.114995, - 0.230527, - 0.0394101, - 0.316489, - 0.0611952, - 0.216137, - 0.143407, - 0.0627592, - 0.228863, - 0.17218, - 0.061312, - 0.459227, - 0.132843, - 0.115626, - 0.334076, - 0.0723383, - 0.513182, - 0.676053 - ], - "train_epoch_time": 4.845436096191406, - "train_loss": 4.499074915526074, - "train_score": 0.08541853481762036, - "val_loss": 4.512567051516335, - "val_score": 0.08252009189977438 - }, - { - "epoch": 1, - "grad_norm": 3.0589547157287598, - "learning_rate": 1.0, - "model_norm": 87.3234634399414, - "step_logs": { - "grad_norm": { - "54": 2.8141071796417236, - "55": 1.710223913192749, - "56": 3.732734203338623, - "57": 11.22106647491455, - "58": 2.0565707683563232, - "59": 3.3401100635528564, - "60": 7.947438716888428, - "61": 2.3921890258789062, - "62": 7.051760673522949, - "63": 3.568357467651367, - "64": 5.318252086639404, - "65": 9.018369674682617, - "66": 1.1958746910095215, - "67": 12.429364204406738, - "68": 0.9566407203674316, - "69": 7.411942958831787, - "70": 1.5008424520492554, - "71": 16.9825439453125, - "72": 3.371229648590088, - "73": 0.5242273211479187, - "74": 4.539724349975586, - "75": 5.06485652923584, - "76": 3.1363143920898438, - "77": 14.291738510131836, - "78": 2.0122501850128174, - "79": 6.145211696624756, - "80": 10.80793285369873, - "81": 1.067740559577942, - "82": 1.3276876211166382, - "83": 11.108071327209473, - "84": 3.60037899017334, - "85": 6.396505355834961, - "86": 2.36037278175354, - "87": 9.8271484375, - "88": 2.612766742706299, - "89": 1.112652063369751, - "90": 3.7345943450927734, - "91": 2.317619562149048, - "92": 2.6894021034240723, - "93": 7.344915390014648, - "94": 0.9760797619819641, - "95": 11.726158142089844, - "96": 1.5686811208724976, - "97": 9.840619087219238, - "98": 1.9007809162139893, - "99": 1.3968459367752075, - "100": 1.2916523218154907, - "101": 1.4621291160583496, - "102": 3.6534433364868164, - "103": 1.9408931732177734, - "104": 1.395183801651001, - "105": 4.99188232421875, - "106": 1.0339820384979248, - "107": 3.0589547157287598 - }, - "loss": { - "54": 4.496552467346191, - "55": 4.283427715301514, - "56": 4.070971488952637, - "57": 6.417243957519531, - "58": 4.241355895996094, - "59": 4.122255802154541, - "60": 5.691153049468994, - "61": 4.3569231033325195, - "62": 4.7263503074646, - "63": 4.9553303718566895, - "64": 4.079760551452637, - "65": 6.059639930725098, - "66": 3.8306164741516113, - "67": 6.762326717376709, - "68": 3.535759925842285, - "69": 4.7031965255737305, - "70": 3.6029560565948486, - "71": 9.066278457641602, - "72": 4.146097183227539, - "73": 3.396852493286133, - "74": 4.054537296295166, - "75": 4.677854537963867, - "76": 3.845867872238159, - "77": 6.700932502746582, - "78": 3.9027583599090576, - "79": 5.309556007385254, - "80": 5.890167236328125, - "81": 3.59029221534729, - "82": 3.580516815185547, - "83": 7.183401584625244, - "84": 4.102786064147949, - "85": 4.321220397949219, - "86": 4.31319522857666, - "87": 4.902420997619629, - "88": 4.417163372039795, - "89": 3.501753091812134, - "90": 4.025180816650391, - "91": 4.4595208168029785, - "92": 3.8459949493408203, - "93": 4.913617134094238, - "94": 3.500107765197754, - "95": 6.503312587738037, - "96": 3.8549532890319824, - "97": 4.363958358764648, - "98": 3.8823580741882324, - "99": 3.9704980850219727, - "100": 3.6405396461486816, - "101": 3.5652427673339844, - "102": 3.963061809539795, - "103": 4.422133445739746, - "104": 3.6809685230255127, - "105": 3.531747341156006, - "106": 3.408315658569336, - "107": 4.0542521476745605 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.531749, - 0.745481, - 0.368827, - 0.0925029, - 0.667289, - 0.424956, - 0.152692, - 0.603602, - 0.159728, - 0.437676, - 0.223896, - 0.129687, - 0.842695, - 0.0804973, - 0.885414, - 0.146191, - 0.76185, - 0.0591525, - 0.421836, - 0.961121, - 0.282367, - 0.267241, - 0.438821, - 0.0615737, - 0.658434, - 0.219481, - 0.0916103, - 0.862983, - 0.802466, - 0.104292, - 0.387635, - 0.174391, - 0.607589, - 0.0921701, - 0.564102, - 0.849785, - 0.365966, - 0.624128, - 0.515381, - 0.154092, - 0.880204, - 0.0864173, - 0.758053, - 0.0826776, - 0.682451, - 0.802755, - 0.813579, - 0.769341, - 0.372577, - 0.701295, - 0.790885, - 0.220856, - 0.864424, - 0.464253 - ], - "train_epoch_time": 4.841185808181763, - "train_loss": 3.364718777548463, - "train_score": 0.08575031382794701, - "val_loss": 3.3792267357577686, - "val_score": 0.08293269226492204 - }, - { - "epoch": 2, - "grad_norm": 0.6052566766738892, - "learning_rate": 1.0, - "model_norm": 87.44818878173828, - "step_logs": { - "grad_norm": { - "108": 0.7990756630897522, - "109": 1.3617959022521973, - "110": 1.1489999294281006, - "111": 3.39947247505188, - "112": 1.0340996980667114, - "113": 0.617216944694519, - "114": 3.1970701217651367, - "115": 1.1757283210754395, - "116": 3.6301863193511963, - "117": 1.1995385885238647, - "118": 2.378713369369507, - "119": 1.1363970041275024, - "120": 2.594503164291382, - "121": 0.8914473056793213, - "122": 0.9866601228713989, - "123": 2.352792263031006, - "124": 0.8244664669036865, - "125": 1.143155574798584, - "126": 0.9805446863174438, - "127": 1.0797122716903687, - "128": 2.487576484680176, - "129": 1.175310492515564, - "130": 2.6633682250976562, - "131": 1.119796872138977, - "132": 1.503003716468811, - "133": 1.1181613206863403, - "134": 0.870795488357544, - "135": 0.6408287882804871, - "136": 0.4828634262084961, - "137": 1.3867980241775513, - "138": 1.8460670709609985, - "139": 0.9291976690292358, - "140": 1.1664748191833496, - "141": 1.8698469400405884, - "142": 0.9238661527633667, - "143": 2.2968530654907227, - "144": 1.0769011974334717, - "145": 0.884735643863678, - "146": 1.1556028127670288, - "147": 2.6860485076904297, - "148": 1.066886305809021, - "149": 0.6632800698280334, - "150": 1.0318211317062378, - "151": 2.3003132343292236, - "152": 1.0402331352233887, - "153": 0.9450657367706299, - "154": 0.9584382176399231, - "155": 0.5830466747283936, - "156": 0.6201723217964172, - "157": 0.9507369995117188, - "158": 0.9885178208351135, - "159": 0.5497141480445862, - "160": 0.4122600853443146, - "161": 0.6052566766738892 - }, - "loss": { - "108": 3.3414113521575928, - "109": 3.5784389972686768, - "110": 3.518871307373047, - "111": 3.5433592796325684, - "112": 3.6816158294677734, - "113": 3.349371910095215, - "114": 3.5908660888671875, - "115": 3.424107074737549, - "116": 3.5753965377807617, - "117": 3.6244006156921387, - "118": 3.7597837448120117, - "119": 3.2557806968688965, - "120": 3.5738797187805176, - "121": 3.4056382179260254, - "122": 3.288278579711914, - "123": 3.3868770599365234, - "124": 3.456759452819824, - "125": 3.249878406524658, - "126": 3.1945924758911133, - "127": 3.185201644897461, - "128": 3.378434181213379, - "129": 3.424539804458618, - "130": 3.3112215995788574, - "131": 3.3461813926696777, - "132": 3.216909646987915, - "133": 3.3869524002075195, - "134": 3.165609359741211, - "135": 3.143808364868164, - "136": 2.9743454456329346, - "137": 3.0846259593963623, - "138": 3.309664011001587, - "139": 3.4011752605438232, - "140": 3.1722803115844727, - "141": 3.2864184379577637, - "142": 3.1983742713928223, - "143": 3.104551315307617, - "144": 3.2795639038085938, - "145": 3.118144989013672, - "146": 3.100399971008301, - "147": 3.398005247116089, - "148": 3.237244129180908, - "149": 2.979027032852173, - "150": 3.002187728881836, - "151": 3.1980438232421875, - "152": 3.3180489540100098, - "153": 3.041016101837158, - "154": 3.1343166828155518, - "155": 2.90212345123291, - "156": 2.891636610031128, - "157": 2.897037982940674, - "158": 3.0314197540283203, - "159": 2.9041519165039062, - "160": 2.7970480918884277, - "161": 2.820652484893799 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 0.912786, - 0.794206, - 0.842042, - 0.380125, - 0.873187, - 0.94619, - 0.412672, - 0.832048, - 0.351753, - 0.834376, - 0.570622, - 0.834499, - 0.514998, - 0.895519, - 0.871061, - 0.550292, - 0.910481, - 0.832602, - 0.8692, - 0.845309, - 0.521971, - 0.832165, - 0.482827, - 0.842198, - 0.740129, - 0.844185, - 0.893041, - 0.938691, - 0.962284, - 0.762346, - 0.660131, - 0.887368, - 0.82341, - 0.652769, - 0.882276, - 0.540644, - 0.849755, - 0.888481, - 0.8228, - 0.485053, - 0.850481, - 0.931238, - 0.849392, - 0.547257, - 0.859801, - 0.871953, - 0.87219, - 0.944672, - 0.937643, - 0.865049, - 0.861198, - 0.950546, - 0.970514, - 0.939022 - ], - "train_epoch_time": 4.844262361526489, - "train_loss": 2.898751284026005, - "train_score": 0.16185998026832105, - "val_loss": 2.906747388511246, - "val_score": 0.1593893511492404 - }, - { - "epoch": 3, - "grad_norm": 0.5803899168968201, - "learning_rate": 1.0, - "model_norm": 87.54922485351562, - "step_logs": { - "grad_norm": { - "162": 0.7100263833999634, - "163": 0.9402167201042175, - "164": 1.0552407503128052, - "165": 0.5816885828971863, - "166": 0.536823034286499, - "167": 0.5009744763374329, - "168": 0.668644905090332, - "169": 0.7584171891212463, - "170": 0.7275749444961548, - "171": 0.6691548228263855, - "172": 0.9205804467201233, - "173": 0.7444524765014648, - "174": 0.6788049340248108, - "175": 0.5134831070899963, - "176": 0.5923011302947998, - "177": 0.9358205795288086, - "178": 1.0203570127487183, - "179": 0.7821270227432251, - "180": 0.5423383712768555, - "181": 0.7544683814048767, - "182": 0.6733540296554565, - "183": 0.745196521282196, - "184": 0.7250047326087952, - "185": 0.8175691962242126, - "186": 0.6404562592506409, - "187": 0.5733717679977417, - "188": 0.8767086863517761, - "189": 1.206737756729126, - "190": 0.707180380821228, - "191": 0.5744861960411072, - "192": 0.5868545770645142, - "193": 0.5949322581291199, - "194": 0.7133055329322815, - "195": 0.6171315312385559, - "196": 0.5165334343910217, - "197": 0.7953967452049255, - "198": 1.1480768918991089, - "199": 0.7780097126960754, - "200": 0.7076628804206848, - "201": 0.5793918371200562, - "202": 0.5321841835975647, - "203": 0.7195229530334473, - "204": 0.8034334182739258, - "205": 0.8028842806816101, - "206": 0.8330018520355225, - "207": 0.7464473247528076, - "208": 0.6742626428604126, - "209": 0.817547619342804, - "210": 0.6382176876068115, - "211": 0.36893272399902344, - "212": 0.5235304236412048, - "213": 0.8875744342803955, - "214": 0.8481656908988953, - "215": 0.5803899168968201 - }, - "loss": { - "162": 2.8880667686462402, - "163": 2.873842239379883, - "164": 3.054826259613037, - "165": 2.973947286605835, - "166": 2.804098606109619, - "167": 2.7878613471984863, - "168": 2.8069705963134766, - "169": 2.899956464767456, - "170": 2.86919903755188, - "171": 2.824172019958496, - "172": 2.8239760398864746, - "173": 2.9311931133270264, - "174": 2.7902462482452393, - "175": 2.810206890106201, - "176": 2.7419300079345703, - "177": 2.8503379821777344, - "178": 2.9393467903137207, - "179": 2.9799861907958984, - "180": 2.7808358669281006, - "181": 2.8034653663635254, - "182": 2.834273338317871, - "183": 2.7865006923675537, - "184": 2.8316876888275146, - "185": 2.8238914012908936, - "186": 2.841508388519287, - "187": 2.7296905517578125, - "188": 2.809426784515381, - "189": 2.9049742221832275, - "190": 2.9583373069763184, - "191": 2.7423365116119385, - "192": 2.7391369342803955, - "193": 2.7513647079467773, - "194": 2.744466781616211, - "195": 2.8149805068969727, - "196": 2.7198293209075928, - "197": 2.738754987716675, - "198": 2.8643856048583984, - "199": 2.9525084495544434, - "200": 2.76503586769104, - "201": 2.739008665084839, - "202": 2.709150791168213, - "203": 2.707935333251953, - "204": 2.807283878326416, - "205": 2.77246356010437, - "206": 2.80674409866333, - "207": 2.7761611938476562, - "208": 2.782247543334961, - "209": 2.7127556800842285, - "210": 2.8310041427612305, - "211": 2.665283441543579, - "212": 2.6493804454803467, - "213": 2.7252862453460693, - "214": 2.8497817516326904, - "215": 2.722008228302002 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 0.919727, - 0.8667, - 0.845839, - 0.946174, - 0.951126, - 0.956927, - 0.926236, - 0.909775, - 0.915542, - 0.926549, - 0.869528, - 0.913629, - 0.923729, - 0.95519, - 0.939873, - 0.866834, - 0.849544, - 0.906916, - 0.949771, - 0.907835, - 0.925938, - 0.909385, - 0.91507, - 0.894174, - 0.932682, - 0.943202, - 0.879668, - 0.799589, - 0.922063, - 0.943241, - 0.940852, - 0.939566, - 0.915167, - 0.936639, - 0.953245, - 0.896458, - 0.812955, - 0.907025, - 0.916963, - 0.942258, - 0.950326, - 0.912749, - 0.896885, - 0.895853, - 0.889987, - 0.908801, - 0.924469, - 0.890319, - 0.932889, - 0.975102, - 0.950818, - 0.873719, - 0.887928, - 0.94173 - ], - "train_epoch_time": 4.842041015625, - "train_loss": 2.699241478439725, - "train_score": 0.24656451761808082, - "val_loss": 2.7276196362093588, - "val_score": 0.2379134975707353 - }, - { - "epoch": 4, - "grad_norm": 0.5657550692558289, - "learning_rate": 1.0, - "model_norm": 87.64610290527344, - "step_logs": { - "grad_norm": { - "216": 0.5650863647460938, - "217": 0.8314638137817383, - "218": 0.7408836483955383, - "219": 0.4571698009967804, - "220": 0.5305669903755188, - "221": 0.7171705365180969, - "222": 0.7380601763725281, - "223": 0.6865860819816589, - "224": 0.6067739129066467, - "225": 0.4946342408657074, - "226": 0.6058553457260132, - "227": 0.9833201169967651, - "228": 0.6633129715919495, - "229": 0.4711220860481262, - "230": 0.5343348383903503, - "231": 0.5930265188217163, - "232": 0.56217360496521, - "233": 0.5278383493423462, - "234": 0.6075369119644165, - "235": 0.7591191530227661, - "236": 0.79591304063797, - "237": 0.680607795715332, - "238": 0.7248187065124512, - "239": 0.828799843788147, - "240": 0.5097420811653137, - "241": 0.4332064688205719, - "242": 0.6931511163711548, - "243": 0.6310138702392578, - "244": 0.4703831076622009, - "245": 0.6393974423408508, - "246": 0.9215507507324219, - "247": 0.6482958793640137, - "248": 0.3930441439151764, - "249": 0.41156721115112305, - "250": 0.5275195240974426, - "251": 0.6965681314468384, - "252": 0.8604452013969421, - "253": 0.7343969941139221, - "254": 0.8054675459861755, - "255": 0.7035790681838989, - "256": 0.46521276235580444, - "257": 0.5560733675956726, - "258": 0.8042013049125671, - "259": 0.6615998148918152, - "260": 0.4059397876262665, - "261": 0.4841102361679077, - "262": 0.7663136124610901, - "263": 0.8003219366073608, - "264": 0.5811874270439148, - "265": 0.5593263506889343, - "266": 0.673582911491394, - "267": 0.9121878743171692, - "268": 0.81010901927948, - "269": 0.5657550692558289 - }, - "loss": { - "216": 2.6997551918029785, - "217": 2.6929354667663574, - "218": 2.8422014713287354, - "219": 2.6970372200012207, - "220": 2.6564443111419678, - "221": 2.725572109222412, - "222": 2.7633790969848633, - "223": 2.714470863342285, - "224": 2.7507214546203613, - "225": 2.6631269454956055, - "226": 2.7074687480926514, - "227": 2.7278785705566406, - "228": 2.8227436542510986, - "229": 2.677269458770752, - "230": 2.6587533950805664, - "231": 2.6636836528778076, - "232": 2.6790342330932617, - "233": 2.677955150604248, - "234": 2.637813091278076, - "235": 2.720576286315918, - "236": 2.733254909515381, - "237": 2.7667438983917236, - "238": 2.6778652667999268, - "239": 2.7923903465270996, - "240": 2.699666976928711, - "241": 2.63845157623291, - "242": 2.6659626960754395, - "243": 2.752375602722168, - "244": 2.64259672164917, - "245": 2.659594774246216, - "246": 2.7044730186462402, - "247": 2.778343677520752, - "248": 2.6148858070373535, - "249": 2.5854883193969727, - "250": 2.6170525550842285, - "251": 2.6342358589172363, - "252": 2.7025701999664307, - "253": 2.7219066619873047, - "254": 2.6559152603149414, - "255": 2.7618870735168457, - "256": 2.6095192432403564, - "257": 2.615939140319824, - "258": 2.6630914211273193, - "259": 2.7261133193969727, - "260": 2.5777173042297363, - "261": 2.5941927433013916, - "262": 2.618781566619873, - "263": 2.7158823013305664, - "264": 2.6539714336395264, - "265": 2.579207420349121, - "266": 2.622006416320801, - "267": 2.686002016067505, - "268": 2.6722254753112793, - "269": 2.6395888328552246 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 0.944163, - 0.886242, - 0.91194, - 0.962698, - 0.949682, - 0.913782, - 0.91028, - 0.920106, - 0.937275, - 0.956082, - 0.936517, - 0.849452, - 0.927699, - 0.960198, - 0.949043, - 0.938074, - 0.944301, - 0.950552, - 0.934611, - 0.904234, - 0.896151, - 0.922753, - 0.910669, - 0.890475, - 0.954086, - 0.965657, - 0.917339, - 0.932546, - 0.959818, - 0.928626, - 0.864297, - 0.929682, - 0.971308, - 0.968282, - 0.949518, - 0.91567, - 0.879527, - 0.909857, - 0.891156, - 0.917754, - 0.960183, - 0.944196, - 0.891721, - 0.925684, - 0.969026, - 0.956782, - 0.899183, - 0.894518, - 0.940171, - 0.94282, - 0.920369, - 0.865881, - 0.890634, - 0.942835 - ], - "train_epoch_time": 4.8414764404296875, - "train_loss": 2.5999833176775677, - "train_score": 0.2584850249964332, - "val_loss": 2.6212340383934785, - "val_score": 0.2511660449814167 - }, - { - "epoch": 5, - "grad_norm": 0.7623428702354431, - "learning_rate": 1.0, - "model_norm": 87.73600769042969, - "step_logs": { - "grad_norm": { - "270": 0.5762381553649902, - "271": 0.7667810916900635, - "272": 0.725177526473999, - "273": 0.6380300521850586, - "274": 0.6508094072341919, - "275": 0.6889649033546448, - "276": 0.8186596632003784, - "277": 0.809810221195221, - "278": 0.4971579313278198, - "279": 0.4767281711101532, - "280": 0.7546098232269287, - "281": 0.7490273118019104, - "282": 0.5387448072433472, - "283": 0.6713287234306335, - "284": 0.8918309807777405, - "285": 0.7312997579574585, - "286": 0.5076626539230347, - "287": 0.5812913775444031, - "288": 0.8608074188232422, - "289": 0.8124175667762756, - "290": 0.6171245574951172, - "291": 0.7660766243934631, - "292": 0.8741928339004517, - "293": 0.7114712595939636, - "294": 0.6230983138084412, - "295": 0.6871888637542725, - "296": 0.8769329190254211, - "297": 0.9192678928375244, - "298": 0.6888883113861084, - "299": 0.5806081891059875, - "300": 0.5481948852539062, - "301": 0.6057829856872559, - "302": 0.8242425322532654, - "303": 0.7228921055793762, - "304": 0.6174896359443665, - "305": 0.9535212516784668, - "306": 0.8166515231132507, - "307": 0.6627100110054016, - "308": 0.760825514793396, - "309": 0.6396526098251343, - "310": 0.4781973958015442, - "311": 0.602472722530365, - "312": 0.7694366574287415, - "313": 0.8572793006896973, - "314": 0.6807762980461121, - "315": 0.7034918069839478, - "316": 0.7197319269180298, - "317": 0.6302210092544556, - "318": 0.7197474241256714, - "319": 0.6211883425712585, - "320": 0.5064917206764221, - "321": 0.5672907829284668, - "322": 0.6895321011543274, - "323": 0.7623428702354431 - }, - "loss": { - "270": 2.5940680503845215, - "271": 2.6578333377838135, - "272": 2.671351909637451, - "273": 2.6633901596069336, - "274": 2.593959331512451, - "275": 2.6608104705810547, - "276": 2.6063003540039062, - "277": 2.735334873199463, - "278": 2.594287395477295, - "279": 2.56589412689209, - "280": 2.573725700378418, - "281": 2.7140870094299316, - "282": 2.5862767696380615, - "283": 2.6108508110046387, - "284": 2.621504068374634, - "285": 2.686147689819336, - "286": 2.5590689182281494, - "287": 2.5385873317718506, - "288": 2.5845212936401367, - "289": 2.6836318969726562, - "290": 2.601633071899414, - "291": 2.5854430198669434, - "292": 2.6249122619628906, - "293": 2.628178596496582, - "294": 2.538827896118164, - "295": 2.6066033840179443, - "296": 2.6027989387512207, - "297": 2.687753677368164, - "298": 2.6585183143615723, - "299": 2.5458569526672363, - "300": 2.54611873626709, - "301": 2.544062376022339, - "302": 2.5724222660064697, - "303": 2.649348258972168, - "304": 2.5425119400024414, - "305": 2.5965559482574463, - "306": 2.6575493812561035, - "307": 2.6183481216430664, - "308": 2.553880214691162, - "309": 2.6155781745910645, - "310": 2.5026512145996094, - "311": 2.5396928787231445, - "312": 2.547732353210449, - "313": 2.6292998790740967, - "314": 2.5586540699005127, - "315": 2.553309202194214, - "316": 2.6014316082000732, - "317": 2.5686278343200684, - "318": 2.547663927078247, - "319": 2.594808578491211, - "320": 2.513535261154175, - "321": 2.5114336013793945, - "322": 2.554598569869995, - "323": 2.567309856414795 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 0.939848, - 0.900408, - 0.91039, - 0.929004, - 0.92452, - 0.918108, - 0.886074, - 0.892957, - 0.954529, - 0.957592, - 0.900394, - 0.906325, - 0.946869, - 0.920548, - 0.868282, - 0.909465, - 0.95206, - 0.9376, - 0.874622, - 0.890494, - 0.931799, - 0.898073, - 0.872928, - 0.912158, - 0.928968, - 0.916941, - 0.871287, - 0.864152, - 0.918059, - 0.937904, - 0.944274, - 0.932728, - 0.883353, - 0.91023, - 0.930247, - 0.851007, - 0.888513, - 0.922623, - 0.898207, - 0.927459, - 0.95631, - 0.933306, - 0.895906, - 0.87738, - 0.916955, - 0.911649, - 0.909452, - 0.928235, - 0.907714, - 0.930791, - 0.951447, - 0.939787, - 0.914864, - 0.898323 - ], - "train_epoch_time": 4.842908143997192, - "train_loss": 2.5616736331321244, - "train_score": 0.28084312235265757, - "val_loss": 2.591733685590917, - "val_score": 0.2663201417016928 - }, - { - "epoch": 6, - "grad_norm": 0.544720470905304, - "learning_rate": 1.0, - "model_norm": 87.81867980957031, - "step_logs": { - "grad_norm": { - "324": 0.6881338357925415, - "325": 0.7093273997306824, - "326": 0.821570098400116, - "327": 0.7350651621818542, - "328": 0.6052749752998352, - "329": 0.6674264669418335, - "330": 0.7156462669372559, - "331": 0.6024705767631531, - "332": 0.5499410629272461, - "333": 0.6008399724960327, - "334": 0.6784842014312744, - "335": 0.6539338231086731, - "336": 0.6002705693244934, - "337": 0.7022740244865417, - "338": 0.7502356171607971, - "339": 0.7172045707702637, - "340": 0.5320237278938293, - "341": 0.5824774503707886, - "342": 0.6027033925056458, - "343": 0.6100599765777588, - "344": 0.7220253348350525, - "345": 0.711876630783081, - "346": 0.6622322201728821, - "347": 0.7625877261161804, - "348": 0.7873712182044983, - "349": 0.8393401503562927, - "350": 0.679058849811554, - "351": 0.6693992614746094, - "352": 0.6074686646461487, - "353": 0.542557954788208, - "354": 0.627860426902771, - "355": 0.654497504234314, - "356": 0.6317742466926575, - "357": 0.6605423092842102, - "358": 0.5392128825187683, - "359": 0.519406259059906, - "360": 0.8243221640586853, - "361": 0.6306591629981995, - "362": 0.5197187662124634, - "363": 0.5710395574569702, - "364": 0.6349229216575623, - "365": 0.6464499831199646, - "366": 0.6011934280395508, - "367": 0.5506454110145569, - "368": 0.544942319393158, - "369": 0.5747122168540955, - "370": 0.6067695617675781, - "371": 0.6816163063049316, - "372": 0.6515807509422302, - "373": 0.5940066576004028, - "374": 0.5205538868904114, - "375": 0.4759959280490875, - "376": 0.4977388083934784, - "377": 0.544720470905304 - }, - "loss": { - "324": 2.5615949630737305, - "325": 2.5503807067871094, - "326": 2.572110176086426, - "327": 2.6042826175689697, - "328": 2.529184579849243, - "329": 2.5294673442840576, - "330": 2.552457332611084, - "331": 2.5794436931610107, - "332": 2.4881858825683594, - "333": 2.5058255195617676, - "334": 2.5222389698028564, - "335": 2.52669095993042, - "336": 2.494293689727783, - "337": 2.5254476070404053, - "338": 2.5520334243774414, - "339": 2.5825424194335938, - "340": 2.514739990234375, - "341": 2.495880365371704, - "342": 2.5257925987243652, - "343": 2.5114736557006836, - "344": 2.5242955684661865, - "345": 2.564255714416504, - "346": 2.525627613067627, - "347": 2.5470619201660156, - "348": 2.5338234901428223, - "349": 2.5905356407165527, - "350": 2.522367000579834, - "351": 2.535236358642578, - "352": 2.5280518531799316, - "353": 2.470916509628296, - "354": 2.51663875579834, - "355": 2.5265355110168457, - "356": 2.4779067039489746, - "357": 2.5526695251464844, - "358": 2.495434045791626, - "359": 2.495894193649292, - "360": 2.505380630493164, - "361": 2.5761022567749023, - "362": 2.4901740550994873, - "363": 2.5038435459136963, - "364": 2.49552845954895, - "365": 2.5320253372192383, - "366": 2.5038163661956787, - "367": 2.496917724609375, - "368": 2.4635276794433594, - "369": 2.4992551803588867, - "370": 2.4868030548095703, - "371": 2.515239953994751, - "372": 2.5180130004882812, - "373": 2.4773757457733154, - "374": 2.4581429958343506, - "375": 2.474637985229492, - "376": 2.4543020725250244, - "377": 2.4802401065826416 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 0.915392, - 0.910215, - 0.884009, - 0.906013, - 0.932465, - 0.919072, - 0.908822, - 0.934267, - 0.942708, - 0.932806, - 0.916375, - 0.92198, - 0.932636, - 0.911042, - 0.900677, - 0.909431, - 0.94672, - 0.936358, - 0.932916, - 0.931017, - 0.906404, - 0.910072, - 0.920115, - 0.897538, - 0.890999, - 0.880302, - 0.916249, - 0.918802, - 0.93198, - 0.943782, - 0.927368, - 0.921851, - 0.925464, - 0.921266, - 0.94495, - 0.948726, - 0.880584, - 0.928336, - 0.948555, - 0.938864, - 0.925266, - 0.923768, - 0.932682, - 0.942759, - 0.943154, - 0.938017, - 0.931077, - 0.915452, - 0.922251, - 0.933521, - 0.947761, - 0.956225, - 0.951954, - 0.943559 - ], - "train_epoch_time": 4.844611406326294, - "train_loss": 2.475346781978306, - "train_score": 0.30142911581945214, - "val_loss": 2.5075279111566555, - "val_score": 0.2918654919423411 - }, - { - "epoch": 7, - "grad_norm": 0.58362877368927, - "learning_rate": 1.0, - "model_norm": 87.90218353271484, - "step_logs": { - "grad_norm": { - "378": 0.568577766418457, - "379": 0.6245736479759216, - "380": 0.5861882567405701, - "381": 0.4929637610912323, - "382": 0.6263610124588013, - "383": 0.6466138362884521, - "384": 0.5652489066123962, - "385": 0.6791654825210571, - "386": 0.6533142924308777, - "387": 0.5913721919059753, - "388": 0.7104600071907043, - "389": 0.8303994536399841, - "390": 0.7594033479690552, - "391": 0.5994577407836914, - "392": 0.5189815163612366, - "393": 0.5038019418716431, - "394": 0.5772358179092407, - "395": 0.5958951711654663, - "396": 0.6662114858627319, - "397": 0.6174487471580505, - "398": 0.508374810218811, - "399": 0.591770589351654, - "400": 0.5781763195991516, - "401": 0.5616276264190674, - "402": 0.6043291687965393, - "403": 0.5351585149765015, - "404": 0.5233591794967651, - "405": 0.5638923645019531, - "406": 0.6078712940216064, - "407": 0.5717411041259766, - "408": 0.5401195287704468, - "409": 0.4989900588989258, - "410": 0.5397713780403137, - "411": 0.5587407350540161, - "412": 0.5320559144020081, - "413": 0.6947481632232666, - "414": 0.718398928642273, - "415": 0.8130577206611633, - "416": 1.1151018142700195, - "417": 1.0088691711425781, - "418": 0.5021718144416809, - "419": 0.40648096799850464, - "420": 0.44817137718200684, - "421": 0.520017147064209, - "422": 0.5725849270820618, - "423": 0.6407066583633423, - "424": 0.6596114039421082, - "425": 0.6369249224662781, - "426": 0.6807340383529663, - "427": 0.5690308213233948, - "428": 0.5339997410774231, - "429": 0.5982414484024048, - "430": 0.649864673614502, - "431": 0.58362877368927 - }, - "loss": { - "378": 2.500174045562744, - "379": 2.473900318145752, - "380": 2.5107316970825195, - "381": 2.4709434509277344, - "382": 2.478297233581543, - "383": 2.5236899852752686, - "384": 2.473050355911255, - "385": 2.4877266883850098, - "386": 2.5103933811187744, - "387": 2.500427484512329, - "388": 2.4669265747070312, - "389": 2.559154510498047, - "390": 2.4983279705047607, - "391": 2.503368854522705, - "392": 2.472099542617798, - "393": 2.4667510986328125, - "394": 2.4647960662841797, - "395": 2.48909330368042, - "396": 2.4767849445343018, - "397": 2.509556770324707, - "398": 2.4718313217163086, - "399": 2.4844582080841064, - "400": 2.472041606903076, - "401": 2.463960647583008, - "402": 2.442333221435547, - "403": 2.45332670211792, - "404": 2.434847831726074, - "405": 2.4583888053894043, - "406": 2.430128574371338, - "407": 2.4709701538085938, - "408": 2.4280524253845215, - "409": 2.452427864074707, - "410": 2.43495774269104, - "411": 2.471374988555908, - "412": 2.4536819458007812, - "413": 2.456254243850708, - "414": 2.5245273113250732, - "415": 2.4855000972747803, - "416": 2.544663190841675, - "417": 2.5951011180877686, - "418": 2.5055694580078125, - "419": 2.433615207672119, - "420": 2.4409799575805664, - "421": 2.432000160217285, - "422": 2.4715933799743652, - "423": 2.467902183532715, - "424": 2.4839553833007812, - "425": 2.494706153869629, - "426": 2.4653239250183105, - "427": 2.4996910095214844, - "428": 2.415006399154663, - "429": 2.4614522457122803, - "430": 2.441516876220703, - "431": 2.483922243118286 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 0.939274, - 0.92692, - 0.935953, - 0.953131, - 0.926653, - 0.9235, - 0.939322, - 0.915157, - 0.92165, - 0.934639, - 0.907191, - 0.881271, - 0.896527, - 0.933033, - 0.948338, - 0.95107, - 0.936687, - 0.93342, - 0.917768, - 0.929404, - 0.950319, - 0.934163, - 0.936668, - 0.939843, - 0.930434, - 0.94485, - 0.946748, - 0.939257, - 0.929345, - 0.937958, - 0.94333, - 0.951688, - 0.94355, - 0.940591, - 0.945461, - 0.910536, - 0.907263, - 0.882625, - 0.803648, - 0.836048, - 0.952088, - 0.967168, - 0.960483, - 0.947332, - 0.937801, - 0.923217, - 0.919473, - 0.924807, - 0.914091, - 0.939172, - 0.944253, - 0.932227, - 0.920397, - 0.935834 - ], - "train_epoch_time": 4.844684600830078, - "train_loss": 2.4265488082743443, - "train_score": 0.2991156294150879, - "val_loss": 2.454537824703002, - "val_score": 0.2879996051676101 - }, - { - "epoch": 8, - "grad_norm": 0.5858098268508911, - "learning_rate": 1.0, - "model_norm": 87.97018432617188, - "step_logs": { - "grad_norm": { - "432": 0.5174282193183899, - "433": 0.5653510689735413, - "434": 0.6048844456672668, - "435": 0.6042084097862244, - "436": 0.6482882499694824, - "437": 0.5685992240905762, - "438": 0.4903128147125244, - "439": 0.5669147372245789, - "440": 0.6930384039878845, - "441": 0.6139175891876221, - "442": 0.4596112072467804, - "443": 0.4412894546985626, - "444": 0.4856876730918884, - "445": 0.5192708969116211, - "446": 0.6828057765960693, - "447": 0.6767101287841797, - "448": 0.6061641573905945, - "449": 0.5818963050842285, - "450": 0.5864323377609253, - "451": 0.5374137759208679, - "452": 0.48834657669067383, - "453": 0.5185680389404297, - "454": 0.5447973012924194, - "455": 0.5373228788375854, - "456": 0.5501261353492737, - "457": 0.7204861044883728, - "458": 0.7994251847267151, - "459": 0.6163647174835205, - "460": 0.5685983300209045, - "461": 0.5511287450790405, - "462": 0.5754255652427673, - "463": 0.6026052832603455, - "464": 0.6120621562004089, - "465": 0.5343873500823975, - "466": 0.44022950530052185, - "467": 0.4660203456878662, - "468": 0.5254347920417786, - "469": 0.5624760389328003, - "470": 0.5719310641288757, - "471": 0.5804353952407837, - "472": 0.5198171138763428, - "473": 0.4791575074195862, - "474": 0.5389455556869507, - "475": 0.5344741344451904, - "476": 0.48459771275520325, - "477": 0.48139262199401855, - "478": 0.5401302576065063, - "479": 0.5814175009727478, - "480": 0.5682058930397034, - "481": 0.5351996421813965, - "482": 0.5071942210197449, - "483": 0.5557736158370972, - "484": 0.6169612407684326, - "485": 0.5858098268508911 - }, - "loss": { - "432": 2.432744264602661, - "433": 2.4534616470336914, - "434": 2.44284725189209, - "435": 2.4656524658203125, - "436": 2.4371800422668457, - "437": 2.481755495071411, - "438": 2.406954288482666, - "439": 2.4203262329101562, - "440": 2.459423065185547, - "441": 2.4922173023223877, - "442": 2.4211604595184326, - "443": 2.3916897773742676, - "444": 2.4236063957214355, - "445": 2.4038991928100586, - "446": 2.4305214881896973, - "447": 2.4868862628936768, - "448": 2.4516091346740723, - "449": 2.4630825519561768, - "450": 2.4366302490234375, - "451": 2.436910629272461, - "452": 2.4233202934265137, - "453": 2.4246230125427246, - "454": 2.4233484268188477, - "455": 2.4261343479156494, - "456": 2.4060890674591064, - "457": 2.432471990585327, - "458": 2.491461992263794, - "459": 2.4399023056030273, - "460": 2.4218220710754395, - "461": 2.4430437088012695, - "462": 2.4276366233825684, - "463": 2.4408247470855713, - "464": 2.4266357421875, - "465": 2.4531545639038086, - "466": 2.3855481147766113, - "467": 2.409026622772217, - "468": 2.41092848777771, - "469": 2.439362049102783, - "470": 2.416613817214966, - "471": 2.4677913188934326, - "472": 2.4265360832214355, - "473": 2.416658401489258, - "474": 2.418534278869629, - "475": 2.424913167953491, - "476": 2.3806135654449463, - "477": 2.424333095550537, - "478": 2.4047040939331055, - "479": 2.4056622982025146, - "480": 2.4200260639190674, - "481": 2.4149255752563477, - "482": 2.405825138092041, - "483": 2.4439802169799805, - "484": 2.420701503753662, - "485": 2.461639404296875 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 0.947843, - 0.938846, - 0.930329, - 0.931072, - 0.920622, - 0.938847, - 0.952435, - 0.937739, - 0.911041, - 0.929701, - 0.958199, - 0.960882, - 0.953593, - 0.946894, - 0.912484, - 0.915692, - 0.930287, - 0.935685, - 0.934082, - 0.944057, - 0.953102, - 0.947459, - 0.942295, - 0.94384, - 0.940831, - 0.903585, - 0.886325, - 0.927771, - 0.937428, - 0.941473, - 0.936157, - 0.930763, - 0.928342, - 0.944997, - 0.960966, - 0.956869, - 0.945844, - 0.939101, - 0.936612, - 0.936101, - 0.947258, - 0.954652, - 0.943352, - 0.944375, - 0.952996, - 0.954386, - 0.942809, - 0.934352, - 0.937466, - 0.944014, - 0.94925, - 0.940563, - 0.927109, - 0.934838 - ], - "train_epoch_time": 4.845614671707153, - "train_loss": 2.3960921372368484, - "train_score": 0.30780801653348905, - "val_loss": 2.431917364096395, - "val_score": 0.3006691307205831 - }, - { - "epoch": 9, - "grad_norm": 0.5992441773414612, - "learning_rate": 1.0, - "model_norm": 88.04580688476562, - "step_logs": { - "grad_norm": { - "486": 0.5251680016517639, - "487": 0.5225484371185303, - "488": 0.5134865641593933, - "489": 0.5666976571083069, - "490": 0.6372716426849365, - "491": 0.7346780896186829, - "492": 0.9128353595733643, - "493": 1.184725046157837, - "494": 0.7774388194084167, - "495": 0.80953049659729, - "496": 1.1751888990402222, - "497": 0.6669449806213379, - "498": 0.5073861479759216, - "499": 0.4833794832229614, - "500": 0.4841935634613037, - "501": 0.4673689603805542, - "502": 0.4761419892311096, - "503": 0.5035741925239563, - "504": 0.5230599641799927, - "505": 0.5073630213737488, - "506": 0.5405907034873962, - "507": 0.4714689254760742, - "508": 0.4369595944881439, - "509": 0.4372880458831787, - "510": 0.44072920083999634, - "511": 0.464080274105072, - "512": 0.4588129222393036, - "513": 0.5141434669494629, - "514": 0.5300304293632507, - "515": 0.5264018177986145, - "516": 0.5475291609764099, - "517": 0.5486292243003845, - "518": 0.5629437565803528, - "519": 0.5145405530929565, - "520": 0.452772319316864, - "521": 0.4707406163215637, - "522": 0.47186407446861267, - "523": 0.4546498656272888, - "524": 0.47039440274238586, - "525": 0.4974057674407959, - "526": 0.5125047564506531, - "527": 0.4869077503681183, - "528": 0.4762588143348694, - "529": 0.5857961773872375, - "530": 0.6514634490013123, - "531": 0.5966834425926208, - "532": 0.5027233958244324, - "533": 0.44797879457473755, - "534": 0.4814678132534027, - "535": 0.5191361904144287, - "536": 0.5164108872413635, - "537": 0.48637133836746216, - "538": 0.5138224959373474, - "539": 0.5992441773414612 - }, - "loss": { - "486": 2.4083127975463867, - "487": 2.4202003479003906, - "488": 2.405160903930664, - "489": 2.423948287963867, - "490": 2.424227714538574, - "491": 2.4508426189422607, - "492": 2.478623390197754, - "493": 2.5330142974853516, - "494": 2.4962596893310547, - "495": 2.4829611778259277, - "496": 2.538888931274414, - "497": 2.5282459259033203, - "498": 2.43544340133667, - "499": 2.4205265045166016, - "500": 2.421490430831909, - "501": 2.395378589630127, - "502": 2.3809010982513428, - "503": 2.404325008392334, - "504": 2.410860061645508, - "505": 2.390225410461426, - "506": 2.42000150680542, - "507": 2.3842532634735107, - "508": 2.3806405067443848, - "509": 2.375567674636841, - "510": 2.3685219287872314, - "511": 2.3860483169555664, - "512": 2.384050130844116, - "513": 2.3912487030029297, - "514": 2.431748867034912, - "515": 2.3876798152923584, - "516": 2.4083287715911865, - "517": 2.371127128601074, - "518": 2.4180474281311035, - "519": 2.3849360942840576, - "520": 2.3860342502593994, - "521": 2.3552846908569336, - "522": 2.3718647956848145, - "523": 2.347947359085083, - "524": 2.384767532348633, - "525": 2.38918399810791, - "526": 2.3818764686584473, - "527": 2.364521026611328, - "528": 2.383082866668701, - "529": 2.3857955932617188, - "530": 2.434995174407959, - "531": 2.3783154487609863, - "532": 2.3764476776123047, - "533": 2.356940746307373, - "534": 2.372837543487549, - "535": 2.3827857971191406, - "536": 2.4039812088012695, - "537": 2.394853115081787, - "538": 2.391368865966797, - "539": 2.384692668914795 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 0.945841, - 0.9466, - 0.948035, - 0.937871, - 0.922712, - 0.900807, - 0.856098, - 0.783051, - 0.89201, - 0.883418, - 0.786174, - 0.919144, - 0.9498, - 0.953957, - 0.953826, - 0.956393, - 0.954553, - 0.949906, - 0.946305, - 0.948904, - 0.943058, - 0.955461, - 0.961445, - 0.96131, - 0.96061, - 0.956818, - 0.957717, - 0.947622, - 0.945391, - 0.945156, - 0.941407, - 0.940317, - 0.938501, - 0.947414, - 0.958811, - 0.955071, - 0.955167, - 0.957837, - 0.955664, - 0.950771, - 0.947744, - 0.952261, - 0.954572, - 0.932908, - 0.919839, - 0.930363, - 0.949511, - 0.959165, - 0.953428, - 0.946475, - 0.947448, - 0.952936, - 0.947686, - 0.92998 - ], - "train_epoch_time": 4.844855308532715, - "train_loss": 2.4032935036477263, - "train_score": 0.29880290525715525, - "val_loss": 2.44017634818803, - "val_score": 0.28712955719431016 - }, - { - "epoch": 10, - "grad_norm": 0.41238293051719666, - "learning_rate": 1.0, - "model_norm": 88.12128448486328, - "step_logs": { - "grad_norm": { - "540": 0.49666303396224976, - "541": 0.44918397068977356, - "542": 0.487358421087265, - "543": 0.5815653800964355, - "544": 0.647682785987854, - "545": 0.5364251136779785, - "546": 0.49116113781929016, - "547": 0.5506793260574341, - "548": 0.5831376910209656, - "549": 0.5818339586257935, - "550": 0.5751243829727173, - "551": 0.5565703511238098, - "552": 0.5409532785415649, - "553": 0.5474473237991333, - "554": 0.5073561668395996, - "555": 0.44616034626960754, - "556": 0.49524757266044617, - "557": 0.5654675960540771, - "558": 0.6029525995254517, - "559": 0.5905118584632874, - "560": 0.543039858341217, - "561": 0.5158525109291077, - "562": 0.4918135702610016, - "563": 0.4921610355377197, - "564": 0.5384225845336914, - "565": 0.5163503885269165, - "566": 0.5473888516426086, - "567": 0.5944982767105103, - "568": 0.6192070245742798, - "569": 0.5348639488220215, - "570": 0.531512439250946, - "571": 0.5428785681724548, - "572": 0.5575944185256958, - "573": 0.5357584953308105, - "574": 0.5161674618721008, - "575": 0.5172922015190125, - "576": 0.5503405332565308, - "577": 0.6006607413291931, - "578": 0.5994082689285278, - "579": 0.5745787620544434, - "580": 0.5670735836029053, - "581": 0.5644622445106506, - "582": 0.7373393774032593, - "583": 1.0047847032546997, - "584": 1.1907254457473755, - "585": 0.5557610988616943, - "586": 0.5193143486976624, - "587": 0.4940536320209503, - "588": 0.4744510352611542, - "589": 0.5412111282348633, - "590": 0.5464239716529846, - "591": 0.49607542157173157, - "592": 0.4465716779232025, - "593": 0.41238293051719666 - }, - "loss": { - "540": 2.4032466411590576, - "541": 2.345470428466797, - "542": 2.368605136871338, - "543": 2.3844478130340576, - "544": 2.3914103507995605, - "545": 2.415172576904297, - "546": 2.3889405727386475, - "547": 2.3648898601531982, - "548": 2.385829448699951, - "549": 2.373162269592285, - "550": 2.3915953636169434, - "551": 2.390748977661133, - "552": 2.3755428791046143, - "553": 2.3753912448883057, - "554": 2.378481864929199, - "555": 2.326127290725708, - "556": 2.375342607498169, - "557": 2.3681674003601074, - "558": 2.365988254547119, - "559": 2.3784890174865723, - "560": 2.3928966522216797, - "561": 2.3594465255737305, - "562": 2.3655896186828613, - "563": 2.3578858375549316, - "564": 2.3804662227630615, - "565": 2.353290557861328, - "566": 2.3801143169403076, - "567": 2.3858840465545654, - "568": 2.389169692993164, - "569": 2.3770551681518555, - "570": 2.359036922454834, - "571": 2.365795135498047, - "572": 2.3864266872406006, - "573": 2.3413143157958984, - "574": 2.350370168685913, - "575": 2.3472342491149902, - "576": 2.37874174118042, - "577": 2.384798526763916, - "578": 2.37863826751709, - "579": 2.4000275135040283, - "580": 2.3721818923950195, - "581": 2.3964407444000244, - "582": 2.370248794555664, - "583": 2.40104079246521, - "584": 2.529804229736328, - "585": 2.4456586837768555, - "586": 2.401750087738037, - "587": 2.3562047481536865, - "588": 2.3346498012542725, - "589": 2.394507884979248, - "590": 2.392519235610962, - "591": 2.3665237426757812, - "592": 2.367276668548584, - "593": 2.3399550914764404 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 0.951184, - 0.958762, - 0.952255, - 0.933775, - 0.919364, - 0.943778, - 0.951936, - 0.939748, - 0.933476, - 0.933424, - 0.935321, - 0.939157, - 0.941981, - 0.940659, - 0.948665, - 0.958968, - 0.950906, - 0.936759, - 0.928653, - 0.931703, - 0.941958, - 0.946619, - 0.951362, - 0.951145, - 0.942604, - 0.946389, - 0.940782, - 0.931041, - 0.92572, - 0.94324, - 0.943505, - 0.941365, - 0.938842, - 0.942242, - 0.946362, - 0.946073, - 0.940148, - 0.929675, - 0.929779, - 0.935648, - 0.936523, - 0.937666, - 0.897114, - 0.826282, - 0.781113, - 0.940604, - 0.946841, - 0.950754, - 0.954008, - 0.942362, - 0.941266, - 0.950576, - 0.959581, - 0.964936 - ], - "train_epoch_time": 4.843826770782471, - "train_loss": 2.341122251740487, - "train_score": 0.32124955166703145, - "val_loss": 2.391289904799171, - "val_score": 0.30632893233020053 - }, - { - "epoch": 11, - "grad_norm": 0.5648512244224548, - "learning_rate": 1.0, - "model_norm": 88.19158935546875, - "step_logs": { - "grad_norm": { - "594": 0.4455691874027252, - "595": 0.5053259134292603, - "596": 0.5574855804443359, - "597": 0.5647223591804504, - "598": 0.5500990152359009, - "599": 0.5880614519119263, - "600": 0.5595914125442505, - "601": 0.4719184935092926, - "602": 0.4335293471813202, - "603": 0.46494951844215393, - "604": 0.48453763127326965, - "605": 0.5016939043998718, - "606": 0.5016187429428101, - "607": 0.5204160809516907, - "608": 0.5653454065322876, - "609": 0.5589511394500732, - "610": 0.5241890549659729, - "611": 0.5061217546463013, - "612": 0.5021583437919617, - "613": 0.5518539547920227, - "614": 0.5411025881767273, - "615": 0.5096712112426758, - "616": 0.5380481481552124, - "617": 0.6245166063308716, - "618": 0.6331339478492737, - "619": 0.704375147819519, - "620": 0.6344725489616394, - "621": 0.5149283409118652, - "622": 0.4801318943500519, - "623": 0.44577476382255554, - "624": 0.44214946031570435, - "625": 0.48248186707496643, - "626": 0.5095715522766113, - "627": 0.5438368320465088, - "628": 0.6116637587547302, - "629": 0.5771706104278564, - "630": 0.5602117776870728, - "631": 0.5381978750228882, - "632": 0.551306962966919, - "633": 0.5591872930526733, - "634": 0.5699526071548462, - "635": 0.5426019430160522, - "636": 0.5143658518791199, - "637": 0.4915454089641571, - "638": 0.4730825424194336, - "639": 0.5039641261100769, - "640": 0.6594997048377991, - "641": 0.6140817403793335, - "642": 0.47418537735939026, - "643": 0.47246062755584717, - "644": 0.5561679601669312, - "645": 0.6243742108345032, - "646": 0.5975499153137207, - "647": 0.5648512244224548 - }, - "loss": { - "594": 2.3640313148498535, - "595": 2.352384090423584, - "596": 2.3432674407958984, - "597": 2.3749561309814453, - "598": 2.3416640758514404, - "599": 2.3634157180786133, - "600": 2.3780298233032227, - "601": 2.342787742614746, - "602": 2.347719192504883, - "603": 2.340097188949585, - "604": 2.331915855407715, - "605": 2.3315820693969727, - "606": 2.36726450920105, - "607": 2.3293869495391846, - "608": 2.3595693111419678, - "609": 2.353480577468872, - "610": 2.3440096378326416, - "611": 2.3488593101501465, - "612": 2.340973138809204, - "613": 2.339700698852539, - "614": 2.3572349548339844, - "615": 2.31711483001709, - "616": 2.358613967895508, - "617": 2.3390748500823975, - "618": 2.368706226348877, - "619": 2.3891701698303223, - "620": 2.37221622467041, - "621": 2.3539481163024902, - "622": 2.3576271533966064, - "623": 2.3381154537200928, - "624": 2.333294153213501, - "625": 2.3332927227020264, - "626": 2.3361706733703613, - "627": 2.361553430557251, - "628": 2.353455066680908, - "629": 2.3577561378479004, - "630": 2.3264617919921875, - "631": 2.3549859523773193, - "632": 2.337280750274658, - "633": 2.3584494590759277, - "634": 2.341733932495117, - "635": 2.368011951446533, - "636": 2.31221342086792, - "637": 2.3277416229248047, - "638": 2.3182497024536133, - "639": 2.3294882774353027, - "640": 2.350050449371338, - "641": 2.3857479095458984, - "642": 2.3451220989227295, - "643": 2.3097457885742188, - "644": 2.3297903537750244, - "645": 2.367734432220459, - "646": 2.359572172164917, - "647": 2.3486928939819336 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 0.959702, - 0.948519, - 0.937809, - 0.937084, - 0.939308, - 0.931827, - 0.938227, - 0.954626, - 0.961513, - 0.955849, - 0.952073, - 0.948789, - 0.949536, - 0.94506, - 0.936569, - 0.937756, - 0.944633, - 0.948291, - 0.948894, - 0.938895, - 0.941527, - 0.946922, - 0.942179, - 0.923045, - 0.921986, - 0.905935, - 0.921788, - 0.946682, - 0.953389, - 0.959237, - 0.959792, - 0.952486, - 0.947351, - 0.941071, - 0.926367, - 0.934017, - 0.936813, - 0.942064, - 0.93895, - 0.93783, - 0.935139, - 0.941473, - 0.945884, - 0.950661, - 0.953952, - 0.948304, - 0.9153, - 0.926757, - 0.954253, - 0.953906, - 0.937748, - 0.923938, - 0.929659, - 0.936398 - ], - "train_epoch_time": 4.845021963119507, - "train_loss": 2.3404482294189366, - "train_score": 0.32949022589147176, - "val_loss": 2.39555838543293, - "val_score": 0.3175633250298374 - }, - { - "epoch": 12, - "grad_norm": 0.3189324140548706, - "learning_rate": 1.0, - "model_norm": 88.23883819580078, - "step_logs": { - "grad_norm": { - "648": 0.5454807281494141, - "649": 0.49072641134262085, - "650": 0.4966956675052643, - "651": 0.5081068277359009, - "652": 0.49845921993255615, - "653": 0.4477521777153015, - "654": 0.4483279883861542, - "655": 0.4255515933036804, - "656": 0.426658034324646, - "657": 0.46205249428749084, - "658": 0.45439672470092773, - "659": 0.4755401015281677, - "660": 0.5065727233886719, - "661": 0.47532299160957336, - "662": 0.4635762572288513, - "663": 0.4944394826889038, - "664": 0.5047117471694946, - "665": 0.4712497591972351, - "666": 0.45057788491249084, - "667": 0.43672606348991394, - "668": 0.416374146938324, - "669": 0.3980002999305725, - "670": 0.3841959834098816, - "671": 0.3838506042957306, - "672": 0.4220142364501953, - "673": 0.46137169003486633, - "674": 0.5507155060768127, - "675": 0.4968607723712921, - "676": 0.4080953001976013, - "677": 0.39979761838912964, - "678": 0.3902898132801056, - "679": 0.40663012862205505, - "680": 0.41750550270080566, - "681": 0.4333384931087494, - "682": 0.4720942974090576, - "683": 0.4359283447265625, - "684": 0.3877403438091278, - "685": 0.3725552558898926, - "686": 0.3724731206893921, - "687": 0.37966641783714294, - "688": 0.3572253882884979, - "689": 0.37735888361930847, - "690": 0.36963099241256714, - "691": 0.34954479336738586, - "692": 0.307496577501297, - "693": 0.31760311126708984, - "694": 0.3197334110736847, - "695": 0.2972220778465271, - "696": 0.2828977406024933, - "697": 0.3033442795276642, - "698": 0.34348076581954956, - "699": 0.32761478424072266, - "700": 0.30834025144577026, - "701": 0.3189324140548706 - }, - "loss": { - "648": 2.366711139678955, - "649": 2.344536781311035, - "650": 2.326383590698242, - "651": 2.334792375564575, - "652": 2.325902223587036, - "653": 2.312213659286499, - "654": 2.3022360801696777, - "655": 2.3183021545410156, - "656": 2.296701669692993, - "657": 2.3039915561676025, - "658": 2.312582015991211, - "659": 2.291131019592285, - "660": 2.2965288162231445, - "661": 2.3163814544677734, - "662": 2.297990083694458, - "663": 2.307262897491455, - "664": 2.3095717430114746, - "665": 2.326970100402832, - "666": 2.3024840354919434, - "667": 2.3115291595458984, - "668": 2.269829273223877, - "669": 2.291423797607422, - "670": 2.3064560890197754, - "671": 2.280780553817749, - "672": 2.2601380348205566, - "673": 2.280937671661377, - "674": 2.3007781505584717, - "675": 2.309664011001587, - "676": 2.2695772647857666, - "677": 2.2876391410827637, - "678": 2.295290231704712, - "679": 2.2544820308685303, - "680": 2.260594606399536, - "681": 2.2877604961395264, - "682": 2.3171114921569824, - "683": 2.2861509323120117, - "684": 2.2774858474731445, - "685": 2.28551983833313, - "686": 2.2663800716400146, - "687": 2.278992176055908, - "688": 2.2726635932922363, - "689": 2.2567200660705566, - "690": 2.2422826290130615, - "691": 2.261599063873291, - "692": 2.2625021934509277, - "693": 2.2599430084228516, - "694": 2.2674577236175537, - "695": 2.2820215225219727, - "696": 2.263211250305176, - "697": 2.2413551807403564, - "698": 2.2700552940368652, - "699": 2.2490994930267334, - "700": 2.2653770446777344, - "701": 2.2599213123321533 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 0.940857, - 0.945566, - 0.938506, - 0.930963, - 0.927018, - 0.93006, - 0.924117, - 0.922323, - 0.916105, - 0.904851, - 0.900551, - 0.891108, - 0.880382, - 0.880269, - 0.876153, - 0.865786, - 0.858563, - 0.858399, - 0.855368, - 0.851699, - 0.848152, - 0.844951, - 0.840943, - 0.834886, - 0.82419, - 0.813575, - 0.79549, - 0.797803, - 0.802797, - 0.798097, - 0.793364, - 0.785353, - 0.778387, - 0.771096, - 0.761199, - 0.759214, - 0.758311, - 0.753941, - 0.74791, - 0.741456, - 0.737494, - 0.729718, - 0.724393, - 0.720276, - 0.717475, - 0.710766, - 0.704675, - 0.700255, - 0.695056, - 0.687684, - 0.679157, - 0.674163, - 0.669473, - 0.662803 - ], - "train_epoch_time": 4.8442628383636475, - "train_loss": 2.251964889605726, - "train_score": 0.3482794565569551, - "val_loss": 2.305449564065506, - "val_score": 0.3320805830057672 - }, - { - "epoch": 13, - "grad_norm": 0.1924910694360733, - "learning_rate": 0.6666666666666667, - "model_norm": 88.2591781616211, - "step_logs": { - "grad_norm": { - "702": 0.30999884009361267, - "703": 0.3081570565700531, - "704": 0.329313188791275, - "705": 0.3394043743610382, - "706": 0.3209366500377655, - "707": 0.30572962760925293, - "708": 0.32642218470573425, - "709": 0.3607868254184723, - "710": 0.3755326569080353, - "711": 0.3518368601799011, - "712": 0.29647645354270935, - "713": 0.2808452248573303, - "714": 0.2867784798145294, - "715": 0.2971489727497101, - "716": 0.312147319316864, - "717": 0.30570414662361145, - "718": 0.2687722146511078, - "719": 0.26342299580574036, - "720": 0.2747038006782532, - "721": 0.26705795526504517, - "722": 0.2823183536529541, - "723": 0.2924771308898926, - "724": 0.30509480834007263, - "725": 0.2584215998649597, - "726": 0.22097358107566833, - "727": 0.2101549655199051, - "728": 0.21350732445716858, - "729": 0.20022758841514587, - "730": 0.21657611429691315, - "731": 0.21615301072597504, - "732": 0.24807579815387726, - "733": 0.26091575622558594, - "734": 0.25208982825279236, - "735": 0.2280517965555191, - "736": 0.22254043817520142, - "737": 0.2286107838153839, - "738": 0.21865878999233246, - "739": 0.23763251304626465, - "740": 0.21739302575588226, - "741": 0.2225056141614914, - "742": 0.21373525261878967, - "743": 0.18853570520877838, - "744": 0.20300988852977753, - "745": 0.19326931238174438, - "746": 0.20534896850585938, - "747": 0.23516331613063812, - "748": 0.2517220675945282, - "749": 0.23871637880802155, - "750": 0.21123512089252472, - "751": 0.1957864761352539, - "752": 0.2079763114452362, - "753": 0.22575657069683075, - "754": 0.216865673661232, - "755": 0.1924910694360733 - }, - "loss": { - "702": 2.249041795730591, - "703": 2.243256092071533, - "704": 2.2466816902160645, - "705": 2.2427403926849365, - "706": 2.2588908672332764, - "707": 2.23553466796875, - "708": 2.242371082305908, - "709": 2.2314717769622803, - "710": 2.248633861541748, - "711": 2.243069648742676, - "712": 2.2298953533172607, - "713": 2.2373337745666504, - "714": 2.260085105895996, - "715": 2.245069980621338, - "716": 2.2305984497070312, - "717": 2.2537453174591064, - "718": 2.239100694656372, - "719": 2.247382640838623, - "720": 2.2421340942382812, - "721": 2.2236971855163574, - "722": 2.2386865615844727, - "723": 2.2479429244995117, - "724": 2.260164737701416, - "725": 2.2370758056640625, - "726": 2.2144763469696045, - "727": 2.223630666732788, - "728": 2.2569363117218018, - "729": 2.2281651496887207, - "730": 2.235809326171875, - "731": 2.2433576583862305, - "732": 2.2419254779815674, - "733": 2.2283365726470947, - "734": 2.240781784057617, - "735": 2.2361268997192383, - "736": 2.2119195461273193, - "737": 2.2380967140197754, - "738": 2.2211649417877197, - "739": 2.2472715377807617, - "740": 2.221181869506836, - "741": 2.217360496520996, - "742": 2.23199462890625, - "743": 2.233004570007324, - "744": 2.2375247478485107, - "745": 2.2161431312561035, - "746": 2.227876663208008, - "747": 2.2121622562408447, - "748": 2.2314999103546143, - "749": 2.2282145023345947, - "750": 2.2316651344299316, - "751": 2.210264205932617, - "752": 2.2339797019958496, - "753": 2.218125581741333, - "754": 2.208958387374878, - "755": 2.225076675415039 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 0.657305, - 0.651388, - 0.644149, - 0.637536, - 0.632715, - 0.627462, - 0.62035, - 0.612322, - 0.605562, - 0.600977, - 0.597811, - 0.592512, - 0.586272, - 0.579734, - 0.572985, - 0.567322, - 0.562746, - 0.556899, - 0.55041, - 0.544585, - 0.538007, - 0.531605, - 0.525124, - 0.520614, - 0.515571, - 0.509752, - 0.503599, - 0.497761, - 0.491282, - 0.48519, - 0.478321, - 0.471883, - 0.466036, - 0.460484, - 0.454466, - 0.448259, - 0.442329, - 0.435872, - 0.430122, - 0.42391, - 0.417958, - 0.412223, - 0.405885, - 0.399882, - 0.39359, - 0.387008, - 0.380648, - 0.374739, - 0.369004, - 0.363051, - 0.356788, - 0.350435, - 0.344412, - 0.338549 - ], - "train_epoch_time": 4.844266176223755, - "train_loss": 2.218914871817853, - "train_score": 0.35500470776126924, - "val_loss": 2.2751464695935955, - "val_score": 0.3386373429224494 - }, - { - "epoch": 14, - "grad_norm": 0.18735578656196594, - "learning_rate": 0.33333333333333337, - "model_norm": 88.26632690429688, - "step_logs": { - "grad_norm": { - "756": 0.1972518414258957, - "757": 0.20013578236103058, - "758": 0.20564697682857513, - "759": 0.19823364913463593, - "760": 0.21963264048099518, - "761": 0.1970648318529129, - "762": 0.17788271605968475, - "763": 0.21304206550121307, - "764": 0.23827432096004486, - "765": 0.19121018052101135, - "766": 0.20167680084705353, - "767": 0.20170077681541443, - "768": 0.17833080887794495, - "769": 0.18598417937755585, - "770": 0.2017047256231308, - "771": 0.20328561961650848, - "772": 0.16974008083343506, - "773": 0.17685215175151825, - "774": 0.19144922494888306, - "775": 0.20395591855049133, - "776": 0.20244090259075165, - "777": 0.19777093827724457, - "778": 0.19130989909172058, - "779": 0.19305555522441864, - "780": 0.17410975694656372, - "781": 0.20465616881847382, - "782": 0.2013433575630188, - "783": 0.19822944700717926, - "784": 0.1960882842540741, - "785": 0.18061654269695282, - "786": 0.1915721893310547, - "787": 0.1670348346233368, - "788": 0.18097978830337524, - "789": 0.18367715179920197, - "790": 0.19308632612228394, - "791": 0.21377742290496826, - "792": 0.1805594116449356, - "793": 0.16841340065002441, - "794": 0.19316135346889496, - "795": 0.1863432675600052, - "796": 0.1662268042564392, - "797": 0.1815100908279419, - "798": 0.17816632986068726, - "799": 0.17863771319389343, - "800": 0.16933517158031464, - "801": 0.1726406365633011, - "802": 0.1802784949541092, - "803": 0.1657143533229828, - "804": 0.17711059749126434, - "805": 0.1710374653339386, - "806": 0.15857510268688202, - "807": 0.17609231173992157, - "808": 0.18111881613731384, - "809": 0.18735578656196594 - }, - "loss": { - "756": 2.200650215148926, - "757": 2.20719313621521, - "758": 2.2292795181274414, - "759": 2.2182607650756836, - "760": 2.187898635864258, - "761": 2.22731876373291, - "762": 2.2302346229553223, - "763": 2.2116222381591797, - "764": 2.215651273727417, - "765": 2.206503391265869, - "766": 2.221101760864258, - "767": 2.2292208671569824, - "768": 2.211580276489258, - "769": 2.2003841400146484, - "770": 2.239424705505371, - "771": 2.231846809387207, - "772": 2.222195863723755, - "773": 2.2056221961975098, - "774": 2.2195372581481934, - "775": 2.2274832725524902, - "776": 2.2267355918884277, - "777": 2.225849151611328, - "778": 2.1938111782073975, - "779": 2.219778060913086, - "780": 2.203202962875366, - "781": 2.204745292663574, - "782": 2.221733808517456, - "783": 2.2287707328796387, - "784": 2.2243988513946533, - "785": 2.200479507446289, - "786": 2.207712411880493, - "787": 2.210223913192749, - "788": 2.2429215908050537, - "789": 2.203415870666504, - "790": 2.2089006900787354, - "791": 2.2147860527038574, - "792": 2.219022274017334, - "793": 2.2020816802978516, - "794": 2.201672315597534, - "795": 2.218629837036133, - "796": 2.2059714794158936, - "797": 2.2188315391540527, - "798": 2.210559844970703, - "799": 2.20656681060791, - "800": 2.2285170555114746, - "801": 2.2160894870758057, - "802": 2.1947011947631836, - "803": 2.200603485107422, - "804": 2.223465919494629, - "805": 2.2105941772460938, - "806": 2.2058119773864746, - "807": 2.2213356494903564, - "808": 2.196129322052002, - "809": 2.2225122451782227 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 0.332354, - 0.326192, - 0.320013, - 0.313939, - 0.307595, - 0.301674, - 0.295675, - 0.289262, - 0.282921, - 0.27714, - 0.270931, - 0.264791, - 0.258777, - 0.252584, - 0.246361, - 0.240205, - 0.234212, - 0.228026, - 0.221815, - 0.215614, - 0.209472, - 0.20334, - 0.197206, - 0.191051, - 0.18495, - 0.178708, - 0.172567, - 0.166422, - 0.160271, - 0.154145, - 0.147966, - 0.141848, - 0.135668, - 0.129501, - 0.123328, - 0.117142, - 0.11102, - 0.104867, - 0.0986829, - 0.0925255, - 0.086373, - 0.0801991, - 0.0740347, - 0.0678679, - 0.0617039, - 0.0555348, - 0.0493647, - 0.0431982, - 0.0370274, - 0.0308579, - 0.0246879, - 0.0185161, - 0.0123445, - 0.00617254 - ], - "train_epoch_time": 4.844316482543945, - "train_loss": 2.2101509927506084, - "train_score": 0.35690907460545196, - "val_loss": 2.270607901769172, - "val_score": 0.33950290603971645 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:38:55.948032", - "final_model_norm": 88.26632690429688, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:37:14.339733", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "ngn", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 4.823538303375244, - "learning_rate": 1e-10, - "model_norm": 87.35120391845703, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 6.2565083503723145, - "3": 7.88982629776001, - "4": 11.964227676391602, - "5": 4.96006441116333, - "6": 6.8435235023498535, - "7": 5.717643737792969, - "8": 4.00455379486084, - "9": 5.72320032119751, - "10": 8.341522216796875, - "11": 3.961703062057495, - "12": 8.768805503845215, - "13": 16.514781951904297, - "14": 6.734203815460205, - "15": 6.085165023803711, - "16": 12.341890335083008, - "17": 7.289083003997803, - "18": 9.369098663330078, - "19": 16.68512725830078, - "20": 8.437172889709473, - "21": 5.4232001304626465, - "22": 17.26092529296875, - "23": 7.168286323547363, - "24": 4.13083028793335, - "25": 16.84851837158203, - "26": 13.853059768676758, - "27": 8.860191345214844, - "28": 9.997201919555664, - "29": 10.188836097717285, - "30": 4.45705509185791, - "31": 17.632375717163086, - "32": 9.241437911987305, - "33": 7.948631286621094, - "34": 3.2504324913024902, - "35": 14.708056449890137, - "36": 4.642722129821777, - "37": 4.221202373504639, - "38": 5.220492839813232, - "39": 2.6587140560150146, - "40": 2.6095921993255615, - "41": 1.6444931030273438, - "42": 6.288504600524902, - "43": 1.4923979043960571, - "44": 10.944479942321777, - "45": 1.7730772495269775, - "46": 6.987361431121826, - "47": 1.9503520727157593, - "48": 13.502943992614746, - "49": 1.7726036310195923, - "50": 2.3103184700012207, - "51": 12.616952896118164, - "52": 1.661839246749878, - "53": 4.823538303375244 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.7843165397644043, - "3": 4.029876232147217, - "4": 4.144651889801025, - "5": 4.560702800750732, - "6": 3.7188267707824707, - "7": 4.114666938781738, - "8": 4.800957202911377, - "9": 3.598372459411621, - "10": 4.166380882263184, - "11": 5.117392539978027, - "12": 6.413439750671387, - "13": 5.329720973968506, - "14": 3.988182306289673, - "15": 4.154473304748535, - "16": 4.940454483032227, - "17": 5.166607856750488, - "18": 4.151963710784912, - "19": 6.220420837402344, - "20": 5.248250961303711, - "21": 4.856843948364258, - "22": 8.520427703857422, - "23": 6.243471622467041, - "24": 4.959427833557129, - "25": 9.62490463256836, - "26": 8.307036399841309, - "27": 7.091299533843994, - "28": 6.120484352111816, - "29": 6.131654739379883, - "30": 6.962408065795898, - "31": 7.249964714050293, - "32": 5.922432899475098, - "33": 6.217356204986572, - "34": 4.812512397766113, - "35": 8.056571960449219, - "36": 4.838377952575684, - "37": 4.32801628112793, - "38": 4.577471733093262, - "39": 5.102447032928467, - "40": 4.514163017272949, - "41": 3.5082497596740723, - "42": 5.699440002441406, - "43": 3.7615678310394287, - "44": 6.136580467224121, - "45": 3.9205100536346436, - "46": 4.462265968322754, - "47": 4.1435136795043945, - "48": 6.60205078125, - "49": 3.658388614654541, - "50": 3.652296543121338, - "51": 5.712080001831055, - "52": 3.88555645942688, - "53": 4.9976348876953125 - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "step_size_list": [ - 1e-10, - 0.0090362, - 0.0331435, - 0.0410002, - 0.0335927, - 0.0787575, - 0.0683518, - 0.0899652, - 0.126261, - 0.0989421, - 0.0749047, - 0.164502, - 0.098413, - 0.0339758, - 0.108027, - 0.128372, - 0.0539351, - 0.123718, - 0.0749138, - 0.0399857, - 0.107737, - 0.184886, - 0.0506161, - 0.159009, - 0.262904, - 0.059713, - 0.0742172, - 0.135373, - 0.100498, - 0.098141, - 0.323281, - 0.0433756, - 0.11399, - 0.151604, - 0.389366, - 0.0673216, - 0.27652, - 0.293267, - 0.232953, - 0.506397, - 0.49893, - 0.623076, - 0.214606, - 0.685474, - 0.0917768, - 0.661352, - 0.152494, - 0.656666, - 0.0673391, - 0.689727, - 0.577796, - 0.06696, - 0.7378, - 0.300503 - ], - "train_epoch_time": 4.845650672912598, - "train_loss": 4.346086560910196, - "train_score": 0.15250291421988088, - "val_loss": 4.360582778155462, - "val_score": 0.151025222291492 - }, - { - "epoch": 1, - "grad_norm": 4.512044906616211, - "learning_rate": 1.0, - "model_norm": 87.37397003173828, - "step_logs": { - "grad_norm": { - "54": 7.251988410949707, - "55": 15.8644437789917, - "56": 1.1270478963851929, - "57": 4.473561763763428, - "58": 2.9332540035247803, - "59": 18.531587600708008, - "60": 11.311650276184082, - "61": 4.0336222648620605, - "62": 0.965795636177063, - "63": 7.779601573944092, - "64": 3.2097480297088623, - "65": 3.0750300884246826, - "66": 3.0253684520721436, - "67": 14.161813735961914, - "68": 5.438813209533691, - "69": 2.6942594051361084, - "70": 2.6608285903930664, - "71": 2.6943233013153076, - "72": 3.844778537750244, - "73": 2.3849990367889404, - "74": 17.495288848876953, - "75": 3.5518290996551514, - "76": 2.5672812461853027, - "77": 19.015371322631836, - "78": 3.306727170944214, - "79": 1.3112736940383911, - "80": 15.822526931762695, - "81": 3.9387097358703613, - "82": 4.532346248626709, - "83": 2.28145432472229, - "84": 13.847086906433105, - "85": 2.366462469100952, - "86": 0.8172315359115601, - "87": 4.138247489929199, - "88": 1.0341992378234863, - "89": 1.772910475730896, - "90": 7.529833793640137, - "91": 2.508044958114624, - "92": 2.8410980701446533, - "93": 1.5059130191802979, - "94": 1.2346277236938477, - "95": 2.402649164199829, - "96": 1.8017809391021729, - "97": 12.121929168701172, - "98": 2.8432464599609375, - "99": 0.9545131921768188, - "100": 4.520432472229004, - "101": 1.124801516532898, - "102": 2.301797866821289, - "103": 1.802559733390808, - "104": 2.4258575439453125, - "105": 4.497735977172852, - "106": 2.356623888015747, - "107": 4.512044906616211 - }, - "loss": { - "54": 4.339327335357666, - "55": 7.530325412750244, - "56": 3.7264466285705566, - "57": 4.140336990356445, - "58": 4.434525489807129, - "59": 6.198064804077148, - "60": 5.086067199707031, - "61": 5.056458473205566, - "62": 3.6493189334869385, - "63": 4.742439270019531, - "64": 4.1082048416137695, - "65": 4.339682579040527, - "66": 3.7419819831848145, - "67": 5.806866645812988, - "68": 5.333910942077637, - "69": 3.884222984313965, - "70": 4.3664140701293945, - "71": 4.497482776641846, - "72": 3.938791275024414, - "73": 4.020101547241211, - "74": 6.818613529205322, - "75": 3.8480968475341797, - "76": 4.219987392425537, - "77": 10.126190185546875, - "78": 4.448222637176514, - "79": 3.598482131958008, - "80": 8.532045364379883, - "81": 3.8793416023254395, - "82": 3.812709093093872, - "83": 4.266729354858398, - "84": 6.01512336730957, - "85": 4.1626996994018555, - "86": 3.3788065910339355, - "87": 4.7241597175598145, - "88": 3.4365878105163574, - "89": 3.7387542724609375, - "90": 4.382164478302002, - "91": 4.326023578643799, - "92": 3.707341194152832, - "93": 3.93203067779541, - "94": 3.5497255325317383, - "95": 3.884605646133423, - "96": 3.8811445236206055, - "97": 6.000147819519043, - "98": 4.273425579071045, - "99": 3.4745020866394043, - "100": 4.062713623046875, - "101": 3.5846211910247803, - "102": 3.8877429962158203, - "103": 3.731144905090332, - "104": 4.204802513122559, - "105": 3.9859097003936768, - "106": 3.831906795501709, - "107": 4.059577941894531 - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "step_size_list": [ - 0.141646, - 0.0564617, - 0.854383, - 0.292671, - 0.507585, - 0.0348386, - 0.0736441, - 0.383311, - 0.886683, - 0.135485, - 0.443677, - 0.478593, - 0.449843, - 0.0547377, - 0.265049, - 0.516949, - 0.552262, - 0.553388, - 0.347644, - 0.585661, - 0.0426533, - 0.378905, - 0.561508, - 0.0530394, - 0.448616, - 0.80716, - 0.0638109, - 0.33339, - 0.270716, - 0.621135, - 0.0590378, - 0.597851, - 0.910057, - 0.355555, - 0.86534, - 0.704049, - 0.133883, - 0.579029, - 0.478783, - 0.776173, - 0.823243, - 0.573715, - 0.705105, - 0.0755014, - 0.513914, - 0.884086, - 0.284506, - 0.849998, - 0.59474, - 0.696661, - 0.588315, - 0.282674, - 0.579824, - 0.285106 - ], - "train_epoch_time": 4.844127178192139, - "train_loss": 4.14564177788142, - "train_score": 0.08546336982479397, - "val_loss": 4.150988496130013, - "val_score": 0.08249766796177208 - }, - { - "epoch": 2, - "grad_norm": 0.7047867178916931, - "learning_rate": 1.0, - "model_norm": 87.48506927490234, - "step_logs": { - "grad_norm": { - "108": 1.4983487129211426, - "109": 1.2913858890533447, - "110": 1.6994794607162476, - "111": 2.730990409851074, - "112": 2.5205719470977783, - "113": 2.494032859802246, - "114": 2.87644100189209, - "115": 2.64274001121521, - "116": 2.8192801475524902, - "117": 0.813446581363678, - "118": 1.87333083152771, - "119": 12.412668228149414, - "120": 3.2681543827056885, - "121": 0.816596508026123, - "122": 8.50576114654541, - "123": 2.056885242462158, - "124": 1.4469059705734253, - "125": 4.668663024902344, - "126": 1.66146981716156, - "127": 1.6472582817077637, - "128": 1.199605107307434, - "129": 6.843109130859375, - "130": 12.066320419311523, - "131": 1.6553430557250977, - "132": 10.090495109558105, - "133": 1.880860447883606, - "134": 1.3028234243392944, - "135": 1.2324339151382446, - "136": 1.7973941564559937, - "137": 0.8165777921676636, - "138": 2.5478763580322266, - "139": 1.0959110260009766, - "140": 2.330714702606201, - "141": 1.665844202041626, - "142": 2.3689467906951904, - "143": 1.56474769115448, - "144": 5.588191509246826, - "145": 1.6102663278579712, - "146": 1.687662959098816, - "147": 0.5792037844657898, - "148": 1.041499137878418, - "149": 2.03286075592041, - "150": 0.7721120715141296, - "151": 3.103257894515991, - "152": 0.9085686206817627, - "153": 1.8697940111160278, - "154": 1.200742483139038, - "155": 0.9075438380241394, - "156": 0.9638105630874634, - "157": 5.558727741241455, - "158": 0.8548763990402222, - "159": 0.916251003742218, - "160": 0.823141872882843, - "161": 0.7047867178916931 - }, - "loss": { - "108": 4.136025905609131, - "109": 3.6685400009155273, - "110": 3.491719961166382, - "111": 4.523752689361572, - "112": 3.6392483711242676, - "113": 4.23232889175415, - "114": 3.9685144424438477, - "115": 4.4471001625061035, - "116": 3.67026948928833, - "117": 3.5676987171173096, - "118": 3.554564952850342, - "119": 6.902256011962891, - "120": 4.3198957443237305, - "121": 3.402722120285034, - "122": 4.637648582458496, - "123": 3.7611632347106934, - "124": 3.6920828819274902, - "125": 3.8159029483795166, - "126": 4.0966315269470215, - "127": 3.707505702972412, - "128": 3.344067096710205, - "129": 4.417843818664551, - "130": 4.530896186828613, - "131": 3.6134791374206543, - "132": 4.731829643249512, - "133": 3.390665054321289, - "134": 3.606768846511841, - "135": 3.2326221466064453, - "136": 3.841463088989258, - "137": 3.1436095237731934, - "138": 3.6988344192504883, - "139": 3.381613254547119, - "140": 3.393953323364258, - "141": 3.7028579711914062, - "142": 3.4534738063812256, - "143": 3.424208641052246, - "144": 3.822815418243408, - "145": 3.7035539150238037, - "146": 3.2978720664978027, - "147": 3.1762351989746094, - "148": 3.1373298168182373, - "149": 3.7157065868377686, - "150": 3.200949192047119, - "151": 3.6398849487304688, - "152": 3.362764835357666, - "153": 3.2505688667297363, - "154": 3.459547758102417, - "155": 3.165719985961914, - "156": 3.1284775733947754, - "157": 4.016909599304199, - "158": 3.189304828643799, - "159": 2.985337972640991, - "160": 3.219618320465088, - "161": 2.9568843841552734 - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "step_size_list": [ - 0.786534, - 0.8148, - 0.707422, - 0.548141, - 0.533936, - 0.576421, - 0.489609, - 0.560149, - 0.480123, - 0.915136, - 0.669504, - 0.082229, - 0.44718, - 0.910759, - 0.113635, - 0.640029, - 0.779109, - 0.259336, - 0.747987, - 0.732096, - 0.822933, - 0.158733, - 0.0585925, - 0.72508, - 0.0850423, - 0.657171, - 0.80952, - 0.809761, - 0.70398, - 0.904113, - 0.532615, - 0.849198, - 0.555469, - 0.727423, - 0.551724, - 0.736638, - 0.19668, - 0.740706, - 0.698409, - 0.949839, - 0.852607, - 0.642637, - 0.914811, - 0.430501, - 0.890678, - 0.650292, - 0.827556, - 0.884888, - 0.870728, - 0.206348, - 0.897205, - 0.876727, - 0.904794, - 0.922514 - ], - "train_epoch_time": 4.84415602684021, - "train_loss": 3.017545517547914, - "train_score": 0.18367557391802242, - "val_loss": 3.045292272907174, - "val_score": 0.1787950275241302 - }, - { - "epoch": 3, - "grad_norm": 0.4002307057380676, - "learning_rate": 1.0, - "model_norm": 87.59906768798828, - "step_logs": { - "grad_norm": { - "162": 1.2257070541381836, - "163": 1.4252961874008179, - "164": 0.7772822976112366, - "165": 0.854894757270813, - "166": 0.7863472700119019, - "167": 0.9834524393081665, - "168": 0.8009421825408936, - "169": 0.6448637247085571, - "170": 0.5537456274032593, - "171": 0.7591561675071716, - "172": 1.1511164903640747, - "173": 1.0290831327438354, - "174": 0.8881003260612488, - "175": 0.8370724320411682, - "176": 1.166159749031067, - "177": 1.0723377466201782, - "178": 0.8646930456161499, - "179": 0.5154723525047302, - "180": 0.6050717830657959, - "181": 0.7991882562637329, - "182": 1.3131170272827148, - "183": 0.8054488301277161, - "184": 0.7397527694702148, - "185": 0.9199510216712952, - "186": 0.9297583699226379, - "187": 0.7554708123207092, - "188": 0.8410573601722717, - "189": 1.3671910762786865, - "190": 0.7584227919578552, - "191": 0.5444364547729492, - "192": 0.4343738257884979, - "193": 0.572770893573761, - "194": 0.913043200969696, - "195": 1.0558162927627563, - "196": 0.6686832904815674, - "197": 0.6075354218482971, - "198": 1.2297452688217163, - "199": 0.7891676425933838, - "200": 0.6268939971923828, - "201": 0.5721192955970764, - "202": 0.7553921341896057, - "203": 0.9939144253730774, - "204": 0.8785144686698914, - "205": 0.6097036004066467, - "206": 0.5276299715042114, - "207": 0.7230173349380493, - "208": 0.9269310832023621, - "209": 0.8904930353164673, - "210": 0.6510063409805298, - "211": 0.579686164855957, - "212": 0.6641437411308289, - "213": 0.9507161378860474, - "214": 0.7723180055618286, - "215": 0.4002307057380676 - }, - "loss": { - "162": 3.0161948204040527, - "163": 3.284764289855957, - "164": 3.1054205894470215, - "165": 2.993417263031006, - "166": 3.031731367111206, - "167": 2.993283271789551, - "168": 3.10546612739563, - "169": 2.8692851066589355, - "170": 2.8510661125183105, - "171": 2.9046621322631836, - "172": 2.984726905822754, - "173": 3.0306029319763184, - "174": 3.0619125366210938, - "175": 2.881030321121216, - "176": 2.9785983562469482, - "177": 3.021331310272217, - "178": 3.0799038410186768, - "179": 2.8446714878082275, - "180": 2.7676024436950684, - "181": 2.871354341506958, - "182": 2.9102816581726074, - "183": 3.0837364196777344, - "184": 2.8665523529052734, - "185": 2.862290859222412, - "186": 2.9185848236083984, - "187": 2.8774290084838867, - "188": 2.854646921157837, - "189": 2.9404711723327637, - "190": 3.0454816818237305, - "191": 2.810105562210083, - "192": 2.724196434020996, - "193": 2.7497920989990234, - "194": 2.7893741130828857, - "195": 2.9536008834838867, - "196": 2.835805892944336, - "197": 2.773388624191284, - "198": 2.8488388061523438, - "199": 2.9932303428649902, - "200": 2.8020873069763184, - "201": 2.746697425842285, - "202": 2.765296697616577, - "203": 2.8473215103149414, - "204": 2.929778575897217, - "205": 2.7714762687683105, - "206": 2.740403652191162, - "207": 2.7294816970825195, - "208": 2.835923194885254, - "209": 2.847292423248291, - "210": 2.8470582962036133, - "211": 2.6816773414611816, - "212": 2.7541491985321045, - "213": 2.7499663829803467, - "214": 2.910299301147461, - "215": 2.7164416313171387 - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "step_size_list": [ - 0.800609, - 0.76381, - 0.911347, - 0.891206, - 0.907459, - 0.860913, - 0.906383, - 0.932431, - 0.948969, - 0.909748, - 0.818347, - 0.851267, - 0.8859, - 0.89158, - 0.814144, - 0.840126, - 0.891756, - 0.95538, - 0.937961, - 0.899912, - 0.771463, - 0.904823, - 0.912866, - 0.871203, - 0.871009, - 0.909773, - 0.889759, - 0.758816, - 0.913713, - 0.949902, - 0.966529, - 0.943705, - 0.869994, - 0.841248, - 0.926923, - 0.937609, - 0.790252, - 0.905771, - 0.93447, - 0.943766, - 0.906475, - 0.852171, - 0.883615, - 0.93715, - 0.951661, - 0.912608, - 0.868444, - 0.87777, - 0.930727, - 0.94104, - 0.92586, - 0.858855, - 0.907049, - 0.97136 - ], - "train_epoch_time": 4.844200849533081, - "train_loss": 2.661428938021448, - "train_score": 0.24712607608293016, - "val_loss": 2.681328601596276, - "val_score": 0.24196326097924728 - }, - { - "epoch": 4, - "grad_norm": 0.5701131224632263, - "learning_rate": 1.0, - "model_norm": 87.68059539794922, - "step_logs": { - "grad_norm": { - "216": 0.35157185792922974, - "217": 0.6305283308029175, - "218": 0.6689011454582214, - "219": 0.7682798504829407, - "220": 0.6697713136672974, - "221": 0.4669936001300812, - "222": 0.608452320098877, - "223": 0.9744377136230469, - "224": 0.6994677186012268, - "225": 0.45972776412963867, - "226": 0.5007526278495789, - "227": 0.7899810671806335, - "228": 0.890059769153595, - "229": 0.7429040670394897, - "230": 0.6641275882720947, - "231": 0.7926104068756104, - "232": 0.7025500535964966, - "233": 0.47195491194725037, - "234": 0.5225667357444763, - "235": 0.9557555317878723, - "236": 0.8144726753234863, - "237": 0.4025629758834839, - "238": 0.33580297231674194, - "239": 0.4634389281272888, - "240": 0.6295719146728516, - "241": 0.6642836928367615, - "242": 0.8138523101806641, - "243": 0.6546770930290222, - "244": 0.3867209553718567, - "245": 0.4119410216808319, - "246": 0.6824784874916077, - "247": 0.8117210865020752, - "248": 0.7649862170219421, - "249": 0.7218926548957825, - "250": 0.5709288716316223, - "251": 0.5692389011383057, - "252": 0.8376795649528503, - "253": 0.7577732801437378, - "254": 0.5632246136665344, - "255": 0.7132888436317444, - "256": 0.7931423783302307, - "257": 0.6427789330482483, - "258": 0.5352956056594849, - "259": 0.6267139315605164, - "260": 0.8365180492401123, - "261": 0.6753789782524109, - "262": 0.40073320269584656, - "263": 0.4384753108024597, - "264": 0.5244778394699097, - "265": 0.6394432783126831, - "266": 0.8070451021194458, - "267": 0.6586354970932007, - "268": 0.5192147493362427, - "269": 0.5701131224632263 - }, - "loss": { - "216": 2.6652145385742188, - "217": 2.6709206104278564, - "218": 2.754687786102295, - "219": 2.761075496673584, - "220": 2.8076882362365723, - "221": 2.6866326332092285, - "222": 2.720402240753174, - "223": 2.7774767875671387, - "224": 2.8788671493530273, - "225": 2.6797099113464355, - "226": 2.671217918395996, - "227": 2.7287678718566895, - "228": 2.8325276374816895, - "229": 2.7546448707580566, - "230": 2.7611241340637207, - "231": 2.7264084815979004, - "232": 2.8039281368255615, - "233": 2.6774282455444336, - "234": 2.6826274394989014, - "235": 2.7156553268432617, - "236": 2.899374485015869, - "237": 2.688506603240967, - "238": 2.6291966438293457, - "239": 2.63651704788208, - "240": 2.704230785369873, - "241": 2.7291460037231445, - "242": 2.705507516860962, - "243": 2.785426139831543, - "244": 2.657219886779785, - "245": 2.613079071044922, - "246": 2.66627836227417, - "247": 2.748772144317627, - "248": 2.7636160850524902, - "249": 2.778533935546875, - "250": 2.6858222484588623, - "251": 2.6874942779541016, - "252": 2.7338716983795166, - "253": 2.8107264041900635, - "254": 2.6686882972717285, - "255": 2.6972086429595947, - "256": 2.7379114627838135, - "257": 2.7553751468658447, - "258": 2.6499733924865723, - "259": 2.700737953186035, - "260": 2.694472312927246, - "261": 2.7928671836853027, - "262": 2.6408352851867676, - "263": 2.641464948654175, - "264": 2.676417589187622, - "265": 2.6676526069641113, - "266": 2.7247204780578613, - "267": 2.7644755840301514, - "268": 2.6589903831481934, - "269": 2.678616762161255 - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "step_size_list": [ - 0.977337, - 0.930731, - 0.924888, - 0.903433, - 0.926023, - 0.960996, - 0.936291, - 0.854019, - 0.921681, - 0.962061, - 0.955168, - 0.897384, - 0.877315, - 0.908944, - 0.926037, - 0.89669, - 0.919105, - 0.960065, - 0.951568, - 0.856028, - 0.897345, - 0.970743, - 0.979006, - 0.960863, - 0.931719, - 0.925202, - 0.890941, - 0.92856, - 0.972629, - 0.968551, - 0.919671, - 0.892975, - 0.90426, - 0.914263, - 0.94279, - 0.943142, - 0.886261, - 0.907319, - 0.9439, - 0.913813, - 0.896956, - 0.930255, - 0.948708, - 0.932214, - 0.885072, - 0.924504, - 0.970493, - 0.964885, - 0.951123, - 0.928817, - 0.893239, - 0.927248, - 0.951753, - 0.942799 - ], - "train_epoch_time": 4.843627214431763, - "train_loss": 2.6792670729512635, - "train_score": 0.24416808642906643, - "val_loss": 2.7003905209279635, - "val_score": 0.2410169705389288 - }, - { - "epoch": 5, - "grad_norm": 0.6564647555351257, - "learning_rate": 1.0, - "model_norm": 87.75013732910156, - "step_logs": { - "grad_norm": { - "270": 0.8181772828102112, - "271": 0.6801655888557434, - "272": 0.4163513481616974, - "273": 0.4679330289363861, - "274": 0.6349716186523438, - "275": 0.7468962073326111, - "276": 0.7852400541305542, - "277": 0.6241344809532166, - "278": 0.4764142334461212, - "279": 0.5797223448753357, - "280": 0.7787536382675171, - "281": 0.6379414796829224, - "282": 0.398966908454895, - "283": 0.49806177616119385, - "284": 0.7276239395141602, - "285": 0.7271779775619507, - "286": 0.6163162589073181, - "287": 0.5810685157775879, - "288": 0.6158027052879333, - "289": 0.6318897604942322, - "290": 0.593970537185669, - "291": 0.5808635950088501, - "292": 0.5572472810745239, - "293": 0.6181164979934692, - "294": 0.6605523824691772, - "295": 0.620652437210083, - "296": 0.5607692003250122, - "297": 0.5383511185646057, - "298": 0.5575741529464722, - "299": 0.6443954706192017, - "300": 0.6675539612770081, - "301": 0.65948486328125, - "302": 0.6175847053527832, - "303": 0.5659894943237305, - "304": 0.5658437609672546, - "305": 0.5902078747749329, - "306": 0.5941104888916016, - "307": 0.590848982334137, - "308": 0.6037076115608215, - "309": 0.6228870153427124, - "310": 0.5768495202064514, - "311": 0.5681480765342712, - "312": 0.5810586214065552, - "313": 0.5691453814506531, - "314": 0.5784564018249512, - "315": 0.6008875370025635, - "316": 0.5586863160133362, - "317": 0.6046246290206909, - "318": 0.6240992546081543, - "319": 0.5120927691459656, - "320": 0.4306298792362213, - "321": 0.4668160378932953, - "322": 0.6215357780456543, - "323": 0.6564647555351257 - }, - "loss": { - "270": 2.693901300430298, - "271": 2.79805326461792, - "272": 2.645878314971924, - "273": 2.638319253921509, - "274": 2.6527023315429688, - "275": 2.707859992980957, - "276": 2.6951546669006348, - "277": 2.751971483230591, - "278": 2.6244640350341797, - "279": 2.672447681427002, - "280": 2.652071237564087, - "281": 2.762293577194214, - "282": 2.6372337341308594, - "283": 2.6398119926452637, - "284": 2.659114360809326, - "285": 2.749040365219116, - "286": 2.6650798320770264, - "287": 2.69551420211792, - "288": 2.6418070793151855, - "289": 2.7103919982910156, - "290": 2.640364646911621, - "291": 2.675276279449463, - "292": 2.621049642562866, - "293": 2.670933723449707, - "294": 2.6734423637390137, - "295": 2.6947922706604004, - "296": 2.6273746490478516, - "297": 2.6775970458984375, - "298": 2.6315767765045166, - "299": 2.681574583053589, - "300": 2.653317928314209, - "301": 2.707068920135498, - "302": 2.6572303771972656, - "303": 2.666687250137329, - "304": 2.6224920749664307, - "305": 2.6868209838867188, - "306": 2.6465818881988525, - "307": 2.6512224674224854, - "308": 2.642472982406616, - "309": 2.693118095397949, - "310": 2.644137382507324, - "311": 2.6688456535339355, - "312": 2.6253676414489746, - "313": 2.6528191566467285, - "314": 2.64564847946167, - "315": 2.6564254760742188, - "316": 2.6119375228881836, - "317": 2.6588306427001953, - "318": 2.6486783027648926, - "319": 2.634204626083374, - "320": 2.6109776496887207, - "321": 2.604259490966797, - "322": 2.6050381660461426, - "323": 2.692103147506714 - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "step_size_list": [ - 0.889485, - 0.923643, - 0.968281, - 0.960157, - 0.929372, - 0.906613, - 0.897351, - 0.933903, - 0.958551, - 0.940841, - 0.897395, - 0.931389, - 0.970706, - 0.955123, - 0.909462, - 0.912262, - 0.933477, - 0.941061, - 0.933035, - 0.931395, - 0.937375, - 0.940681, - 0.944076, - 0.933251, - 0.924552, - 0.933295, - 0.943536, - 0.948659, - 0.944226, - 0.928138, - 0.92253, - 0.925643, - 0.933037, - 0.943339, - 0.942467, - 0.939122, - 0.937485, - 0.938229, - 0.935487, - 0.932807, - 0.940802, - 0.942974, - 0.939584, - 0.94246, - 0.940523, - 0.936364, - 0.943618, - 0.935676, - 0.931509, - 0.952584, - 0.965706, - 0.959842, - 0.930972, - 0.925893 - ], - "train_epoch_time": 4.844163179397583, - "train_loss": 2.630333215434377, - "train_score": 0.22913490846817258, - "val_loss": 2.6502758301495137, - "val_score": 0.22905155698139001 - }, - { - "epoch": 6, - "grad_norm": 0.5630146265029907, - "learning_rate": 1.0, - "model_norm": 87.8231430053711, - "step_logs": { - "grad_norm": { - "324": 0.5665695071220398, - "325": 0.6207720637321472, - "326": 0.679410994052887, - "327": 0.640845537185669, - "328": 0.6557438373565674, - "329": 0.6467770338058472, - "330": 0.5974902510643005, - "331": 0.6088232398033142, - "332": 0.6805403828620911, - "333": 0.6934331059455872, - "334": 0.6434071063995361, - "335": 0.6306788921356201, - "336": 0.6044982075691223, - "337": 0.5734537243843079, - "338": 0.5707707405090332, - "339": 0.5774801969528198, - "340": 0.6090638041496277, - "341": 0.6756903529167175, - "342": 0.5592733025550842, - "343": 0.5819928646087646, - "344": 0.6928020119667053, - "345": 0.6013165712356567, - "346": 0.4722282290458679, - "347": 0.4533126652240753, - "348": 0.5106614232063293, - "349": 0.552486002445221, - "350": 0.5333740711212158, - "351": 0.5413498282432556, - "352": 0.5809600949287415, - "353": 0.5913351774215698, - "354": 0.6134089231491089, - "355": 0.6003088355064392, - "356": 0.5555221438407898, - "357": 0.570401668548584, - "358": 0.637662947177887, - "359": 0.5945121645927429, - "360": 0.48445793986320496, - "361": 0.5014073848724365, - "362": 0.5888465642929077, - "363": 0.6221649050712585, - "364": 0.5847055315971375, - "365": 0.5664844512939453, - "366": 0.5298759341239929, - "367": 0.5440005660057068, - "368": 0.5984290838241577, - "369": 0.5895470380783081, - "370": 0.5926340222358704, - "371": 0.5938579440116882, - "372": 0.565003514289856, - "373": 0.5573912262916565, - "374": 0.5748423337936401, - "375": 0.5812371373176575, - "376": 0.5559704303741455, - "377": 0.5630146265029907 - }, - "loss": { - "324": 2.6389575004577637, - "325": 2.6744384765625, - "326": 2.656019687652588, - "327": 2.672060489654541, - "328": 2.643585443496704, - "329": 2.655033588409424, - "330": 2.6457149982452393, - "331": 2.6522912979125977, - "332": 2.617915630340576, - "333": 2.6835780143737793, - "334": 2.614906072616577, - "335": 2.6859030723571777, - "336": 2.6196837425231934, - "337": 2.6409616470336914, - "338": 2.6314563751220703, - "339": 2.6548380851745605, - "340": 2.6133038997650146, - "341": 2.670090675354004, - "342": 2.6241979598999023, - "343": 2.6148767471313477, - "344": 2.637996196746826, - "345": 2.6516692638397217, - "346": 2.5890603065490723, - "347": 2.5865163803100586, - "348": 2.598806619644165, - "349": 2.624627113342285, - "350": 2.600104570388794, - "351": 2.6266677379608154, - "352": 2.6116387844085693, - "353": 2.6305651664733887, - "354": 2.600620746612549, - "355": 2.6391561031341553, - "356": 2.6104931831359863, - "357": 2.6266136169433594, - "358": 2.617392063140869, - "359": 2.642134666442871, - "360": 2.5678536891937256, - "361": 2.5877432823181152, - "362": 2.5749778747558594, - "363": 2.643909454345703, - "364": 2.5950937271118164, - "365": 2.6365654468536377, - "366": 2.5696539878845215, - "367": 2.6266026496887207, - "368": 2.602271318435669, - "369": 2.644552707672119, - "370": 2.6005771160125732, - "371": 2.6311559677124023, - "372": 2.5861282348632812, - "373": 2.6212921142578125, - "374": 2.5832526683807373, - "375": 2.6001381874084473, - "376": 2.5962820053100586, - "377": 2.611752986907959 - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "step_size_list": [ - 0.942667, - 0.932797, - 0.920051, - 0.928636, - 0.924788, - 0.926974, - 0.936797, - 0.934687, - 0.918733, - 0.917775, - 0.92665, - 0.93106, - 0.934802, - 0.94139, - 0.941707, - 0.940905, - 0.933729, - 0.921239, - 0.943755, - 0.939173, - 0.916613, - 0.936172, - 0.958712, - 0.961794, - 0.952225, - 0.945046, - 0.948131, - 0.947162, - 0.939305, - 0.937678, - 0.932538, - 0.93609, - 0.94419, - 0.941677, - 0.927923, - 0.937307, - 0.956298, - 0.953673, - 0.936918, - 0.931789, - 0.9382, - 0.942634, - 0.948198, - 0.94667, - 0.935621, - 0.938338, - 0.936745, - 0.937192, - 0.941868, - 0.944054, - 0.939886, - 0.938998, - 0.943816, - 0.942787 - ], - "train_epoch_time": 4.843476295471191, - "train_loss": 2.5858833301358106, - "train_score": 0.24968503411308765, - "val_loss": 2.61170709776413, - "val_score": 0.2497084889086188 - }, - { - "epoch": 7, - "grad_norm": 0.5835149884223938, - "learning_rate": 1.0, - "model_norm": 87.91259002685547, - "step_logs": { - "grad_norm": { - "378": 0.631034791469574, - "379": 0.6645892262458801, - "380": 0.6508366465568542, - "381": 0.6770791411399841, - "382": 0.6244338750839233, - "383": 0.5523872971534729, - "384": 0.5418217182159424, - "385": 0.5459670424461365, - "386": 0.5279170274734497, - "387": 0.5229026675224304, - "388": 0.54593425989151, - "389": 0.6523922681808472, - "390": 0.6707743406295776, - "391": 0.5343193411827087, - "392": 0.4200863540172577, - "393": 0.6051865220069885, - "394": 0.7158398032188416, - "395": 0.6296555995941162, - "396": 0.5331427454948425, - "397": 0.5095428824424744, - "398": 0.5672648549079895, - "399": 0.6428970694541931, - "400": 0.6532034277915955, - "401": 0.5940384268760681, - "402": 0.5841777324676514, - "403": 0.6384387612342834, - "404": 0.6099355220794678, - "405": 0.579751193523407, - "406": 0.6122485399246216, - "407": 0.6530709266662598, - "408": 0.5794283151626587, - "409": 0.5202420353889465, - "410": 0.5804641842842102, - "411": 0.6161943674087524, - "412": 0.6395607590675354, - "413": 0.6825110912322998, - "414": 0.584525465965271, - "415": 0.5930807590484619, - "416": 0.5564717650413513, - "417": 0.5128763914108276, - "418": 0.6574738621711731, - "419": 0.8442243933677673, - "420": 0.6434152126312256, - "421": 0.5266668796539307, - "422": 0.6124412417411804, - "423": 0.7113578915596008, - "424": 0.6526036262512207, - "425": 0.6006346940994263, - "426": 0.7452445030212402, - "427": 0.6529521346092224, - "428": 0.5348069667816162, - "429": 0.5427950024604797, - "430": 0.6044876575469971, - "431": 0.5835149884223938 - }, - "loss": { - "378": 2.595905303955078, - "379": 2.645911693572998, - "380": 2.606935977935791, - "381": 2.6263294219970703, - "382": 2.6046905517578125, - "383": 2.601595640182495, - "384": 2.5734827518463135, - "385": 2.5872702598571777, - "386": 2.564354419708252, - "387": 2.578911781311035, - "388": 2.539823055267334, - "389": 2.5958669185638428, - "390": 2.6346778869628906, - "391": 2.6006574630737305, - "392": 2.528630256652832, - "393": 2.549687623977661, - "394": 2.611457109451294, - "395": 2.6117324829101562, - "396": 2.5580716133117676, - "397": 2.567366361618042, - "398": 2.547837257385254, - "399": 2.5905261039733887, - "400": 2.576244831085205, - "401": 2.6036343574523926, - "402": 2.5575199127197266, - "403": 2.584616184234619, - "404": 2.583188772201538, - "405": 2.5615596771240234, - "406": 2.5433802604675293, - "407": 2.5824694633483887, - "408": 2.5776078701019287, - "409": 2.552412986755371, - "410": 2.5194010734558105, - "411": 2.562621593475342, - "412": 2.558915615081787, - "413": 2.5721797943115234, - "414": 2.5612964630126953, - "415": 2.5327343940734863, - "416": 2.550753355026245, - "417": 2.5114545822143555, - "418": 2.5217838287353516, - "419": 2.5841362476348877, - "420": 2.592214584350586, - "421": 2.511564254760742, - "422": 2.519066333770752, - "423": 2.563769817352295, - "424": 2.5526232719421387, - "425": 2.5217087268829346, - "426": 2.5163087844848633, - "427": 2.579192876815796, - "428": 2.498741626739502, - "429": 2.5157222747802734, - "430": 2.4993467330932617, - "431": 2.5408687591552734 - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "step_size_list": [ - 0.928765, - 0.922965, - 0.924862, - 0.919729, - 0.930363, - 0.944605, - 0.94604, - 0.945532, - 0.94846, - 0.949657, - 0.944578, - 0.924232, - 0.92133, - 0.947967, - 0.966282, - 0.93299, - 0.910655, - 0.929453, - 0.947366, - 0.951869, - 0.940601, - 0.926119, - 0.923524, - 0.936534, - 0.937455, - 0.926911, - 0.932829, - 0.938432, - 0.931367, - 0.923722, - 0.938856, - 0.949651, - 0.937322, - 0.931026, - 0.925991, - 0.916969, - 0.937472, - 0.935069, - 0.942774, - 0.950237, - 0.921058, - 0.87881, - 0.926053, - 0.94767, - 0.93071, - 0.910176, - 0.923001, - 0.933244, - 0.90061, - 0.923659, - 0.945866, - 0.944682, - 0.931879, - 0.937205 - ], - "train_epoch_time": 4.843728542327881, - "train_loss": 2.485765217503311, - "train_score": 0.2834088055954089, - "val_loss": 2.507180509008852, - "val_score": 0.2780882244337303 - }, - { - "epoch": 8, - "grad_norm": 0.4784611463546753, - "learning_rate": 1.0, - "model_norm": 88.0074234008789, - "step_logs": { - "grad_norm": { - "432": 0.549848198890686, - "433": 0.5837130546569824, - "434": 0.7905740737915039, - "435": 0.5529758334159851, - "436": 0.44585034251213074, - "437": 0.511372447013855, - "438": 0.6252089142799377, - "439": 0.6288154721260071, - "440": 0.6147361993789673, - "441": 0.6964508891105652, - "442": 0.5719792246818542, - "443": 0.49305108189582825, - "444": 0.599076509475708, - "445": 0.668509304523468, - "446": 0.7662803530693054, - "447": 0.9397596716880798, - "448": 0.5515175461769104, - "449": 0.4790220856666565, - "450": 0.5375211834907532, - "451": 0.7502608895301819, - "452": 0.6550245881080627, - "453": 0.5108506083488464, - "454": 0.5343354940414429, - "455": 0.5929295420646667, - "456": 0.7053409218788147, - "457": 0.6709436178207397, - "458": 0.5561814308166504, - "459": 0.5841220617294312, - "460": 0.7298258543014526, - "461": 0.7944507598876953, - "462": 0.674461305141449, - "463": 0.5819013714790344, - "464": 0.6430020332336426, - "465": 0.624009370803833, - "466": 0.516533613204956, - "467": 0.5987129807472229, - "468": 0.6656692028045654, - "469": 0.6360422372817993, - "470": 0.5653752088546753, - "471": 0.5587164759635925, - "472": 0.7932880520820618, - "473": 0.8027954697608948, - "474": 0.5690339207649231, - "475": 0.5225327610969543, - "476": 0.6446221470832825, - "477": 0.7345907688140869, - "478": 0.5668221712112427, - "479": 0.5040398240089417, - "480": 0.5966363549232483, - "481": 0.5954105854034424, - "482": 0.6325654983520508, - "483": 0.5826522707939148, - "484": 0.4609067440032959, - "485": 0.4784611463546753 - }, - "loss": { - "432": 2.5038466453552246, - "433": 2.4998693466186523, - "434": 2.508211135864258, - "435": 2.572650194168091, - "436": 2.4810874462127686, - "437": 2.501356363296509, - "438": 2.4633829593658447, - "439": 2.5254392623901367, - "440": 2.480496644973755, - "441": 2.512538433074951, - "442": 2.5309786796569824, - "443": 2.4784884452819824, - "444": 2.48624849319458, - "445": 2.5248870849609375, - "446": 2.487924337387085, - "447": 2.586681365966797, - "448": 2.556032180786133, - "449": 2.480590343475342, - "450": 2.462827205657959, - "451": 2.4823365211486816, - "452": 2.539616823196411, - "453": 2.4547269344329834, - "454": 2.473958730697632, - "455": 2.4873905181884766, - "456": 2.477478504180908, - "457": 2.537742853164673, - "458": 2.455606698989868, - "459": 2.4600114822387695, - "460": 2.481431007385254, - "461": 2.5656208992004395, - "462": 2.504275321960449, - "463": 2.4905426502227783, - "464": 2.448953151702881, - "465": 2.507854461669922, - "466": 2.427478790283203, - "467": 2.4791388511657715, - "468": 2.4901974201202393, - "469": 2.507401943206787, - "470": 2.4635491371154785, - "471": 2.463207960128784, - "472": 2.4576711654663086, - "473": 2.554412603378296, - "474": 2.506570816040039, - "475": 2.4603357315063477, - "476": 2.448587417602539, - "477": 2.4790964126586914, - "478": 2.4630634784698486, - "479": 2.43473219871521, - "480": 2.444490909576416, - "481": 2.4640111923217773, - "482": 2.461678981781006, - "483": 2.4721994400024414, - "484": 2.404785394668579, - "485": 2.41409969329834 - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "step_size_list": [ - 0.943064, - 0.9362, - 0.889211, - 0.943904, - 0.961483, - 0.950325, - 0.926493, - 0.927398, - 0.929217, - 0.911972, - 0.939292, - 0.953251, - 0.932683, - 0.918696, - 0.894449, - 0.854182, - 0.943841, - 0.955793, - 0.944592, - 0.898166, - 0.922107, - 0.949527, - 0.945444, - 0.933995, - 0.908756, - 0.918532, - 0.940746, - 0.935148, - 0.903076, - 0.890471, - 0.916738, - 0.936348, - 0.922157, - 0.927959, - 0.947907, - 0.932579, - 0.918297, - 0.925351, - 0.939077, - 0.940411, - 0.886502, - 0.887981, - 0.939329, - 0.947429, - 0.921784, - 0.901848, - 0.938772, - 0.950414, - 0.93213, - 0.932889, - 0.924835, - 0.935751, - 0.957699, - 0.954732 - ], - "train_epoch_time": 4.844201564788818, - "train_loss": 2.4252914186529653, - "train_score": 0.29261567440402386, - "val_loss": 2.4552955723246805, - "val_score": 0.2873448260752944 - }, - { - "epoch": 9, - "grad_norm": 0.507682204246521, - "learning_rate": 1.0, - "model_norm": 88.0914535522461, - "step_logs": { - "grad_norm": { - "486": 0.57832932472229, - "487": 0.6478872895240784, - "488": 0.5928182601928711, - "489": 0.5464459657669067, - "490": 0.5789236426353455, - "491": 0.6152143478393555, - "492": 0.6163302659988403, - "493": 0.6283442378044128, - "494": 0.7662516832351685, - "495": 0.695849597454071, - "496": 0.45231619477272034, - "497": 0.43580302596092224, - "498": 0.5713932514190674, - "499": 0.6079331040382385, - "500": 0.6007699966430664, - "501": 0.6494527459144592, - "502": 0.6491064429283142, - "503": 0.5804978609085083, - "504": 0.5806142687797546, - "505": 0.6671934723854065, - "506": 0.6753543615341187, - "507": 0.6034969687461853, - "508": 0.5302227735519409, - "509": 0.5263882279396057, - "510": 0.4867282211780548, - "511": 0.48377421498298645, - "512": 0.5275049805641174, - "513": 0.544241189956665, - "514": 0.587230920791626, - "515": 0.6122822761535645, - "516": 0.6598381400108337, - "517": 0.6440941095352173, - "518": 0.6427991390228271, - "519": 0.6577604413032532, - "520": 0.5495144724845886, - "521": 0.5072782635688782, - "522": 0.5514366626739502, - "523": 0.5523213148117065, - "524": 0.7139912247657776, - "525": 0.6774431467056274, - "526": 0.6321172118186951, - "527": 0.5850508213043213, - "528": 0.5684505701065063, - "529": 0.558586597442627, - "530": 0.6529635190963745, - "531": 0.6388319730758667, - "532": 0.5655055046081543, - "533": 0.555770754814148, - "534": 0.5636330246925354, - "535": 0.567356526851654, - "536": 0.5862202048301697, - "537": 0.6310988664627075, - "538": 0.6419916749000549, - "539": 0.507682204246521 - }, - "loss": { - "486": 2.444479465484619, - "487": 2.4827351570129395, - "488": 2.4525582790374756, - "489": 2.4294040203094482, - "490": 2.4081668853759766, - "491": 2.4531545639038086, - "492": 2.44587779045105, - "493": 2.483649253845215, - "494": 2.4348349571228027, - "495": 2.4871487617492676, - "496": 2.431297540664673, - "497": 2.402846336364746, - "498": 2.4212663173675537, - "499": 2.4522478580474854, - "500": 2.4311389923095703, - "501": 2.462240219116211, - "502": 2.4582650661468506, - "503": 2.4273014068603516, - "504": 2.4112019538879395, - "505": 2.4532992839813232, - "506": 2.4747438430786133, - "507": 2.4368577003479004, - "508": 2.428983688354492, - "509": 2.435624361038208, - "510": 2.3989522457122803, - "511": 2.397653341293335, - "512": 2.4161252975463867, - "513": 2.430206298828125, - "514": 2.419283390045166, - "515": 2.4285733699798584, - "516": 2.4293394088745117, - "517": 2.4625699520111084, - "518": 2.4435346126556396, - "519": 2.4574975967407227, - "520": 2.41593074798584, - "521": 2.4223670959472656, - "522": 2.4104039669036865, - "523": 2.4375414848327637, - "524": 2.4481759071350098, - "525": 2.4842605590820312, - "526": 2.4485764503479004, - "527": 2.4255530834198, - "528": 2.4002795219421387, - "529": 2.4076528549194336, - "530": 2.4005117416381836, - "531": 2.4537055492401123, - "532": 2.4028143882751465, - "533": 2.4327540397644043, - "534": 2.4008846282958984, - "535": 2.4443836212158203, - "536": 2.4099483489990234, - "537": 2.4318974018096924, - "538": 2.403520107269287, - "539": 2.4510669708251953 - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "step_size_list": [ - 0.935968, - 0.922054, - 0.933144, - 0.942102, - 0.934941, - 0.928382, - 0.927942, - 0.926369, - 0.892402, - 0.911293, - 0.959625, - 0.961982, - 0.936837, - 0.929925, - 0.9309, - 0.921106, - 0.921066, - 0.935091, - 0.934662, - 0.916822, - 0.915624, - 0.930467, - 0.945295, - 0.94618, - 0.952947, - 0.953466, - 0.945551, - 0.942559, - 0.933472, - 0.928347, - 0.91776, - 0.922311, - 0.922043, - 0.919095, - 0.941181, - 0.949563, - 0.940666, - 0.94111, - 0.905703, - 0.915443, - 0.924562, - 0.934092, - 0.936933, - 0.939146, - 0.918437, - 0.923224, - 0.937606, - 0.940306, - 0.937946, - 0.938224, - 0.933446, - 0.92431, - 0.921031, - 0.950049 - ], - "train_epoch_time": 4.843919992446899, - "train_loss": 2.374070036804659, - "train_score": 0.30918108853411297, - "val_loss": 2.4118292178406917, - "val_score": 0.2987047940595279 - }, - { - "epoch": 10, - "grad_norm": 0.48772522807121277, - "learning_rate": 1.0, - "model_norm": 88.18107604980469, - "step_logs": { - "grad_norm": { - "540": 0.4458203911781311, - "541": 0.5136281251907349, - "542": 0.5244190096855164, - "543": 0.5123744606971741, - "544": 0.5148400664329529, - "545": 0.6042338609695435, - "546": 0.592066764831543, - "547": 0.5598015189170837, - "548": 0.5485560894012451, - "549": 0.5597467422485352, - "550": 0.6671572923660278, - "551": 0.5480512976646423, - "552": 0.444033145904541, - "553": 0.5193850994110107, - "554": 0.5741039514541626, - "555": 0.6345900297164917, - "556": 0.5866849422454834, - "557": 0.4788570702075958, - "558": 0.5482646822929382, - "559": 0.6118820309638977, - "560": 0.594347357749939, - "561": 0.606283962726593, - "562": 0.6163011789321899, - "563": 0.5812875032424927, - "564": 0.5867927074432373, - "565": 0.6255936622619629, - "566": 0.6476121544837952, - "567": 0.5879682302474976, - "568": 0.5346601009368896, - "569": 0.5324615836143494, - "570": 0.4745098352432251, - "571": 0.4630752503871918, - "572": 0.511570394039154, - "573": 0.5756678581237793, - "574": 0.5992061495780945, - "575": 0.5642318725585938, - "576": 0.536216139793396, - "577": 0.5043255090713501, - "578": 0.5142707824707031, - "579": 0.533702552318573, - "580": 0.5127367377281189, - "581": 0.48662662506103516, - "582": 0.49175190925598145, - "583": 0.6300363540649414, - "584": 0.5854738354682922, - "585": 0.51082444190979, - "586": 0.4999653995037079, - "587": 0.5188165307044983, - "588": 0.5914244651794434, - "589": 0.616300642490387, - "590": 0.6214520335197449, - "591": 0.6554830074310303, - "592": 0.5870980620384216, - "593": 0.48772522807121277 - }, - "loss": { - "540": 2.3845901489257812, - "541": 2.3942341804504395, - "542": 2.3856544494628906, - "543": 2.3923680782318115, - "544": 2.3843882083892822, - "545": 2.4156227111816406, - "546": 2.442225933074951, - "547": 2.400441884994507, - "548": 2.3967676162719727, - "549": 2.380504608154297, - "550": 2.417879581451416, - "551": 2.419123649597168, - "552": 2.3917505741119385, - "553": 2.3664190769195557, - "554": 2.411536693572998, - "555": 2.4109506607055664, - "556": 2.4361066818237305, - "557": 2.3568806648254395, - "558": 2.3607475757598877, - "559": 2.4009745121002197, - "560": 2.4093422889709473, - "561": 2.375514268875122, - "562": 2.42792010307312, - "563": 2.419508457183838, - "564": 2.3964576721191406, - "565": 2.3959453105926514, - "566": 2.4302735328674316, - "567": 2.404489040374756, - "568": 2.3777198791503906, - "569": 2.3871707916259766, - "570": 2.4041543006896973, - "571": 2.3582606315612793, - "572": 2.376455307006836, - "573": 2.3530220985412598, - "574": 2.389054536819458, - "575": 2.4047579765319824, - "576": 2.354835033416748, - "577": 2.3601691722869873, - "578": 2.368180274963379, - "579": 2.3701419830322266, - "580": 2.387045383453369, - "581": 2.3608124256134033, - "582": 2.345092296600342, - "583": 2.3491830825805664, - "584": 2.42059326171875, - "585": 2.3626253604888916, - "586": 2.37371826171875, - "587": 2.3939995765686035, - "588": 2.3850338459014893, - "589": 2.391475200653076, - "590": 2.389573097229004, - "591": 2.4043402671813965, - "592": 2.387998580932617, - "593": 2.3460845947265625 - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "step_size_list": [ - 0.959992, - 0.947783, - 0.945502, - 0.947986, - 0.947344, - 0.929739, - 0.933039, - 0.938725, - 0.940933, - 0.938255, - 0.915715, - 0.941548, - 0.960414, - 0.946076, - 0.936034, - 0.922921, - 0.934016, - 0.953611, - 0.940146, - 0.927671, - 0.931699, - 0.928188, - 0.927454, - 0.93473, - 0.932975, - 0.924494, - 0.920567, - 0.932934, - 0.943296, - 0.943946, - 0.955267, - 0.956512, - 0.947812, - 0.934214, - 0.930108, - 0.937916, - 0.942462, - 0.948872, - 0.947114, - 0.943317, - 0.947806, - 0.952242, - 0.950969, - 0.922096, - 0.933877, - 0.947667, - 0.949981, - 0.946774, - 0.931681, - 0.92643, - 0.925232, - 0.917978, - 0.932688, - 0.95175 - ], - "train_epoch_time": 4.844288349151611, - "train_loss": 2.351793902847312, - "train_score": 0.32671493903294185, - "val_loss": 2.3972862930166463, - "val_score": 0.3164197051497743 - }, - { - "epoch": 11, - "grad_norm": 0.6007877588272095, - "learning_rate": 1.0, - "model_norm": 88.26958465576172, - "step_logs": { - "grad_norm": { - "594": 0.5038158893585205, - "595": 0.5298601984977722, - "596": 0.7168340086936951, - "597": 0.9889169335365295, - "598": 0.7741715908050537, - "599": 0.6755338311195374, - "600": 0.5615705251693726, - "601": 0.5231757760047913, - "602": 0.4276902377605438, - "603": 0.4245786964893341, - "604": 0.4568428099155426, - "605": 0.47332683205604553, - "606": 0.494045227766037, - "607": 0.4955570101737976, - "608": 0.46529778838157654, - "609": 0.4713149666786194, - "610": 0.4874584972858429, - "611": 0.45520979166030884, - "612": 0.4652065634727478, - "613": 0.605072021484375, - "614": 0.6871828436851501, - "615": 0.6343675851821899, - "616": 0.6152904033660889, - "617": 0.600509762763977, - "618": 0.6721615791320801, - "619": 0.6511174440383911, - "620": 0.5150911808013916, - "621": 0.4967924952507019, - "622": 0.5662773251533508, - "623": 0.5638839602470398, - "624": 0.6103441119194031, - "625": 0.6011389493942261, - "626": 0.4980040192604065, - "627": 0.4948805570602417, - "628": 0.4760291576385498, - "629": 0.530250608921051, - "630": 0.6468032598495483, - "631": 0.7163217663764954, - "632": 0.590148389339447, - "633": 0.4722534120082855, - "634": 0.4898388683795929, - "635": 0.49531492590904236, - "636": 0.49066048860549927, - "637": 0.5269964337348938, - "638": 0.5501883029937744, - "639": 0.5197085738182068, - "640": 0.5104097127914429, - "641": 0.5442822575569153, - "642": 0.5863537788391113, - "643": 0.5064905881881714, - "644": 0.4422999322414398, - "645": 0.5581861734390259, - "646": 0.6343194842338562, - "647": 0.6007877588272095 - }, - "loss": { - "594": 2.3564181327819824, - "595": 2.349951982498169, - "596": 2.385848045349121, - "597": 2.412327289581299, - "598": 2.422610282897949, - "599": 2.4111275672912598, - "600": 2.3904480934143066, - "601": 2.3819735050201416, - "602": 2.340860605239868, - "603": 2.3423237800598145, - "604": 2.3417506217956543, - "605": 2.3592422008514404, - "606": 2.3720672130584717, - "607": 2.3765432834625244, - "608": 2.356266975402832, - "609": 2.3234362602233887, - "610": 2.3410472869873047, - "611": 2.351180076599121, - "612": 2.3297650814056396, - "613": 2.3659005165100098, - "614": 2.4027676582336426, - "615": 2.397608995437622, - "616": 2.3960280418395996, - "617": 2.3853845596313477, - "618": 2.3702778816223145, - "619": 2.4199793338775635, - "620": 2.347090244293213, - "621": 2.3456039428710938, - "622": 2.355914831161499, - "623": 2.36606502532959, - "624": 2.374274730682373, - "625": 2.388221502304077, - "626": 2.3333864212036133, - "627": 2.3182082176208496, - "628": 2.3259224891662598, - "629": 2.3107547760009766, - "630": 2.3683109283447266, - "631": 2.3690876960754395, - "632": 2.3664283752441406, - "633": 2.349916934967041, - "634": 2.3301212787628174, - "635": 2.344449520111084, - "636": 2.328540563583374, - "637": 2.353421449661255, - "638": 2.3580117225646973, - "639": 2.3385441303253174, - "640": 2.326274871826172, - "641": 2.3408737182617188, - "642": 2.333332061767578, - "643": 2.3740005493164062, - "644": 2.299046754837036, - "645": 2.356168270111084, - "646": 2.3704917430877686, - "647": 2.381502866744995 - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "step_size_list": [ - 0.948893, - 0.943632, - 0.902782, - 0.831463, - 0.889919, - 0.913548, - 0.938119, - 0.945667, - 0.962398, - 0.962945, - 0.957339, - 0.954671, - 0.951069, - 0.950872, - 0.956076, - 0.954377, - 0.951701, - 0.957793, - 0.955615, - 0.928184, - 0.910526, - 0.922576, - 0.926782, - 0.929724, - 0.912987, - 0.91946, - 0.946503, - 0.95002, - 0.93628, - 0.937038, - 0.927257, - 0.929665, - 0.949538, - 0.949828, - 0.95355, - 0.942651, - 0.918845, - 0.902287, - 0.931457, - 0.954696, - 0.951034, - 0.950279, - 0.950846, - 0.944283, - 0.939685, - 0.945404, - 0.946974, - 0.940489, - 0.931382, - 0.94874, - 0.959191, - 0.937982, - 0.921771, - 0.929557 - ], - "train_epoch_time": 4.843904495239258, - "train_loss": 2.349056749124951, - "train_score": 0.31112020254306166, - "val_loss": 2.388967074701898, - "val_score": 0.30335551849325815 - }, - { - "epoch": 12, - "grad_norm": 0.39419859647750854, - "learning_rate": 1.0, - "model_norm": 88.33519744873047, - "step_logs": { - "grad_norm": { - "648": 0.6511616110801697, - "649": 0.6989766359329224, - "650": 0.5482044219970703, - "651": 0.4628799557685852, - "652": 0.4435892403125763, - "653": 0.4629161059856415, - "654": 0.538483738899231, - "655": 0.5638764500617981, - "656": 0.517505943775177, - "657": 0.4965159595012665, - "658": 0.5503877997398376, - "659": 0.5701238512992859, - "660": 0.5422475934028625, - "661": 0.5240478515625, - "662": 0.5590012669563293, - "663": 0.5614112019538879, - "664": 0.4637320637702942, - "665": 0.41335898637771606, - "666": 0.3972206115722656, - "667": 0.38193708658218384, - "668": 0.41595569252967834, - "669": 0.4244782626628876, - "670": 0.4399038851261139, - "671": 0.4313145875930786, - "672": 0.4001099765300751, - "673": 0.41833269596099854, - "674": 0.44175443053245544, - "675": 0.4228600263595581, - "676": 0.42377057671546936, - "677": 0.39453262090682983, - "678": 0.39062777161598206, - "679": 0.38830944895744324, - "680": 0.44104278087615967, - "681": 0.4659619629383087, - "682": 0.4254288077354431, - "683": 0.39343419671058655, - "684": 0.3838067054748535, - "685": 0.3991822898387909, - "686": 0.42290619015693665, - "687": 0.4018757939338684, - "688": 0.3866080939769745, - "689": 0.405292809009552, - "690": 0.414725661277771, - "691": 0.4198669493198395, - "692": 0.3751433193683624, - "693": 0.3406931161880493, - "694": 0.34855344891548157, - "695": 0.3698401153087616, - "696": 0.41545191407203674, - "697": 0.45390641689300537, - "698": 0.4355085790157318, - "699": 0.42599087953567505, - "700": 0.4184563457965851, - "701": 0.39419859647750854 - }, - "loss": { - "648": 2.341552734375, - "649": 2.3889966011047363, - "650": 2.3494069576263428, - "651": 2.3306591510772705, - "652": 2.3127858638763428, - "653": 2.312307357788086, - "654": 2.296816349029541, - "655": 2.33444881439209, - "656": 2.3216490745544434, - "657": 2.321135997772217, - "658": 2.307474136352539, - "659": 2.322965145111084, - "660": 2.2960028648376465, - "661": 2.335008144378662, - "662": 2.334357500076294, - "663": 2.3267815113067627, - "664": 2.3098506927490234, - "665": 2.300386428833008, - "666": 2.3022618293762207, - "667": 2.258044719696045, - "668": 2.263434410095215, - "669": 2.3040952682495117, - "670": 2.2888686656951904, - "671": 2.2761268615722656, - "672": 2.271550178527832, - "673": 2.3023762702941895, - "674": 2.2836050987243652, - "675": 2.269228935241699, - "676": 2.3071327209472656, - "677": 2.2728633880615234, - "678": 2.274847984313965, - "679": 2.291092872619629, - "680": 2.2709689140319824, - "681": 2.2796080112457275, - "682": 2.2883238792419434, - "683": 2.294191360473633, - "684": 2.2727112770080566, - "685": 2.26954984664917, - "686": 2.2552878856658936, - "687": 2.2681663036346436, - "688": 2.259733200073242, - "689": 2.2676241397857666, - "690": 2.248312473297119, - "691": 2.2806448936462402, - "692": 2.236166000366211, - "693": 2.226914882659912, - "694": 2.247272491455078, - "695": 2.246020793914795, - "696": 2.268394947052002, - "697": 2.252779483795166, - "698": 2.252521276473999, - "699": 2.250152587890625, - "700": 2.258693218231201, - "701": 2.2442920207977295 - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "step_size_list": [ - 0.916976, - 0.902148, - 0.928972, - 0.939114, - 0.936455, - 0.927485, - 0.907783, - 0.898261, - 0.901205, - 0.899338, - 0.883837, - 0.875036, - 0.874102, - 0.872559, - 0.860937, - 0.854869, - 0.864948, - 0.866266, - 0.862614, - 0.858245, - 0.848129, - 0.841725, - 0.833739, - 0.828958, - 0.827027, - 0.819345, - 0.810436, - 0.806843, - 0.801363, - 0.798539, - 0.79314, - 0.787682, - 0.775807, - 0.767203, - 0.766183, - 0.763752, - 0.758655, - 0.751255, - 0.742885, - 0.739276, - 0.734786, - 0.72724, - 0.720331, - 0.714289, - 0.712074, - 0.70888, - 0.702453, - 0.694857, - 0.685355, - 0.67597, - 0.671804, - 0.666763, - 0.661599, - 0.657524 - ], - "train_epoch_time": 4.844761371612549, - "train_loss": 2.247865048576804, - "train_score": 0.345256456326549, - "val_loss": 2.29649863998597, - "val_score": 0.333964194105906 - }, - { - "epoch": 13, - "grad_norm": 0.21998971700668335, - "learning_rate": 0.6666666666666667, - "model_norm": 88.36693572998047, - "step_logs": { - "grad_norm": { - "702": 0.3778780400753021, - "703": 0.3608350455760956, - "704": 0.3651042878627777, - "705": 0.388094037771225, - "706": 0.3618382215499878, - "707": 0.349433958530426, - "708": 0.350179523229599, - "709": 0.3341130018234253, - "710": 0.29819419980049133, - "711": 0.2698521614074707, - "712": 0.2913244962692261, - "713": 0.31975212693214417, - "714": 0.3172418475151062, - "715": 0.29471907019615173, - "716": 0.28547587990760803, - "717": 0.26432570815086365, - "718": 0.2401735931634903, - "719": 0.26239922642707825, - "720": 0.2587120831012726, - "721": 0.27061891555786133, - "722": 0.2909615933895111, - "723": 0.3134424388408661, - "724": 0.3677830398082733, - "725": 0.32742029428482056, - "726": 0.2667600214481354, - "727": 0.2844672203063965, - "728": 0.279303640127182, - "729": 0.2534867525100708, - "730": 0.2552092969417572, - "731": 0.24614571034908295, - "732": 0.23709611594676971, - "733": 0.2705739140510559, - "734": 0.26088449358940125, - "735": 0.24297428131103516, - "736": 0.2223147451877594, - "737": 0.20578642189502716, - "738": 0.22110122442245483, - "739": 0.22143249213695526, - "740": 0.23313593864440918, - "741": 0.22729408740997314, - "742": 0.23405714333057404, - "743": 0.24651159346103668, - "744": 0.22185885906219482, - "745": 0.214141383767128, - "746": 0.22089646756649017, - "747": 0.20220467448234558, - "748": 0.21291328966617584, - "749": 0.21357682347297668, - "750": 0.21467262506484985, - "751": 0.2303503304719925, - "752": 0.21531184017658234, - "753": 0.21045315265655518, - "754": 0.20281724631786346, - "755": 0.21998971700668335 - }, - "loss": { - "702": 2.2505717277526855, - "703": 2.2411627769470215, - "704": 2.238123893737793, - "705": 2.239494562149048, - "706": 2.238781452178955, - "707": 2.2343153953552246, - "708": 2.2369303703308105, - "709": 2.2434890270233154, - "710": 2.239253282546997, - "711": 2.227365493774414, - "712": 2.2049736976623535, - "713": 2.235891342163086, - "714": 2.2205817699432373, - "715": 2.254295825958252, - "716": 2.2185144424438477, - "717": 2.1970832347869873, - "718": 2.2062389850616455, - "719": 2.2300188541412354, - "720": 2.2006869316101074, - "721": 2.2079954147338867, - "722": 2.2378220558166504, - "723": 2.2083661556243896, - "724": 2.2260518074035645, - "725": 2.2498276233673096, - "726": 2.2193684577941895, - "727": 2.2244410514831543, - "728": 2.2064828872680664, - "729": 2.218808889389038, - "730": 2.1951470375061035, - "731": 2.235578775405884, - "732": 2.2016549110412598, - "733": 2.2122249603271484, - "734": 2.2072882652282715, - "735": 2.2308173179626465, - "736": 2.1912527084350586, - "737": 2.1960291862487793, - "738": 2.213735342025757, - "739": 2.222991943359375, - "740": 2.20228910446167, - "741": 2.2345454692840576, - "742": 2.195842742919922, - "743": 2.1963882446289062, - "744": 2.1989965438842773, - "745": 2.1889209747314453, - "746": 2.2223312854766846, - "747": 2.21597957611084, - "748": 2.195054769515991, - "749": 2.1846275329589844, - "750": 2.2157058715820312, - "751": 2.199411630630493, - "752": 2.212005615234375, - "753": 2.203597068786621, - "754": 2.2048442363739014, - "755": 2.2119381427764893 - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "step_size_list": [ - 0.652859, - 0.64806, - 0.641815, - 0.634323, - 0.630146, - 0.624945, - 0.618948, - 0.613934, - 0.60981, - 0.605067, - 0.597977, - 0.590679, - 0.58474, - 0.579869, - 0.574128, - 0.568881, - 0.563716, - 0.556899, - 0.550901, - 0.544423, - 0.537685, - 0.530697, - 0.522438, - 0.518213, - 0.514244, - 0.507615, - 0.501684, - 0.496406, - 0.490236, - 0.484453, - 0.47854, - 0.4716, - 0.465767, - 0.460144, - 0.454449, - 0.448668, - 0.442274, - 0.436163, - 0.429807, - 0.423839, - 0.417567, - 0.411227, - 0.405558, - 0.399555, - 0.393356, - 0.387499, - 0.38121, - 0.375069, - 0.368949, - 0.362604, - 0.356686, - 0.350612, - 0.344568, - 0.33825 - ], - "train_epoch_time": 4.8431618213653564, - "train_loss": 2.195485944898433, - "train_score": 0.3584794207659137, - "val_loss": 2.256491680232857, - "val_score": 0.34196505412442263 - }, - { - "epoch": 14, - "grad_norm": 0.17988839745521545, - "learning_rate": 0.33333333333333337, - "model_norm": 88.37663269042969, - "step_logs": { - "grad_norm": { - "756": 0.1821930855512619, - "757": 0.19538429379463196, - "758": 0.20732107758522034, - "759": 0.20958596467971802, - "760": 0.2063555270433426, - "761": 0.20376798510551453, - "762": 0.20231837034225464, - "763": 0.21082890033721924, - "764": 0.22145602107048035, - "765": 0.2254459261894226, - "766": 0.20370836555957794, - "767": 0.19688469171524048, - "768": 0.2070452868938446, - "769": 0.20759566128253937, - "770": 0.22178387641906738, - "771": 0.19839537143707275, - "772": 0.18363282084465027, - "773": 0.18331368267536163, - "774": 0.1851062923669815, - "775": 0.19360743463039398, - "776": 0.20063072443008423, - "777": 0.1836511492729187, - "778": 0.18879574537277222, - "779": 0.20281025767326355, - "780": 0.19790515303611755, - "781": 0.2011026293039322, - "782": 0.19825921952724457, - "783": 0.1800878793001175, - "784": 0.1839653104543686, - "785": 0.20201678574085236, - "786": 0.18508686125278473, - "787": 0.18991634249687195, - "788": 0.19692765176296234, - "789": 0.18821491301059723, - "790": 0.19337418675422668, - "791": 0.19300681352615356, - "792": 0.18469208478927612, - "793": 0.18032197654247284, - "794": 0.17788352072238922, - "795": 0.1759253442287445, - "796": 0.18745002150535583, - "797": 0.18053771555423737, - "798": 0.1818985641002655, - "799": 0.18222616612911224, - "800": 0.19171017408370972, - "801": 0.1934817135334015, - "802": 0.16393093764781952, - "803": 0.1889495998620987, - "804": 0.17868955433368683, - "805": 0.1914677768945694, - "806": 0.1868775188922882, - "807": 0.18818259239196777, - "808": 0.17567041516304016, - "809": 0.17988839745521545 - }, - "loss": { - "756": 2.2010631561279297, - "757": 2.209050416946411, - "758": 2.1987504959106445, - "759": 2.207879066467285, - "760": 2.1938045024871826, - "761": 2.203763246536255, - "762": 2.1959850788116455, - "763": 2.2000350952148438, - "764": 2.1638104915618896, - "765": 2.1800477504730225, - "766": 2.1980581283569336, - "767": 2.1998472213745117, - "768": 2.2184081077575684, - "769": 2.171358108520508, - "770": 2.228346586227417, - "771": 2.169013023376465, - "772": 2.1921329498291016, - "773": 2.18513822555542, - "774": 2.1895694732666016, - "775": 2.173962354660034, - "776": 2.1969332695007324, - "777": 2.1867828369140625, - "778": 2.186495780944824, - "779": 2.2036492824554443, - "780": 2.190216302871704, - "781": 2.156318187713623, - "782": 2.1985085010528564, - "783": 2.160374164581299, - "784": 2.2032132148742676, - "785": 2.202622890472412, - "786": 2.1816070079803467, - "787": 2.1931629180908203, - "788": 2.1984469890594482, - "789": 2.152705669403076, - "790": 2.1666464805603027, - "791": 2.200831413269043, - "792": 2.2016005516052246, - "793": 2.1729722023010254, - "794": 2.1898653507232666, - "795": 2.177778720855713, - "796": 2.190587043762207, - "797": 2.1497602462768555, - "798": 2.182061195373535, - "799": 2.1854350566864014, - "800": 2.172102451324463, - "801": 2.1652748584747314, - "802": 2.194276809692383, - "803": 2.1874892711639404, - "804": 2.2078864574432373, - "805": 2.201608657836914, - "806": 2.193455696105957, - "807": 2.1880478858947754, - "808": 2.1807518005371094, - "809": 2.1754772663116455 - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "step_size_list": [ - 0.332498, - 0.326238, - 0.319984, - 0.313832, - 0.30772, - 0.30161, - 0.29548, - 0.289276, - 0.28304, - 0.276881, - 0.27091, - 0.264813, - 0.258611, - 0.252452, - 0.246243, - 0.240216, - 0.234145, - 0.227995, - 0.221837, - 0.215648, - 0.209474, - 0.203384, - 0.197213, - 0.191017, - 0.184879, - 0.178712, - 0.172573, - 0.166458, - 0.160296, - 0.154101, - 0.147976, - 0.14181, - 0.13564, - 0.129492, - 0.123325, - 0.117168, - 0.111016, - 0.104856, - 0.098695, - 0.0925317, - 0.0863599, - 0.0801981, - 0.0740325, - 0.0678662, - 0.0616962, - 0.0555289, - 0.0493678, - 0.0431946, - 0.0370271, - 0.0308563, - 0.0246865, - 0.0185157, - 0.0123446, - 0.00617256 - ], - "train_epoch_time": 4.844646692276001, - "train_loss": 2.182451911978263, - "train_score": 0.3627376254439525, - "val_loss": 2.247394614323683, - "val_score": 0.3452479188874723 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-03 10:40:37.758976", - "final_model_norm": 88.37663269042969, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-03 10:38:56.130617", - "step_scheduler_on_epoch": false - } - } -] \ No newline at end of file diff --git a/output/lr-stability/shakespeare.json b/output/lr-stability/shakespeare.json deleted file mode 100644 index b1ff3de..0000000 --- a/output/lr-stability/shakespeare.json +++ /dev/null @@ -1,33134 +0,0 @@ -[ - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 11.078807830810547, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.58893585205078, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 8.304182052612305, - "3": 5.066382884979248, - "4": 3.9865400791168213, - "5": 7.368899822235107, - "6": 22.684249877929688, - "7": 7.364748954772949, - "8": 5.8788042068481445, - "9": 4.168630599975586, - "10": 3.386579751968384, - "11": 4.124453067779541, - "12": 5.471625328063965, - "13": 7.303220748901367, - "14": 3.478468894958496, - "15": 4.954684257507324, - "16": 5.368632793426514, - "17": 3.2023041248321533, - "18": 8.319535255432129, - "19": 4.851791858673096, - "20": 24.409008026123047, - "21": 3.8841021060943604, - "22": 127.42274475097656, - "23": 5.798412799835205, - "24": 5.420220375061035, - "25": 3.785569429397583, - "26": 1.419108510017395, - "27": 1.837812066078186, - "28": 2.7772908210754395, - "29": 6.889194011688232, - "30": 7.025652885437012, - "31": 4.444455623626709, - "32": 4.854276180267334, - "33": 17.949159622192383, - "34": 4.730770587921143, - "35": 4.025436878204346, - "36": 1.6051708459854126, - "37": 1.831591248512268, - "38": 9.636731147766113, - "39": 4.211915016174316, - "40": 3.8471062183380127, - "41": 3.4780006408691406, - "42": 8.998422622680664, - "43": 3.369115114212036, - "44": 3.2289352416992188, - "45": 2.7891392707824707, - "46": 10.820326805114746, - "47": 4.309532642364502, - "48": 3.608126401901245, - "49": 3.1533291339874268, - "50": 1.862067699432373, - "51": 1.9424623250961304, - "52": 2.750100612640381, - "53": 11.078807830810547 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.94942569732666, - "3": 3.73760724067688, - "4": 3.612809658050537, - "5": 3.569070816040039, - "6": 4.066908836364746, - "7": 4.256112098693848, - "8": 3.772096633911133, - "9": 3.5143580436706543, - "10": 3.385322093963623, - "11": 3.4045357704162598, - "12": 3.4153342247009277, - "13": 3.389209032058716, - "14": 3.3212578296661377, - "15": 3.2782492637634277, - "16": 3.1905996799468994, - "17": 3.1310646533966064, - "18": 3.072298765182495, - "19": 3.246102809906006, - "20": 3.6585073471069336, - "21": 3.3017466068267822, - "22": 3.986306667327881, - "23": 3.561216354370117, - "24": 3.6109938621520996, - "25": 3.822118043899536, - "26": 3.377912759780884, - "27": 3.400693655014038, - "28": 3.3791050910949707, - "29": 3.5108470916748047, - "30": 6.646190166473389, - "31": 4.068477630615234, - "32": 3.6853525638580322, - "33": 5.691895008087158, - "34": 5.398983478546143, - "35": 4.3408613204956055, - "36": 3.483992099761963, - "37": 3.3839926719665527, - "38": 3.7317845821380615, - "39": 5.436063289642334, - "40": 4.654531955718994, - "41": 3.7016143798828125, - "42": 3.865525245666504, - "43": 5.218574523925781, - "44": 4.379077911376953, - "45": 3.5449743270874023, - "46": 3.9674806594848633, - "47": 5.936733722686768, - "48": 5.191201686859131, - "49": 4.071769714355469, - "50": 3.3786449432373047, - "51": 3.4644248485565186, - "52": 3.453007698059082, - "53": 4.158570289611816 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "train_epoch_time": 6.811787366867065, - "train_loss": 6.15240808511567, - "train_score": 0.059190055584719396, - "val_loss": 6.150430158139917, - "val_score": 0.05866550658449924 - }, - { - "epoch": 1, - "grad_norm": 2.8834519386291504, - "learning_rate": 0.1, - "model_norm": 87.52149200439453, - "step_logs": { - "grad_norm": { - "54": 3.6732442378997803, - "55": 4.203396797180176, - "56": 3.681767463684082, - "57": 2.510910749435425, - "58": 9.891633033752441, - "59": 3.7573633193969727, - "60": 3.672461986541748, - "61": 4.566214561462402, - "62": 2.6282222270965576, - "63": 3.428539514541626, - "64": 9.9284029006958, - "65": 3.2549185752868652, - "66": 3.269218683242798, - "67": 3.8156352043151855, - "68": 2.1252076625823975, - "69": 2.5927603244781494, - "70": 10.775301933288574, - "71": 3.968351125717163, - "72": 3.8314497470855713, - "73": 3.5274105072021484, - "74": 2.681727647781372, - "75": 9.447980880737305, - "76": 3.578968048095703, - "77": 3.49782133102417, - "78": 4.2003173828125, - "79": 3.821840763092041, - "80": 3.405994415283203, - "81": 2.4955978393554688, - "82": 1.8375983238220215, - "83": 3.0771450996398926, - "84": 3.340847969055176, - "85": 2.8377625942230225, - "86": 3.0561769008636475, - "87": 6.293949127197266, - "88": 3.9713973999023438, - "89": 3.349625825881958, - "90": 2.6180405616760254, - "91": 3.398827314376831, - "92": 5.225561618804932, - "93": 2.9980392456054688, - "94": 2.9232120513916016, - "95": 8.645142555236816, - "96": 3.205531358718872, - "97": 3.0876965522766113, - "98": 3.653186559677124, - "99": 7.007787227630615, - "100": 3.295431137084961, - "101": 4.338217735290527, - "102": 2.1290626525878906, - "103": 6.241903781890869, - "104": 3.0309388637542725, - "105": 3.022103786468506, - "106": 2.0792396068573, - "107": 2.8834519386291504 - }, - "loss": { - "54": 6.156980991363525, - "55": 5.252943515777588, - "56": 4.546120643615723, - "57": 3.4713292121887207, - "58": 3.952418327331543, - "59": 5.8539018630981445, - "60": 4.961085319519043, - "61": 4.065688610076904, - "62": 3.739987850189209, - "63": 3.7214434146881104, - "64": 4.012625694274902, - "65": 5.713657855987549, - "66": 4.8445234298706055, - "67": 4.021078109741211, - "68": 3.5700507164001465, - "69": 3.5213842391967773, - "70": 4.034528732299805, - "71": 5.999028205871582, - "72": 5.269189357757568, - "73": 4.237903594970703, - "74": 3.5325655937194824, - "75": 3.8926713466644287, - "76": 5.630416393280029, - "77": 4.770911693572998, - "78": 3.93550443649292, - "79": 3.725010871887207, - "80": 4.039775848388672, - "81": 3.4270312786102295, - "82": 3.5064220428466797, - "83": 3.4148120880126953, - "84": 3.8978304862976074, - "85": 3.4866814613342285, - "86": 3.732253074645996, - "87": 3.6477553844451904, - "88": 4.735589981079102, - "89": 4.110518455505371, - "90": 3.38773250579834, - "91": 3.782430648803711, - "92": 3.6394848823547363, - "93": 4.382370471954346, - "94": 3.5844132900238037, - "95": 3.831367015838623, - "96": 5.310178756713867, - "97": 4.451910972595215, - "98": 3.6832613945007324, - "99": 3.8495001792907715, - "100": 4.868527412414551, - "101": 4.114696502685547, - "102": 3.6535263061523438, - "103": 3.659620761871338, - "104": 4.640170097351074, - "105": 3.862802028656006, - "106": 3.389212131500244, - "107": 3.5657927989959717 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "train_epoch_time": 4.7939066886901855, - "train_loss": 3.8870661060300415, - "train_score": 0.15261163918068968, - "val_loss": 3.90794214589177, - "val_score": 0.15114182700861745 - }, - { - "epoch": 2, - "grad_norm": 0.6136992573738098, - "learning_rate": 0.1, - "model_norm": 87.49285888671875, - "step_logs": { - "grad_norm": { - "108": 9.166020393371582, - "109": 3.260446548461914, - "110": 3.1746766567230225, - "111": 3.308523654937744, - "112": 3.1027259826660156, - "113": 2.790318250656128, - "114": 2.773282527923584, - "115": 2.8242406845092773, - "116": 3.1578242778778076, - "117": 2.969750165939331, - "118": 1.5987517833709717, - "119": 2.622586488723755, - "120": 8.518936157226562, - "121": 3.055206537246704, - "122": 2.9690043926239014, - "123": 2.8096797466278076, - "124": 4.662749767303467, - "125": 2.8870620727539062, - "126": 2.519000768661499, - "127": 6.841214656829834, - "128": 2.865640163421631, - "129": 2.7572903633117676, - "130": 1.421618103981018, - "131": 4.630087375640869, - "132": 2.8943216800689697, - "133": 2.507316827774048, - "134": 5.654372215270996, - "135": 2.8063504695892334, - "136": 2.557739496231079, - "137": 2.2054760456085205, - "138": 2.807223320007324, - "139": 4.430174350738525, - "140": 2.672210693359375, - "141": 1.9283416271209717, - "142": 5.2248077392578125, - "143": 2.7290027141571045, - "144": 2.4538376331329346, - "145": 2.210589647293091, - "146": 2.3795573711395264, - "147": 3.1825039386749268, - "148": 2.4803688526153564, - "149": 0.5940036177635193, - "150": 0.5978416204452515, - "151": 0.9853432178497314, - "152": 2.614866018295288, - "153": 2.426819324493408, - "154": 1.579184651374817, - "155": 2.228175640106201, - "156": 4.564561367034912, - "157": 2.549464702606201, - "158": 1.9981169700622559, - "159": 3.2127575874328613, - "160": 2.4377102851867676, - "161": 0.6136992573738098 - }, - "loss": { - "108": 3.8695123195648193, - "109": 5.381319046020508, - "110": 4.597160339355469, - "111": 3.8058512210845947, - "112": 3.524437427520752, - "113": 3.7831192016601562, - "114": 3.4119577407836914, - "115": 3.7660770416259766, - "116": 3.387238025665283, - "117": 3.847681999206543, - "118": 3.3913140296936035, - "119": 3.441558361053467, - "120": 3.854508399963379, - "121": 5.141708850860596, - "122": 4.35847282409668, - "123": 3.669011354446411, - "124": 3.5422751903533936, - "125": 4.187644958496094, - "126": 3.495161533355713, - "127": 3.7085232734680176, - "128": 4.668492317199707, - "129": 3.9175596237182617, - "130": 3.3215131759643555, - "131": 3.5024757385253906, - "132": 4.1439528465271, - "133": 3.501526355743408, - "134": 3.558802604675293, - "135": 4.358184814453125, - "136": 3.6727261543273926, - "137": 3.349576234817505, - "138": 3.5831480026245117, - "139": 3.542271137237549, - "140": 4.042727470397949, - "141": 3.3847625255584717, - "142": 3.5358009338378906, - "143": 4.247830867767334, - "144": 3.6025948524475098, - "145": 3.360447406768799, - "146": 3.5581021308898926, - "147": 3.4227206707000732, - "148": 3.762047290802002, - "149": 3.307605266571045, - "150": 3.3204827308654785, - "151": 3.310724973678589, - "152": 3.3572402000427246, - "153": 3.640658140182495, - "154": 3.307203769683838, - "155": 3.4553513526916504, - "156": 3.5002264976501465, - "157": 3.9907894134521484, - "158": 3.4358487129211426, - "159": 3.39845871925354, - "160": 3.6986827850341797, - "161": 3.2815957069396973 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "train_epoch_time": 4.793991565704346, - "train_loss": 3.2852243656749534, - "train_score": 0.152724847531045, - "val_loss": 3.301639177495385, - "val_score": 0.1513795209909553 - }, - { - "epoch": 3, - "grad_norm": 1.58250892162323, - "learning_rate": 0.1, - "model_norm": 87.50431060791016, - "step_logs": { - "grad_norm": { - "162": 0.6990936398506165, - "163": 1.0855343341827393, - "164": 2.573493003845215, - "165": 2.3338770866394043, - "166": 1.2506153583526611, - "167": 1.7552529573440552, - "168": 3.7846457958221436, - "169": 2.4146616458892822, - "170": 1.3735642433166504, - "171": 2.3647353649139404, - "172": 2.1765995025634766, - "173": 1.4735000133514404, - "174": 1.7969722747802734, - "175": 2.8110909461975098, - "176": 2.3343889713287354, - "177": 0.9906945824623108, - "178": 1.060250163078308, - "179": 1.9494225978851318, - "180": 2.1394035816192627, - "181": 2.3163135051727295, - "182": 2.225015640258789, - "183": 2.0056939125061035, - "184": 2.1559886932373047, - "185": 2.453526496887207, - "186": 2.3028724193573, - "187": 1.7598356008529663, - "188": 1.9500482082366943, - "189": 2.6437535285949707, - "190": 2.325402021408081, - "191": 1.5147470235824585, - "192": 1.7627238035202026, - "193": 2.553441286087036, - "194": 2.381896495819092, - "195": 1.8004099130630493, - "196": 1.9215713739395142, - "197": 2.468478202819824, - "198": 2.2915127277374268, - "199": 1.7835581302642822, - "200": 1.7955331802368164, - "201": 2.077427864074707, - "202": 1.991347074508667, - "203": 1.7464768886566162, - "204": 1.7679321765899658, - "205": 1.8141162395477295, - "206": 1.7467771768569946, - "207": 1.6279795169830322, - "208": 1.6814764738082886, - "209": 1.8262842893600464, - "210": 1.8182737827301025, - "211": 1.76496422290802, - "212": 1.671907901763916, - "213": 1.5209786891937256, - "214": 1.5442737340927124, - "215": 1.58250892162323 - }, - "loss": { - "162": 3.2714474201202393, - "163": 3.3101048469543457, - "164": 3.3309435844421387, - "165": 3.574993133544922, - "166": 3.299924373626709, - "167": 3.3417928218841553, - "168": 3.441725730895996, - "169": 3.7685446739196777, - "170": 3.287553310394287, - "171": 3.321277618408203, - "172": 3.4721219539642334, - "173": 3.268068790435791, - "174": 3.3477368354797363, - "175": 3.3122897148132324, - "176": 3.5108697414398193, - "177": 3.21201753616333, - "178": 3.1934924125671387, - "179": 3.2229983806610107, - "180": 3.3485851287841797, - "181": 3.2375502586364746, - "182": 3.3506741523742676, - "183": 3.2008423805236816, - "184": 3.295527219772339, - "185": 3.2338011264801025, - "186": 3.3315224647521973, - "187": 3.176913022994995, - "188": 3.219912528991699, - "189": 3.215770721435547, - "190": 3.3704915046691895, - "191": 3.1469473838806152, - "192": 3.148859977722168, - "193": 3.1830079555511475, - "194": 3.315352201461792, - "195": 3.1420464515686035, - "196": 3.1758809089660645, - "197": 3.172865390777588, - "198": 3.253307819366455, - "199": 3.1109161376953125, - "200": 3.1349234580993652, - "201": 3.1295700073242188, - "202": 3.1752634048461914, - "203": 3.071497678756714, - "204": 3.1231439113616943, - "205": 3.070530414581299, - "206": 3.122929573059082, - "207": 3.043402671813965, - "208": 3.0755069255828857, - "209": 3.0376853942871094, - "210": 3.0979976654052734, - "211": 3.0375564098358154, - "212": 3.0894670486450195, - "213": 3.0360488891601562, - "214": 3.046515464782715, - "215": 3.0090155601501465 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "train_epoch_time": 4.793714284896851, - "train_loss": 3.051448314330157, - "train_score": 0.14354487985928396, - "val_loss": 3.0669197023393915, - "val_score": 0.14355805129599764 - }, - { - "epoch": 4, - "grad_norm": 1.7089091539382935, - "learning_rate": 0.1, - "model_norm": 87.51215362548828, - "step_logs": { - "grad_norm": { - "216": 1.597985863685608, - "217": 1.6878122091293335, - "218": 1.784617304801941, - "219": 1.7893139123916626, - "220": 1.831480622291565, - "221": 1.7717779874801636, - "222": 1.7698428630828857, - "223": 1.6941642761230469, - "224": 1.7306171655654907, - "225": 1.7256495952606201, - "226": 1.6578220129013062, - "227": 1.5820120573043823, - "228": 1.5035868883132935, - "229": 1.544783592224121, - "230": 1.699487566947937, - "231": 1.987848162651062, - "232": 1.772462010383606, - "233": 1.2227364778518677, - "234": 1.355668306350708, - "235": 1.5648186206817627, - "236": 1.8153234720230103, - "237": 1.884724736213684, - "238": 2.012338161468506, - "239": 1.8148020505905151, - "240": 1.4784231185913086, - "241": 1.3997631072998047, - "242": 1.4841691255569458, - "243": 1.6125293970108032, - "244": 1.6721493005752563, - "245": 1.786097526550293, - "246": 1.8887684345245361, - "247": 2.3786678314208984, - "248": 2.8075203895568848, - "249": 2.617809534072876, - "250": 2.1931307315826416, - "251": 1.7103222608566284, - "252": 1.528630256652832, - "253": 1.602358102798462, - "254": 1.8009368181228638, - "255": 2.232680082321167, - "256": 2.715921640396118, - "257": 1.8635884523391724, - "258": 1.7650214433670044, - "259": 1.5951802730560303, - "260": 1.4896814823150635, - "261": 1.5536892414093018, - "262": 1.6634559631347656, - "263": 1.9263232946395874, - "264": 2.0404441356658936, - "265": 1.8849246501922607, - "266": 1.6770967245101929, - "267": 1.484693169593811, - "268": 1.6399348974227905, - "269": 1.7089091539382935 - }, - "loss": { - "216": 3.068169116973877, - "217": 3.007662296295166, - "218": 3.077162742614746, - "219": 3.0051798820495605, - "220": 3.0847859382629395, - "221": 3.008432626724243, - "222": 3.0402050018310547, - "223": 3.016720771789551, - "224": 3.015456438064575, - "225": 3.0199806690216064, - "226": 3.031007766723633, - "227": 2.981315851211548, - "228": 3.021916389465332, - "229": 2.9599485397338867, - "230": 2.9997448921203613, - "231": 3.0120396614074707, - "232": 3.0592916011810303, - "233": 2.941696882247925, - "234": 2.9985203742980957, - "235": 2.9608969688415527, - "236": 2.986017942428589, - "237": 3.015174388885498, - "238": 3.020690679550171, - "239": 3.0142064094543457, - "240": 2.9589004516601562, - "241": 2.9445266723632812, - "242": 2.9277162551879883, - "243": 2.9550492763519287, - "244": 2.9244189262390137, - "245": 2.9472007751464844, - "246": 2.9702582359313965, - "247": 2.9962825775146484, - "248": 3.0938549041748047, - "249": 3.0575406551361084, - "250": 3.0636565685272217, - "251": 2.952287435531616, - "252": 2.9423060417175293, - "253": 2.9139065742492676, - "254": 2.9626102447509766, - "255": 2.958373546600342, - "256": 3.063141345977783, - "257": 3.007920265197754, - "258": 2.9447689056396484, - "259": 2.9120852947235107, - "260": 2.9216179847717285, - "261": 2.8991172313690186, - "262": 2.937103748321533, - "263": 2.910618543624878, - "264": 2.9704627990722656, - "265": 2.921873092651367, - "266": 2.9462780952453613, - "267": 2.8780341148376465, - "268": 2.889033317565918, - "269": 2.9055511951446533 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "train_epoch_time": 4.7957987785339355, - "train_loss": 2.913550460560934, - "train_score": 0.1759549856827283, - "val_loss": 2.9285061988983854, - "val_score": 0.17097804289050272 - }, - { - "epoch": 5, - "grad_norm": 1.910400152206421, - "learning_rate": 0.1, - "model_norm": 87.51984405517578, - "step_logs": { - "grad_norm": { - "270": 1.7309120893478394, - "271": 1.6838411092758179, - "272": 1.6812191009521484, - "273": 1.6011210680007935, - "274": 1.5274264812469482, - "275": 1.3145862817764282, - "276": 1.1602916717529297, - "277": 1.2141036987304688, - "278": 1.3638465404510498, - "279": 1.85237717628479, - "280": 1.7475277185440063, - "281": 1.7263325452804565, - "282": 2.2008743286132812, - "283": 2.158879041671753, - "284": 2.393897294998169, - "285": 1.9338291883468628, - "286": 1.4306427240371704, - "287": 1.8433418273925781, - "288": 1.6604335308074951, - "289": 1.3293074369430542, - "290": 1.6525572538375854, - "291": 2.1497445106506348, - "292": 2.0472335815429688, - "293": 1.7122303247451782, - "294": 1.858788251876831, - "295": 1.9067161083221436, - "296": 1.5989854335784912, - "297": 1.2284775972366333, - "298": 1.1961337327957153, - "299": 1.3169851303100586, - "300": 1.5709247589111328, - "301": 1.648550033569336, - "302": 1.4092910289764404, - "303": 1.1947563886642456, - "304": 1.3627091646194458, - "305": 1.3384456634521484, - "306": 1.2349458932876587, - "307": 1.1548428535461426, - "308": 1.1814287900924683, - "309": 1.3464531898498535, - "310": 1.5717324018478394, - "311": 2.253826141357422, - "312": 1.9190090894699097, - "313": 1.294960856437683, - "314": 1.46340012550354, - "315": 1.587857723236084, - "316": 1.7106577157974243, - "317": 1.8518284559249878, - "318": 1.3669970035552979, - "319": 1.2048537731170654, - "320": 1.3535815477371216, - "321": 1.7471632957458496, - "322": 1.7827051877975464, - "323": 1.910400152206421 - }, - "loss": { - "270": 2.9040279388427734, - "271": 2.9078006744384766, - "272": 2.921225070953369, - "273": 2.892879009246826, - "274": 2.8824362754821777, - "275": 2.8701939582824707, - "276": 2.864061117172241, - "277": 2.851010322570801, - "278": 2.872096061706543, - "279": 2.883958578109741, - "280": 2.942429780960083, - "281": 2.8529250621795654, - "282": 2.9261763095855713, - "283": 2.9349865913391113, - "284": 2.9358103275299072, - "285": 2.922347068786621, - "286": 2.873699188232422, - "287": 2.8408632278442383, - "288": 2.9175288677215576, - "289": 2.838775157928467, - "290": 2.855052947998047, - "291": 2.8938469886779785, - "292": 2.918454885482788, - "293": 2.8680355548858643, - "294": 2.851624011993408, - "295": 2.8883683681488037, - "296": 2.869182586669922, - "297": 2.8103950023651123, - "298": 2.8099215030670166, - "299": 2.79030704498291, - "300": 2.807589530944824, - "301": 2.8348944187164307, - "302": 2.8415722846984863, - "303": 2.7835493087768555, - "304": 2.7984981536865234, - "305": 2.794250249862671, - "306": 2.770634651184082, - "307": 2.7848353385925293, - "308": 2.7740893363952637, - "309": 2.775357961654663, - "310": 2.7942543029785156, - "311": 2.8301584720611572, - "312": 2.8827664852142334, - "313": 2.7745378017425537, - "314": 2.7593209743499756, - "315": 2.7866246700286865, - "316": 2.810638904571533, - "317": 2.8005151748657227, - "318": 2.798887252807617, - "319": 2.7478482723236084, - "320": 2.7415900230407715, - "321": 2.7598347663879395, - "322": 2.8220653533935547, - "323": 2.775735378265381 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "train_epoch_time": 4.794295072555542, - "train_loss": 2.8404472835436785, - "train_score": 0.2019850699383354, - "val_loss": 2.860597257909764, - "val_score": 0.19601661170459642 - }, - { - "epoch": 6, - "grad_norm": 1.8443446159362793, - "learning_rate": 0.1, - "model_norm": 87.52779388427734, - "step_logs": { - "grad_norm": { - "324": 2.155632972717285, - "325": 1.84988272190094, - "326": 1.6228352785110474, - "327": 1.4795079231262207, - "328": 1.364604115486145, - "329": 1.2524449825286865, - "330": 1.2218588590621948, - "331": 1.4108054637908936, - "332": 1.694453477859497, - "333": 1.5836282968521118, - "334": 1.5810085535049438, - "335": 1.9620267152786255, - "336": 1.7498195171356201, - "337": 1.4597772359848022, - "338": 1.6286495923995972, - "339": 1.6808140277862549, - "340": 1.5486146211624146, - "341": 1.660322666168213, - "342": 1.8241106271743774, - "343": 1.892255425453186, - "344": 1.6763458251953125, - "345": 1.6753884553909302, - "346": 1.7542225122451782, - "347": 1.6533803939819336, - "348": 1.511404275894165, - "349": 1.8656450510025024, - "350": 1.738365650177002, - "351": 1.6207376718521118, - "352": 1.671261191368103, - "353": 1.4202420711517334, - "354": 1.1749540567398071, - "355": 1.0920641422271729, - "356": 1.1776328086853027, - "357": 1.4487411975860596, - "358": 1.8593851327896118, - "359": 1.898881435394287, - "360": 1.814507246017456, - "361": 1.7532634735107422, - "362": 1.700311303138733, - "363": 1.670277714729309, - "364": 1.50487220287323, - "365": 1.3220475912094116, - "366": 1.311021327972412, - "367": 1.4480234384536743, - "368": 1.4873378276824951, - "369": 1.552438497543335, - "370": 1.5964924097061157, - "371": 1.6949729919433594, - "372": 1.8176980018615723, - "373": 1.950077772140503, - "374": 1.7779804468154907, - "375": 1.8435618877410889, - "376": 1.9465885162353516, - "377": 1.8443446159362793 - }, - "loss": { - "324": 2.820026397705078, - "325": 2.8304035663604736, - "326": 2.7701854705810547, - "327": 2.762284517288208, - "328": 2.744509696960449, - "329": 2.715801477432251, - "330": 2.7025036811828613, - "331": 2.7244620323181152, - "332": 2.7508177757263184, - "333": 2.7496819496154785, - "334": 2.728255033493042, - "335": 2.773054599761963, - "336": 2.787616729736328, - "337": 2.731870651245117, - "338": 2.7328028678894043, - "339": 2.7629458904266357, - "340": 2.7285022735595703, - "341": 2.7550153732299805, - "342": 2.762906074523926, - "343": 2.7425501346588135, - "344": 2.72770619392395, - "345": 2.7335047721862793, - "346": 2.738142967224121, - "347": 2.7749195098876953, - "348": 2.7302937507629395, - "349": 2.7354214191436768, - "350": 2.7608859539031982, - "351": 2.732132911682129, - "352": 2.7298476696014404, - "353": 2.714466094970703, - "354": 2.7010788917541504, - "355": 2.6623849868774414, - "356": 2.6772267818450928, - "357": 2.6973962783813477, - "358": 2.7395243644714355, - "359": 2.735440731048584, - "360": 2.692413806915283, - "361": 2.7381410598754883, - "362": 2.720083475112915, - "363": 2.7207460403442383, - "364": 2.6731836795806885, - "365": 2.693645477294922, - "366": 2.6823296546936035, - "367": 2.685734272003174, - "368": 2.684813976287842, - "369": 2.687150478363037, - "370": 2.677173614501953, - "371": 2.708266258239746, - "372": 2.701272964477539, - "373": 2.727740526199341, - "374": 2.717501163482666, - "375": 2.7007410526275635, - "376": 2.7074296474456787, - "377": 2.6967391967773438 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "train_epoch_time": 4.794239282608032, - "train_loss": 2.6888308897251036, - "train_score": 0.2316725699212322, - "val_loss": 2.707535773002458, - "val_score": 0.2303790546928016 - }, - { - "epoch": 7, - "grad_norm": 1.704913854598999, - "learning_rate": 0.1, - "model_norm": 87.53600311279297, - "step_logs": { - "grad_norm": { - "378": 2.0708065032958984, - "379": 1.681443214416504, - "380": 1.2056361436843872, - "381": 1.2746915817260742, - "382": 1.4549461603164673, - "383": 1.5162930488586426, - "384": 1.4708962440490723, - "385": 1.5431746244430542, - "386": 1.6435669660568237, - "387": 1.8571664094924927, - "388": 1.9098087549209595, - "389": 1.9669134616851807, - "390": 2.3033864498138428, - "391": 1.8136849403381348, - "392": 1.8377368450164795, - "393": 1.8172292709350586, - "394": 1.4593688249588013, - "395": 1.4622390270233154, - "396": 1.877022385597229, - "397": 2.0090367794036865, - "398": 1.8020083904266357, - "399": 2.0596206188201904, - "400": 1.696608066558838, - "401": 2.0540056228637695, - "402": 2.267760753631592, - "403": 2.2145755290985107, - "404": 1.3494840860366821, - "405": 1.5002892017364502, - "406": 1.7640374898910522, - "407": 1.5376811027526855, - "408": 1.225986361503601, - "409": 1.2189589738845825, - "410": 1.6300349235534668, - "411": 1.8727000951766968, - "412": 1.7715438604354858, - "413": 1.6847164630889893, - "414": 1.5470216274261475, - "415": 1.5455007553100586, - "416": 1.519396424293518, - "417": 1.603033185005188, - "418": 1.5024815797805786, - "419": 1.5774893760681152, - "420": 1.7047498226165771, - "421": 1.8606033325195312, - "422": 1.6975452899932861, - "423": 1.8237303495407104, - "424": 1.813138484954834, - "425": 1.9298568964004517, - "426": 1.4758800268173218, - "427": 1.3395049571990967, - "428": 1.4335174560546875, - "429": 1.5994044542312622, - "430": 1.578407883644104, - "431": 1.704913854598999 - }, - "loss": { - "378": 2.6904144287109375, - "379": 2.7103376388549805, - "380": 2.6507139205932617, - "381": 2.6244349479675293, - "382": 2.6512017250061035, - "383": 2.646717071533203, - "384": 2.6393022537231445, - "385": 2.63724422454834, - "386": 2.6495134830474854, - "387": 2.653243064880371, - "388": 2.6915817260742188, - "389": 2.6652557849884033, - "390": 2.6909615993499756, - "391": 2.6910130977630615, - "392": 2.64697265625, - "393": 2.6619622707366943, - "394": 2.6343002319335938, - "395": 2.6012232303619385, - "396": 2.634345531463623, - "397": 2.6683480739593506, - "398": 2.6859707832336426, - "399": 2.6322669982910156, - "400": 2.671630859375, - "401": 2.652303695678711, - "402": 2.7039732933044434, - "403": 2.674090623855591, - "404": 2.6350889205932617, - "405": 2.6096720695495605, - "406": 2.6331543922424316, - "407": 2.6284260749816895, - "408": 2.581529378890991, - "409": 2.6036624908447266, - "410": 2.621208667755127, - "411": 2.6714320182800293, - "412": 2.6453893184661865, - "413": 2.6183385848999023, - "414": 2.630486011505127, - "415": 2.602463960647583, - "416": 2.6082732677459717, - "417": 2.59086275100708, - "418": 2.609283924102783, - "419": 2.603783130645752, - "420": 2.6101644039154053, - "421": 2.6071391105651855, - "422": 2.616755962371826, - "423": 2.637195110321045, - "424": 2.629767656326294, - "425": 2.612427234649658, - "426": 2.6312780380249023, - "427": 2.574591636657715, - "428": 2.5845155715942383, - "429": 2.5824062824249268, - "430": 2.600257158279419, - "431": 2.5817208290100098 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "train_epoch_time": 4.7935755252838135, - "train_loss": 2.5908328111065684, - "train_score": 0.2439125268667979, - "val_loss": 2.610408023085304, - "val_score": 0.24100800070710626 - }, - { - "epoch": 8, - "grad_norm": 1.5788434743881226, - "learning_rate": 0.1, - "model_norm": 87.54491424560547, - "step_logs": { - "grad_norm": { - "432": 1.286270260810852, - "433": 1.4693156480789185, - "434": 1.6054952144622803, - "435": 1.5613069534301758, - "436": 1.4511334896087646, - "437": 1.5603748559951782, - "438": 1.5913562774658203, - "439": 1.7547959089279175, - "440": 1.653072714805603, - "441": 1.8857250213623047, - "442": 1.610456109046936, - "443": 1.8123440742492676, - "444": 1.4749577045440674, - "445": 2.1184768676757812, - "446": 1.6481423377990723, - "447": 2.067317485809326, - "448": 1.6184128522872925, - "449": 1.7942891120910645, - "450": 1.800119400024414, - "451": 1.5355799198150635, - "452": 1.4278818368911743, - "453": 1.3814152479171753, - "454": 1.2996751070022583, - "455": 1.3250993490219116, - "456": 1.3313499689102173, - "457": 1.2745602130889893, - "458": 1.3530325889587402, - "459": 1.4841971397399902, - "460": 1.6527849435806274, - "461": 1.762048602104187, - "462": 1.673614501953125, - "463": 1.4465807676315308, - "464": 1.2824547290802002, - "465": 1.2305985689163208, - "466": 1.2866114377975464, - "467": 1.3537986278533936, - "468": 1.3462580442428589, - "469": 1.305461049079895, - "470": 1.3501737117767334, - "471": 1.392318606376648, - "472": 1.3961211442947388, - "473": 1.39976966381073, - "474": 1.3901209831237793, - "475": 1.3982222080230713, - "476": 1.358236312866211, - "477": 1.50264310836792, - "478": 1.3991254568099976, - "479": 1.5541889667510986, - "480": 1.3254960775375366, - "481": 1.3470790386199951, - "482": 1.3513410091400146, - "483": 1.4285060167312622, - "484": 1.4070141315460205, - "485": 1.5788434743881226 - }, - "loss": { - "432": 2.588563919067383, - "433": 2.5665931701660156, - "434": 2.5865190029144287, - "435": 2.58758544921875, - "436": 2.589094638824463, - "437": 2.5738320350646973, - "438": 2.597832202911377, - "439": 2.586780071258545, - "440": 2.6245334148406982, - "441": 2.597990036010742, - "442": 2.598391532897949, - "443": 2.5842695236206055, - "444": 2.5860185623168945, - "445": 2.6066675186157227, - "446": 2.603336811065674, - "447": 2.579054594039917, - "448": 2.6035208702087402, - "449": 2.580047607421875, - "450": 2.617368698120117, - "451": 2.579369306564331, - "452": 2.5580849647521973, - "453": 2.560403347015381, - "454": 2.5808045864105225, - "455": 2.5537352561950684, - "456": 2.5496912002563477, - "457": 2.5557453632354736, - "458": 2.5602951049804688, - "459": 2.5526907444000244, - "460": 2.579969644546509, - "461": 2.5848283767700195, - "462": 2.5882349014282227, - "463": 2.5545496940612793, - "464": 2.5511064529418945, - "465": 2.561150550842285, - "466": 2.5469818115234375, - "467": 2.5514984130859375, - "468": 2.543325424194336, - "469": 2.5386500358581543, - "470": 2.543893575668335, - "471": 2.5512020587921143, - "472": 2.565943479537964, - "473": 2.545866012573242, - "474": 2.5484142303466797, - "475": 2.531568765640259, - "476": 2.5378899574279785, - "477": 2.5438013076782227, - "478": 2.553943634033203, - "479": 2.5409069061279297, - "480": 2.5679845809936523, - "481": 2.5286805629730225, - "482": 2.5337605476379395, - "483": 2.5237157344818115, - "484": 2.557138681411743, - "485": 2.563009262084961 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "train_epoch_time": 4.793356895446777, - "train_loss": 2.5702027982413513, - "train_score": 0.23034321196708651, - "val_loss": 2.5981002122197987, - "val_score": 0.2235397532599391 - }, - { - "epoch": 9, - "grad_norm": 1.1482586860656738, - "learning_rate": 0.1, - "model_norm": 87.55480194091797, - "step_logs": { - "grad_norm": { - "486": 1.5784811973571777, - "487": 1.3914556503295898, - "488": 1.3596466779708862, - "489": 1.3279658555984497, - "490": 1.2607295513153076, - "491": 1.1456457376480103, - "492": 1.0837055444717407, - "493": 1.0943005084991455, - "494": 1.0616426467895508, - "495": 1.071111798286438, - "496": 1.0862345695495605, - "497": 1.1060478687286377, - "498": 1.136170506477356, - "499": 1.235898494720459, - "500": 1.3385292291641235, - "501": 1.370406985282898, - "502": 1.2026985883712769, - "503": 1.4100940227508545, - "504": 1.644429087638855, - "505": 1.8106659650802612, - "506": 1.954629898071289, - "507": 1.80476975440979, - "508": 1.8550328016281128, - "509": 1.503313422203064, - "510": 1.3251838684082031, - "511": 1.3144601583480835, - "512": 1.3451131582260132, - "513": 1.3598833084106445, - "514": 1.3750338554382324, - "515": 1.336960792541504, - "516": 1.285061240196228, - "517": 1.275316596031189, - "518": 1.331743597984314, - "519": 1.3414835929870605, - "520": 1.3239773511886597, - "521": 1.3470772504806519, - "522": 1.3875837326049805, - "523": 1.4202936887741089, - "524": 1.3600045442581177, - "525": 1.332250714302063, - "526": 1.303304672241211, - "527": 1.289294719696045, - "528": 1.2900714874267578, - "529": 1.385382890701294, - "530": 1.633182406425476, - "531": 1.551501750946045, - "532": 1.4395169019699097, - "533": 1.3466784954071045, - "534": 1.3048006296157837, - "535": 1.3786487579345703, - "536": 1.4639127254486084, - "537": 1.4889410734176636, - "538": 1.3108350038528442, - "539": 1.1482586860656738 - }, - "loss": { - "486": 2.5756337642669678, - "487": 2.5349271297454834, - "488": 2.5285801887512207, - "489": 2.5507967472076416, - "490": 2.53084135055542, - "491": 2.534364700317383, - "492": 2.507439613342285, - "493": 2.525484800338745, - "494": 2.5282270908355713, - "495": 2.505523920059204, - "496": 2.5126185417175293, - "497": 2.5094051361083984, - "498": 2.5260982513427734, - "499": 2.498687267303467, - "500": 2.521278142929077, - "501": 2.53318190574646, - "502": 2.531080961227417, - "503": 2.534235715866089, - "504": 2.557523250579834, - "505": 2.541710376739502, - "506": 2.5630218982696533, - "507": 2.580857753753662, - "508": 2.5380778312683105, - "509": 2.5711212158203125, - "510": 2.538898468017578, - "511": 2.522522449493408, - "512": 2.5212037563323975, - "513": 2.537827968597412, - "514": 2.550327777862549, - "515": 2.5227861404418945, - "516": 2.509505033493042, - "517": 2.510568618774414, - "518": 2.512913227081299, - "519": 2.523221969604492, - "520": 2.510162830352783, - "521": 2.5164146423339844, - "522": 2.515011787414551, - "523": 2.5461232662200928, - "524": 2.51908016204834, - "525": 2.530780792236328, - "526": 2.5205748081207275, - "527": 2.5035438537597656, - "528": 2.5022497177124023, - "529": 2.4940085411071777, - "530": 2.54360294342041, - "531": 2.527162790298462, - "532": 2.520843267440796, - "533": 2.517914056777954, - "534": 2.4902191162109375, - "535": 2.494802713394165, - "536": 2.4951748847961426, - "537": 2.495988607406616, - "538": 2.5046000480651855, - "539": 2.4796130657196045 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "train_epoch_time": 4.793646574020386, - "train_loss": 2.48418271640794, - "train_score": 0.2766129393916219, - "val_loss": 2.517991020539872, - "val_score": 0.2703340268778336 - }, - { - "epoch": 10, - "grad_norm": 1.3202892541885376, - "learning_rate": 0.1, - "model_norm": 87.56735229492188, - "step_logs": { - "grad_norm": { - "540": 1.2227391004562378, - "541": 1.264283537864685, - "542": 1.351145625114441, - "543": 1.382710337638855, - "544": 1.3440003395080566, - "545": 1.3707528114318848, - "546": 1.436211347579956, - "547": 1.3877992630004883, - "548": 1.5721369981765747, - "549": 1.412706732749939, - "550": 1.5401239395141602, - "551": 1.5462980270385742, - "552": 1.4806427955627441, - "553": 1.3181809186935425, - "554": 1.2439841032028198, - "555": 1.269310474395752, - "556": 1.4574469327926636, - "557": 1.5510181188583374, - "558": 1.5461934804916382, - "559": 1.3501863479614258, - "560": 1.6920835971832275, - "561": 1.3779664039611816, - "562": 1.7835991382598877, - "563": 1.4689912796020508, - "564": 2.218740463256836, - "565": 1.8177419900894165, - "566": 3.5291826725006104, - "567": 2.6178267002105713, - "568": 3.536343812942505, - "569": 2.8160178661346436, - "570": 2.678770065307617, - "571": 1.8330696821212769, - "572": 1.226369857788086, - "573": 1.170863151550293, - "574": 1.2805521488189697, - "575": 1.3313273191452026, - "576": 1.5048911571502686, - "577": 1.5850555896759033, - "578": 1.5417934656143188, - "579": 1.4226044416427612, - "580": 1.2192103862762451, - "581": 1.1476165056228638, - "582": 1.0969921350479126, - "583": 1.1125699281692505, - "584": 1.1975789070129395, - "585": 1.2218101024627686, - "586": 1.15450119972229, - "587": 1.0483189821243286, - "588": 1.0810859203338623, - "589": 1.2447563409805298, - "590": 1.3714244365692139, - "591": 1.4388325214385986, - "592": 1.3979953527450562, - "593": 1.3202892541885376 - }, - "loss": { - "540": 2.4943041801452637, - "541": 2.48636794090271, - "542": 2.5075252056121826, - "543": 2.5327506065368652, - "544": 2.476402521133423, - "545": 2.491098403930664, - "546": 2.4865918159484863, - "547": 2.508561372756958, - "548": 2.497014284133911, - "549": 2.4996299743652344, - "550": 2.500722885131836, - "551": 2.5067903995513916, - "552": 2.4970755577087402, - "553": 2.481100559234619, - "554": 2.4737167358398438, - "555": 2.49249267578125, - "556": 2.4807984828948975, - "557": 2.4960312843322754, - "558": 2.4705493450164795, - "559": 2.496767520904541, - "560": 2.4527297019958496, - "561": 2.5173892974853516, - "562": 2.495147705078125, - "563": 2.5012295246124268, - "564": 2.519364833831787, - "565": 2.506685733795166, - "566": 2.566556930541992, - "567": 2.5751850605010986, - "568": 2.692901611328125, - "569": 2.7047135829925537, - "570": 2.615813732147217, - "571": 2.5705904960632324, - "572": 2.496788263320923, - "573": 2.4712114334106445, - "574": 2.45900821685791, - "575": 2.45928955078125, - "576": 2.468764305114746, - "577": 2.4921658039093018, - "578": 2.4838056564331055, - "579": 2.4985921382904053, - "580": 2.4581851959228516, - "581": 2.4484689235687256, - "582": 2.452770709991455, - "583": 2.430497169494629, - "584": 2.4521737098693848, - "585": 2.437474250793457, - "586": 2.4307210445404053, - "587": 2.425107955932617, - "588": 2.42672061920166, - "589": 2.416019916534424, - "590": 2.4553542137145996, - "591": 2.4518163204193115, - "592": 2.4379661083221436, - "593": 2.434680461883545 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "train_epoch_time": 4.79363226890564, - "train_loss": 2.4340638096397544, - "train_score": 0.2910083392600247, - "val_loss": 2.470148722969432, - "val_score": 0.28113788010469126 - }, - { - "epoch": 11, - "grad_norm": 1.2460404634475708, - "learning_rate": 0.1, - "model_norm": 87.57955932617188, - "step_logs": { - "grad_norm": { - "594": 1.190887451171875, - "595": 1.1612094640731812, - "596": 1.2796485424041748, - "597": 1.2939462661743164, - "598": 1.352797508239746, - "599": 1.4436309337615967, - "600": 1.4838576316833496, - "601": 1.5455995798110962, - "602": 1.4773309230804443, - "603": 1.3189791440963745, - "604": 1.2051525115966797, - "605": 1.1795378923416138, - "606": 1.2116870880126953, - "607": 1.3571536540985107, - "608": 1.322058081626892, - "609": 1.2179083824157715, - "610": 1.2482426166534424, - "611": 1.2081761360168457, - "612": 1.213883876800537, - "613": 1.1586085557937622, - "614": 1.2170231342315674, - "615": 1.2826834917068481, - "616": 1.1640968322753906, - "617": 1.155636191368103, - "618": 1.2509428262710571, - "619": 1.3532533645629883, - "620": 1.4139662981033325, - "621": 1.4417206048965454, - "622": 1.4218956232070923, - "623": 1.428883671760559, - "624": 1.3710283041000366, - "625": 1.2938613891601562, - "626": 1.247063398361206, - "627": 1.147965908050537, - "628": 1.270517110824585, - "629": 1.5310883522033691, - "630": 1.7752671241760254, - "631": 1.6297425031661987, - "632": 1.3927663564682007, - "633": 1.4218151569366455, - "634": 1.4464086294174194, - "635": 1.5107020139694214, - "636": 1.4589729309082031, - "637": 1.3154953718185425, - "638": 1.2757622003555298, - "639": 1.2317240238189697, - "640": 1.1954832077026367, - "641": 1.295788049697876, - "642": 1.3872429132461548, - "643": 1.448379635810852, - "644": 1.6278018951416016, - "645": 1.6655741930007935, - "646": 1.441634178161621, - "647": 1.2460404634475708 - }, - "loss": { - "594": 2.431124210357666, - "595": 2.4516854286193848, - "596": 2.437966823577881, - "597": 2.416505813598633, - "598": 2.426060676574707, - "599": 2.4314777851104736, - "600": 2.4269022941589355, - "601": 2.455850124359131, - "602": 2.4542617797851562, - "603": 2.4437007904052734, - "604": 2.407693386077881, - "605": 2.4272193908691406, - "606": 2.4195547103881836, - "607": 2.4127767086029053, - "608": 2.4124574661254883, - "609": 2.4287705421447754, - "610": 2.426764726638794, - "611": 2.397266387939453, - "612": 2.4216463565826416, - "613": 2.396771192550659, - "614": 2.4175186157226562, - "615": 2.4226953983306885, - "616": 2.4081461429595947, - "617": 2.422806739807129, - "618": 2.4365670680999756, - "619": 2.426455020904541, - "620": 2.420490264892578, - "621": 2.4348621368408203, - "622": 2.4125428199768066, - "623": 2.4133524894714355, - "624": 2.4224588871002197, - "625": 2.3952183723449707, - "626": 2.3972959518432617, - "627": 2.417902946472168, - "628": 2.3964099884033203, - "629": 2.4017157554626465, - "630": 2.4101107120513916, - "631": 2.4423046112060547, - "632": 2.3953487873077393, - "633": 2.4087698459625244, - "634": 2.3892393112182617, - "635": 2.3853797912597656, - "636": 2.418905735015869, - "637": 2.4223732948303223, - "638": 2.403280019760132, - "639": 2.3609299659729004, - "640": 2.365952968597412, - "641": 2.396477460861206, - "642": 2.3863439559936523, - "643": 2.4122841358184814, - "644": 2.4080750942230225, - "645": 2.4373939037323, - "646": 2.4049220085144043, - "647": 2.3869943618774414 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "train_epoch_time": 4.793908596038818, - "train_loss": 2.37648544325206, - "train_score": 0.30083729369910583, - "val_loss": 2.414509936397303, - "val_score": 0.2929777196778496 - }, - { - "epoch": 12, - "grad_norm": 0.874882161617279, - "learning_rate": 0.1, - "model_norm": 87.59091186523438, - "step_logs": { - "grad_norm": { - "648": 1.2153477668762207, - "649": 1.1675283908843994, - "650": 1.049970030784607, - "651": 1.0972448587417603, - "652": 1.0681356191635132, - "653": 0.9281523823738098, - "654": 0.9574427008628845, - "655": 1.0146082639694214, - "656": 1.003524899482727, - "657": 0.9913014769554138, - "658": 0.9196563363075256, - "659": 0.9900217652320862, - "660": 1.0558892488479614, - "661": 1.1580747365951538, - "662": 1.4004088640213013, - "663": 1.4685490131378174, - "664": 1.2901968955993652, - "665": 1.193713665008545, - "666": 1.1283073425292969, - "667": 0.9713377952575684, - "668": 0.9379022121429443, - "669": 0.9284307360649109, - "670": 0.934114396572113, - "671": 0.8895021080970764, - "672": 0.8586216568946838, - "673": 0.8883331418037415, - "674": 0.8018227815628052, - "675": 0.8049211502075195, - "676": 0.8063657879829407, - "677": 0.7673876285552979, - "678": 0.8404936790466309, - "679": 0.8184280395507812, - "680": 0.7469046115875244, - "681": 0.7634561657905579, - "682": 0.8151198625564575, - "683": 0.8497288227081299, - "684": 0.8237655162811279, - "685": 0.8332400918006897, - "686": 0.7675581574440002, - "687": 0.7410281896591187, - "688": 0.7786405086517334, - "689": 0.7372876405715942, - "690": 0.6957602500915527, - "691": 0.6824859380722046, - "692": 0.6738407015800476, - "693": 0.6567220687866211, - "694": 0.6754260659217834, - "695": 0.7066612839698792, - "696": 0.7177454233169556, - "697": 0.6948050856590271, - "698": 0.8061345219612122, - "699": 0.9835711121559143, - "700": 0.930682897567749, - "701": 0.874882161617279 - }, - "loss": { - "648": 2.376716136932373, - "649": 2.3776378631591797, - "650": 2.343829870223999, - "651": 2.3607192039489746, - "652": 2.381183385848999, - "653": 2.360600233078003, - "654": 2.3540377616882324, - "655": 2.369310140609741, - "656": 2.362597703933716, - "657": 2.355721950531006, - "658": 2.3551576137542725, - "659": 2.363320827484131, - "660": 2.359642505645752, - "661": 2.342151165008545, - "662": 2.3799564838409424, - "663": 2.406167984008789, - "664": 2.3755650520324707, - "665": 2.3758602142333984, - "666": 2.3462915420532227, - "667": 2.3359925746917725, - "668": 2.346792221069336, - "669": 2.3401670455932617, - "670": 2.3435280323028564, - "671": 2.3481383323669434, - "672": 2.3416247367858887, - "673": 2.3271641731262207, - "674": 2.3429627418518066, - "675": 2.3368120193481445, - "676": 2.3182358741760254, - "677": 2.3329615592956543, - "678": 2.3190741539001465, - "679": 2.333871364593506, - "680": 2.3324553966522217, - "681": 2.3340964317321777, - "682": 2.3210675716400146, - "683": 2.3353805541992188, - "684": 2.3347253799438477, - "685": 2.3267807960510254, - "686": 2.341536045074463, - "687": 2.351691961288452, - "688": 2.3285975456237793, - "689": 2.310883045196533, - "690": 2.325793504714966, - "691": 2.3111751079559326, - "692": 2.2995662689208984, - "693": 2.2912259101867676, - "694": 2.313803195953369, - "695": 2.333242893218994, - "696": 2.3076462745666504, - "697": 2.3276991844177246, - "698": 2.299652576446533, - "699": 2.330610752105713, - "700": 2.3376986980438232, - "701": 2.31112003326416 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "train_epoch_time": 4.79313063621521, - "train_loss": 2.315652990546425, - "train_score": 0.3212697273090888, - "val_loss": 2.3595519397069884, - "val_score": 0.3076609143159693 - }, - { - "epoch": 13, - "grad_norm": 0.5055509209632874, - "learning_rate": 0.06666666666666668, - "model_norm": 87.5983657836914, - "step_logs": { - "grad_norm": { - "702": 0.873421847820282, - "703": 0.8787020444869995, - "704": 0.8275917172431946, - "705": 0.6600570678710938, - "706": 0.6477454900741577, - "707": 0.7042960524559021, - "708": 0.7077363133430481, - "709": 0.6466545462608337, - "710": 0.5727100372314453, - "711": 0.5508549213409424, - "712": 0.5574941039085388, - "713": 0.612697958946228, - "714": 0.6018780469894409, - "715": 0.6349993944168091, - "716": 0.627842366695404, - "717": 0.5532848238945007, - "718": 0.5407602190971375, - "719": 0.5293595194816589, - "720": 0.5181177258491516, - "721": 0.49682825803756714, - "722": 0.5690038204193115, - "723": 0.56830894947052, - "724": 0.6396275162696838, - "725": 0.6821790933609009, - "726": 0.6260387301445007, - "727": 0.5721016526222229, - "728": 0.5962054133415222, - "729": 0.6370568871498108, - "730": 0.5463374853134155, - "731": 0.5727306008338928, - "732": 0.5895899534225464, - "733": 0.6114787459373474, - "734": 0.5390244126319885, - "735": 0.47865039110183716, - "736": 0.5278720259666443, - "737": 0.49008116126060486, - "738": 0.48022207617759705, - "739": 0.5447253584861755, - "740": 0.4932536780834198, - "741": 0.4671356678009033, - "742": 0.41701987385749817, - "743": 0.4587652385234833, - "744": 0.47394877672195435, - "745": 0.42169538140296936, - "746": 0.4012998342514038, - "747": 0.4062337577342987, - "748": 0.4478924870491028, - "749": 0.5040670037269592, - "750": 0.4971032440662384, - "751": 0.4145593047142029, - "752": 0.46045157313346863, - "753": 0.4367998242378235, - "754": 0.4862283170223236, - "755": 0.5055509209632874 - }, - "loss": { - "702": 2.3045897483825684, - "703": 2.3064589500427246, - "704": 2.3070566654205322, - "705": 2.2990026473999023, - "706": 2.318958282470703, - "707": 2.3057384490966797, - "708": 2.321763753890991, - "709": 2.3071179389953613, - "710": 2.312420606613159, - "711": 2.3025388717651367, - "712": 2.303982973098755, - "713": 2.3164925575256348, - "714": 2.3022115230560303, - "715": 2.316880941390991, - "716": 2.2946736812591553, - "717": 2.3245089054107666, - "718": 2.312926769256592, - "719": 2.2961387634277344, - "720": 2.2904229164123535, - "721": 2.298205852508545, - "722": 2.3078927993774414, - "723": 2.299222469329834, - "724": 2.288508415222168, - "725": 2.2892141342163086, - "726": 2.3007991313934326, - "727": 2.27486515045166, - "728": 2.288325786590576, - "729": 2.300913095474243, - "730": 2.3070125579833984, - "731": 2.286262035369873, - "732": 2.2943308353424072, - "733": 2.295842170715332, - "734": 2.2783195972442627, - "735": 2.2907257080078125, - "736": 2.297694683074951, - "737": 2.2753493785858154, - "738": 2.2839298248291016, - "739": 2.2937827110290527, - "740": 2.2947144508361816, - "741": 2.283738613128662, - "742": 2.3150370121002197, - "743": 2.2977237701416016, - "744": 2.2905197143554688, - "745": 2.299306631088257, - "746": 2.277128219604492, - "747": 2.2901813983917236, - "748": 2.2748513221740723, - "749": 2.255838632583618, - "750": 2.2761495113372803, - "751": 2.2779102325439453, - "752": 2.270442008972168, - "753": 2.2821240425109863, - "754": 2.2740554809570312, - "755": 2.277127981185913 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "train_epoch_time": 4.793538808822632, - "train_loss": 2.281302854491443, - "train_score": 0.32983545558353405, - "val_loss": 2.326994259787208, - "val_score": 0.31552274636368255 - }, - { - "epoch": 14, - "grad_norm": 0.4144708216190338, - "learning_rate": 0.03333333333333334, - "model_norm": 87.60089874267578, - "step_logs": { - "grad_norm": { - "756": 0.4931719899177551, - "757": 0.4075883626937866, - "758": 0.41429805755615234, - "759": 0.4326213002204895, - "760": 0.4707561731338501, - "761": 0.3977644145488739, - "762": 0.4253990948200226, - "763": 0.44635725021362305, - "764": 0.46539491415023804, - "765": 0.47820448875427246, - "766": 0.47383829951286316, - "767": 0.4402104318141937, - "768": 0.47765982151031494, - "769": 0.42489269375801086, - "770": 0.4826014041900635, - "771": 0.43354299664497375, - "772": 0.43740114569664, - "773": 0.4561915993690491, - "774": 0.4442673325538635, - "775": 0.4328565001487732, - "776": 0.43971607089042664, - "777": 0.42531150579452515, - "778": 0.4106575548648834, - "779": 0.39637649059295654, - "780": 0.40828216075897217, - "781": 0.4233377277851105, - "782": 0.4745389521121979, - "783": 0.5316278338432312, - "784": 0.41087356209754944, - "785": 0.4421166479587555, - "786": 0.4266990125179291, - "787": 0.43108993768692017, - "788": 0.45749399065971375, - "789": 0.4333341121673584, - "790": 0.4311707615852356, - "791": 0.3969404995441437, - "792": 0.4155759811401367, - "793": 0.3905121684074402, - "794": 0.4371906518936157, - "795": 0.42071861028671265, - "796": 0.44041207432746887, - "797": 0.39641088247299194, - "798": 0.46034425497055054, - "799": 0.4287641942501068, - "800": 0.36320623755455017, - "801": 0.41520726680755615, - "802": 0.44111791253089905, - "803": 0.4099193811416626, - "804": 0.4446238875389099, - "805": 0.4684411585330963, - "806": 0.3893916606903076, - "807": 0.4585827589035034, - "808": 0.4609387218952179, - "809": 0.4144708216190338 - }, - "loss": { - "756": 2.285633087158203, - "757": 2.2859766483306885, - "758": 2.2913501262664795, - "759": 2.2818572521209717, - "760": 2.2529425621032715, - "761": 2.285468578338623, - "762": 2.2915501594543457, - "763": 2.2674098014831543, - "764": 2.2713334560394287, - "765": 2.2697865962982178, - "766": 2.290040969848633, - "767": 2.2831172943115234, - "768": 2.294541597366333, - "769": 2.2672605514526367, - "770": 2.2870659828186035, - "771": 2.264914035797119, - "772": 2.2836861610412598, - "773": 2.283172130584717, - "774": 2.301900863647461, - "775": 2.2733378410339355, - "776": 2.2623291015625, - "777": 2.2673239707946777, - "778": 2.2862801551818848, - "779": 2.2801761627197266, - "780": 2.28262996673584, - "781": 2.272077798843384, - "782": 2.261648654937744, - "783": 2.2838706970214844, - "784": 2.2722668647766113, - "785": 2.277329921722412, - "786": 2.264673948287964, - "787": 2.2855968475341797, - "788": 2.2744250297546387, - "789": 2.290834665298462, - "790": 2.260718822479248, - "791": 2.2834455966949463, - "792": 2.266693115234375, - "793": 2.277648448944092, - "794": 2.2884092330932617, - "795": 2.26765513420105, - "796": 2.2677550315856934, - "797": 2.2779088020324707, - "798": 2.2651021480560303, - "799": 2.271761178970337, - "800": 2.257920265197754, - "801": 2.266298532485962, - "802": 2.263610601425171, - "803": 2.2619476318359375, - "804": 2.2639989852905273, - "805": 2.2840189933776855, - "806": 2.273719072341919, - "807": 2.254319667816162, - "808": 2.2900280952453613, - "809": 2.2776894569396973 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "train_epoch_time": 4.7934250831604, - "train_loss": 2.2720030760662455, - "train_score": 0.33221171087821894, - "val_loss": 2.318394170426063, - "val_score": 0.3180745909531546 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:23:07.624259", - "final_model_norm": 87.60089874267578, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:21:24.375213", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 10.934322357177734, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.43321990966797, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 8.455106735229492, - "3": 5.430062770843506, - "4": 5.646889686584473, - "5": 11.033295631408691, - "6": 19.40323257446289, - "7": 7.685751914978027, - "8": 4.723723888397217, - "9": 2.782180070877075, - "10": 2.4882020950317383, - "11": 3.0325138568878174, - "12": 4.416697978973389, - "13": 13.23110580444336, - "14": 3.7264244556427, - "15": 6.371815204620361, - "16": 7.2332377433776855, - "17": 5.820730686187744, - "18": 3.617907762527466, - "19": 7.497646808624268, - "20": 2.679824113845825, - "21": 3.7948453426361084, - "22": 22.81573486328125, - "23": 4.701910495758057, - "24": 17.263755798339844, - "25": 18.11946678161621, - "26": 14.757731437683105, - "27": 7.984584808349609, - "28": 4.494190692901611, - "29": 3.9699289798736572, - "30": 2.19315505027771, - "31": 2.6492249965667725, - "32": 5.787468433380127, - "33": 3.5560007095336914, - "34": 3.076932191848755, - "35": 5.001994609832764, - "36": 6.565011978149414, - "37": 3.4447667598724365, - "38": 7.241092681884766, - "39": 5.51541805267334, - "40": 6.2375102043151855, - "41": 16.69146728515625, - "42": 6.195662498474121, - "43": 6.505462169647217, - "44": 9.777633666992188, - "45": 5.433412075042725, - "46": 4.207071781158447, - "47": 8.236141204833984, - "48": 4.265207290649414, - "49": 3.7269554138183594, - "50": 1.688579797744751, - "51": 2.1194658279418945, - "52": 3.15309476852417, - "53": 10.934322357177734 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.944894552230835, - "3": 3.726104974746704, - "4": 3.616518020629883, - "5": 3.6710803508758545, - "6": 4.058044910430908, - "7": 4.218535423278809, - "8": 3.69265079498291, - "9": 3.4404852390289307, - "10": 3.4065136909484863, - "11": 3.3600831031799316, - "12": 3.3283584117889404, - "13": 3.448434352874756, - "14": 3.412850856781006, - "15": 3.4513070583343506, - "16": 3.5203781127929688, - "17": 3.7609071731567383, - "18": 3.273527145385742, - "19": 3.076810836791992, - "20": 3.258510112762451, - "21": 3.120025157928467, - "22": 3.4057347774505615, - "23": 3.2594664096832275, - "24": 6.348177909851074, - "25": 5.118218421936035, - "26": 4.498443603515625, - "27": 4.378347873687744, - "28": 4.566770553588867, - "29": 3.8768372535705566, - "30": 3.3816871643066406, - "31": 3.405897617340088, - "32": 3.4564743041992188, - "33": 3.9595227241516113, - "34": 3.3077290058135986, - "35": 3.292396068572998, - "36": 3.7437429428100586, - "37": 3.428488254547119, - "38": 3.564821481704712, - "39": 4.445432186126709, - "40": 3.9705464839935303, - "41": 5.185230731964111, - "42": 3.322171211242676, - "43": 4.240131855010986, - "44": 4.554407119750977, - "45": 5.6574811935424805, - "46": 4.055406093597412, - "47": 3.8916234970092773, - "48": 5.238371849060059, - "49": 4.262404918670654, - "50": 3.371361255645752, - "51": 3.319608211517334, - "52": 3.4928855895996094, - "53": 4.050593376159668 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "train_epoch_time": 4.795361518859863, - "train_loss": 5.793298119554561, - "train_score": 0.0842853300091697, - "val_loss": 5.795012561653566, - "val_score": 0.07970364541499678 - }, - { - "epoch": 1, - "grad_norm": 2.271104097366333, - "learning_rate": 0.1, - "model_norm": 87.44066619873047, - "step_logs": { - "grad_norm": { - "54": 7.6241044998168945, - "55": 18.086820602416992, - "56": 13.76170825958252, - "57": 12.195931434631348, - "58": 7.707003116607666, - "59": 7.350017547607422, - "60": 4.068539142608643, - "61": 2.0567800998687744, - "62": 1.7179681062698364, - "63": 1.5987063646316528, - "64": 1.7728155851364136, - "65": 1.6580462455749512, - "66": 2.2254655361175537, - "67": 2.6681747436523438, - "68": 4.53953742980957, - "69": 2.4346330165863037, - "70": 2.5816047191619873, - "71": 1.6540542840957642, - "72": 1.8485995531082153, - "73": 2.111536741256714, - "74": 2.41682767868042, - "75": 3.788759708404541, - "76": 2.339066743850708, - "77": 2.0891082286834717, - "78": 7.765440464019775, - "79": 2.2957189083099365, - "80": 2.1013708114624023, - "81": 1.9216939210891724, - "82": 1.3233511447906494, - "83": 1.3147692680358887, - "84": 1.985363483428955, - "85": 1.7441880702972412, - "86": 1.4367233514785767, - "87": 1.473137617111206, - "88": 1.9445949792861938, - "89": 3.4859619140625, - "90": 2.658531904220581, - "91": 1.740609049797058, - "92": 1.3440288305282593, - "93": 1.3734662532806396, - "94": 1.9076367616653442, - "95": 4.12608528137207, - "96": 3.1233537197113037, - "97": 2.544762372970581, - "98": 1.650414228439331, - "99": 1.5785913467407227, - "100": 2.0071909427642822, - "101": 2.1086478233337402, - "102": 2.614389419555664, - "103": 2.1235971450805664, - "104": 1.0054165124893188, - "105": 1.140705943107605, - "106": 1.9072152376174927, - "107": 2.271104097366333 - }, - "loss": { - "54": 5.75114631652832, - "55": 7.560795307159424, - "56": 5.97125244140625, - "57": 5.015491008758545, - "58": 4.765529155731201, - "59": 5.059174537658691, - "60": 4.556539535522461, - "61": 4.199892044067383, - "62": 3.8614931106567383, - "63": 3.665408134460449, - "64": 3.421212673187256, - "65": 3.278885841369629, - "66": 3.2003910541534424, - "67": 3.2887396812438965, - "68": 3.3668746948242188, - "69": 3.6330623626708984, - "70": 3.3452088832855225, - "71": 3.1023542881011963, - "72": 3.0678114891052246, - "73": 3.1246538162231445, - "74": 3.133157253265381, - "75": 3.2065348625183105, - "76": 3.39947772026062, - "77": 3.0320286750793457, - "78": 3.203878402709961, - "79": 3.2980666160583496, - "80": 3.0177414417266846, - "81": 3.018956184387207, - "82": 2.902022361755371, - "83": 2.8780031204223633, - "84": 2.890287399291992, - "85": 2.9209518432617188, - "86": 2.8200201988220215, - "87": 2.799708843231201, - "88": 2.807431936264038, - "89": 2.9304189682006836, - "90": 3.1970624923706055, - "91": 2.8154075145721436, - "92": 2.774245262145996, - "93": 2.7271995544433594, - "94": 2.782083034515381, - "95": 2.91900372505188, - "96": 3.2765560150146484, - "97": 2.9599997997283936, - "98": 2.764235496520996, - "99": 2.76535701751709, - "100": 2.7487878799438477, - "101": 2.838395357131958, - "102": 2.7675676345825195, - "103": 2.905409097671509, - "104": 2.6871585845947266, - "105": 2.6786069869995117, - "106": 2.6857080459594727, - "107": 2.813164234161377 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "train_epoch_time": 4.793367624282837, - "train_loss": 2.7579970372800675, - "train_score": 0.22975026898438825, - "val_loss": 2.770692006865544, - "val_score": 0.2255220296017475 - }, - { - "epoch": 2, - "grad_norm": 1.6037001609802246, - "learning_rate": 0.1, - "model_norm": 87.44985961914062, - "step_logs": { - "grad_norm": { - "108": 2.205274820327759, - "109": 1.9024920463562012, - "110": 1.8349051475524902, - "111": 1.9209145307540894, - "112": 1.98993980884552, - "113": 1.854478359222412, - "114": 1.439594030380249, - "115": 1.5765751600265503, - "116": 2.0506632328033447, - "117": 1.853440284729004, - "118": 1.4193261861801147, - "119": 1.6544969081878662, - "120": 2.240386962890625, - "121": 1.8756539821624756, - "122": 1.1770734786987305, - "123": 1.2625330686569214, - "124": 1.6702266931533813, - "125": 1.7356467247009277, - "126": 1.6991575956344604, - "127": 1.6498024463653564, - "128": 1.5905519723892212, - "129": 1.4519444704055786, - "130": 1.3913549184799194, - "131": 1.6237090826034546, - "132": 1.873119592666626, - "133": 1.8221279382705688, - "134": 1.51943039894104, - "135": 1.5637608766555786, - "136": 1.9931915998458862, - "137": 1.919577717781067, - "138": 1.651701807975769, - "139": 1.6597294807434082, - "140": 1.6811857223510742, - "141": 1.6458594799041748, - "142": 1.5167800188064575, - "143": 1.4958332777023315, - "144": 1.637245774269104, - "145": 1.579387903213501, - "146": 1.3156354427337646, - "147": 1.2954654693603516, - "148": 1.374786376953125, - "149": 1.4277857542037964, - "150": 1.3610658645629883, - "151": 1.325801968574524, - "152": 1.6275372505187988, - "153": 1.5998854637145996, - "154": 1.5871320962905884, - "155": 1.7424808740615845, - "156": 1.5622713565826416, - "157": 1.4811228513717651, - "158": 1.6517689228057861, - "159": 1.603561520576477, - "160": 1.6064814329147339, - "161": 1.6037001609802246 - }, - "loss": { - "108": 2.7527647018432617, - "109": 2.7791929244995117, - "110": 2.7051548957824707, - "111": 2.7681171894073486, - "112": 2.682340145111084, - "113": 2.7631428241729736, - "114": 2.6718995571136475, - "115": 2.694222927093506, - "116": 2.720673084259033, - "117": 2.756666660308838, - "118": 2.650331974029541, - "119": 2.6732842922210693, - "120": 2.6883387565612793, - "121": 2.768552780151367, - "122": 2.6281371116638184, - "123": 2.6367087364196777, - "124": 2.6483657360076904, - "125": 2.696963310241699, - "126": 2.6594419479370117, - "127": 2.6907296180725098, - "128": 2.646522045135498, - "129": 2.693516731262207, - "130": 2.613452434539795, - "131": 2.653848648071289, - "132": 2.6505908966064453, - "133": 2.6901187896728516, - "134": 2.6408727169036865, - "135": 2.607588291168213, - "136": 2.649486541748047, - "137": 2.699981689453125, - "138": 2.62245512008667, - "139": 2.6442720890045166, - "140": 2.6174240112304688, - "141": 2.6557726860046387, - "142": 2.617366313934326, - "143": 2.637457847595215, - "144": 2.5964584350585938, - "145": 2.6647820472717285, - "146": 2.6050243377685547, - "147": 2.6010234355926514, - "148": 2.5798959732055664, - "149": 2.6097328662872314, - "150": 2.5874581336975098, - "151": 2.595344305038452, - "152": 2.58966064453125, - "153": 2.6383798122406006, - "154": 2.598228931427002, - "155": 2.6442062854766846, - "156": 2.6013989448547363, - "157": 2.601226806640625, - "158": 2.5805084705352783, - "159": 2.6327877044677734, - "160": 2.5750956535339355, - "161": 2.6411633491516113 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "train_epoch_time": 4.793071031570435, - "train_loss": 2.581822080420627, - "train_score": 0.2562141319301241, - "val_loss": 2.606259554042882, - "val_score": 0.249811638647051 - }, - { - "epoch": 3, - "grad_norm": 1.315682291984558, - "learning_rate": 0.1, - "model_norm": 87.45896911621094, - "step_logs": { - "grad_norm": { - "162": 1.57014000415802, - "163": 1.5652203559875488, - "164": 1.3761399984359741, - "165": 1.378050446510315, - "166": 1.3502795696258545, - "167": 1.3488165140151978, - "168": 1.4270493984222412, - "169": 1.3370720148086548, - "170": 1.194875717163086, - "171": 1.1860826015472412, - "172": 1.2140578031539917, - "173": 1.1944050788879395, - "174": 1.2293858528137207, - "175": 1.3145146369934082, - "176": 1.4801857471466064, - "177": 1.4290152788162231, - "178": 1.4381383657455444, - "179": 1.5648773908615112, - "180": 1.621522068977356, - "181": 1.6166753768920898, - "182": 1.5579445362091064, - "183": 1.418610692024231, - "184": 1.2966928482055664, - "185": 1.329863429069519, - "186": 1.3860971927642822, - "187": 1.3883217573165894, - "188": 1.4934109449386597, - "189": 1.4227187633514404, - "190": 1.268064022064209, - "191": 1.3188798427581787, - "192": 1.4236650466918945, - "193": 1.3837710618972778, - "194": 1.2826850414276123, - "195": 1.3714286088943481, - "196": 1.4355472326278687, - "197": 1.3856053352355957, - "198": 1.4145541191101074, - "199": 1.3343926668167114, - "200": 1.127225637435913, - "201": 1.1470998525619507, - "202": 1.208306908607483, - "203": 1.22909414768219, - "204": 1.2772661447525024, - "205": 1.2968382835388184, - "206": 1.251835584640503, - "207": 1.3025931119918823, - "208": 1.4895981550216675, - "209": 1.4703450202941895, - "210": 1.4638627767562866, - "211": 1.378810167312622, - "212": 1.2432467937469482, - "213": 1.2543805837631226, - "214": 1.2786723375320435, - "215": 1.315682291984558 - }, - "loss": { - "162": 2.587583065032959, - "163": 2.6200835704803467, - "164": 2.585965156555176, - "165": 2.6061301231384277, - "166": 2.5670583248138428, - "167": 2.565309524536133, - "168": 2.5831704139709473, - "169": 2.610398292541504, - "170": 2.5702619552612305, - "171": 2.5738534927368164, - "172": 2.5414133071899414, - "173": 2.587667226791382, - "174": 2.555570363998413, - "175": 2.583132743835449, - "176": 2.550055503845215, - "177": 2.6150519847869873, - "178": 2.5643064975738525, - "179": 2.5992491245269775, - "180": 2.584167003631592, - "181": 2.623523235321045, - "182": 2.5763607025146484, - "183": 2.587963342666626, - "184": 2.56569766998291, - "185": 2.5684685707092285, - "186": 2.5555853843688965, - "187": 2.572157382965088, - "188": 2.5411853790283203, - "189": 2.591902256011963, - "190": 2.5444412231445312, - "191": 2.5447676181793213, - "192": 2.5466439723968506, - "193": 2.569901943206787, - "194": 2.543004035949707, - "195": 2.573594570159912, - "196": 2.5569801330566406, - "197": 2.5572328567504883, - "198": 2.5544891357421875, - "199": 2.570556163787842, - "200": 2.538686752319336, - "201": 2.5396692752838135, - "202": 2.537353754043579, - "203": 2.5404670238494873, - "204": 2.525038957595825, - "205": 2.542437791824341, - "206": 2.5132079124450684, - "207": 2.5575854778289795, - "208": 2.5400819778442383, - "209": 2.5585079193115234, - "210": 2.547358512878418, - "211": 2.5636322498321533, - "212": 2.5159521102905273, - "213": 2.527498722076416, - "214": 2.5264225006103516, - "215": 2.547900438308716 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "train_epoch_time": 4.793066024780273, - "train_loss": 2.5332912087269457, - "train_score": 0.25806693875977776, - "val_loss": 2.568290614917549, - "val_score": 0.252722265953722 - }, - { - "epoch": 4, - "grad_norm": 1.255943775177002, - "learning_rate": 0.1, - "model_norm": 87.46749114990234, - "step_logs": { - "grad_norm": { - "216": 1.3211983442306519, - "217": 1.3779386281967163, - "218": 1.6283546686172485, - "219": 1.542668342590332, - "220": 1.4222657680511475, - "221": 1.4642791748046875, - "222": 1.5152777433395386, - "223": 1.455426812171936, - "224": 1.4295985698699951, - "225": 1.4423514604568481, - "226": 1.4302033185958862, - "227": 1.4385539293289185, - "228": 1.4562808275222778, - "229": 1.4059540033340454, - "230": 1.3763949871063232, - "231": 1.45053231716156, - "232": 1.4418200254440308, - "233": 1.3518669605255127, - "234": 1.2135918140411377, - "235": 1.1925864219665527, - "236": 1.1781482696533203, - "237": 1.1683781147003174, - "238": 1.198647379875183, - "239": 1.187096118927002, - "240": 1.2433829307556152, - "241": 1.2245243787765503, - "242": 1.1704925298690796, - "243": 1.2215750217437744, - "244": 1.3533116579055786, - "245": 1.2959398031234741, - "246": 1.1733379364013672, - "247": 1.1857608556747437, - "248": 1.2436304092407227, - "249": 1.3097114562988281, - "250": 1.4403356313705444, - "251": 1.3560868501663208, - "252": 1.2285547256469727, - "253": 1.276785135269165, - "254": 1.3266159296035767, - "255": 1.2754143476486206, - "256": 1.1954617500305176, - "257": 1.2000350952148438, - "258": 1.1608084440231323, - "259": 1.143524408340454, - "260": 1.2010358572006226, - "261": 1.3104660511016846, - "262": 1.3789137601852417, - "263": 1.456289291381836, - "264": 1.4257675409317017, - "265": 1.3827656507492065, - "266": 1.2667171955108643, - "267": 1.2337268590927124, - "268": 1.2437505722045898, - "269": 1.255943775177002 - }, - "loss": { - "216": 2.540203094482422, - "217": 2.5396881103515625, - "218": 2.5548245906829834, - "219": 2.5851268768310547, - "220": 2.5351104736328125, - "221": 2.5615248680114746, - "222": 2.5332982540130615, - "223": 2.569162607192993, - "224": 2.549081563949585, - "225": 2.5589795112609863, - "226": 2.5483968257904053, - "227": 2.540041923522949, - "228": 2.532780885696411, - "229": 2.554763078689575, - "230": 2.535845994949341, - "231": 2.532985210418701, - "232": 2.542069435119629, - "233": 2.5587151050567627, - "234": 2.498936653137207, - "235": 2.529637575149536, - "236": 2.525259494781494, - "237": 2.52756667137146, - "238": 2.5121688842773438, - "239": 2.5410871505737305, - "240": 2.5145249366760254, - "241": 2.524502754211426, - "242": 2.5309348106384277, - "243": 2.532170295715332, - "244": 2.5251142978668213, - "245": 2.543562412261963, - "246": 2.498774766921997, - "247": 2.524109125137329, - "248": 2.500328302383423, - "249": 2.5229434967041016, - "250": 2.511627197265625, - "251": 2.5283007621765137, - "252": 2.50056791305542, - "253": 2.5200109481811523, - "254": 2.507739305496216, - "255": 2.538595199584961, - "256": 2.499328374862671, - "257": 2.5182833671569824, - "258": 2.50455379486084, - "259": 2.5087757110595703, - "260": 2.4964256286621094, - "261": 2.5303449630737305, - "262": 2.5143609046936035, - "263": 2.5192484855651855, - "264": 2.519637107849121, - "265": 2.5299739837646484, - "266": 2.5209293365478516, - "267": 2.497617483139038, - "268": 2.499988555908203, - "269": 2.515929937362671 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "train_epoch_time": 4.793076515197754, - "train_loss": 2.5068613774807242, - "train_score": 0.26193059542599845, - "val_loss": 2.5377160761030617, - "val_score": 0.25469557316514024 - }, - { - "epoch": 5, - "grad_norm": 1.0183618068695068, - "learning_rate": 0.1, - "model_norm": 87.47647857666016, - "step_logs": { - "grad_norm": { - "270": 1.2888706922531128, - "271": 1.2905149459838867, - "272": 1.2208613157272339, - "273": 1.2000612020492554, - "274": 1.1423448324203491, - "275": 1.0955835580825806, - "276": 1.1377270221710205, - "277": 1.2265115976333618, - "278": 1.2998406887054443, - "279": 1.2332520484924316, - "280": 1.1454983949661255, - "281": 1.228863000869751, - "282": 1.4385524988174438, - "283": 1.4464181661605835, - "284": 1.1787866353988647, - "285": 1.0656853914260864, - "286": 1.1027541160583496, - "287": 1.1337908506393433, - "288": 1.2419637441635132, - "289": 1.2795246839523315, - "290": 1.3119382858276367, - "291": 1.3148417472839355, - "292": 1.2502707242965698, - "293": 1.2403854131698608, - "294": 1.2405940294265747, - "295": 1.2264312505722046, - "296": 1.2762929201126099, - "297": 1.2564655542373657, - "298": 1.2502623796463013, - "299": 1.182131052017212, - "300": 1.1872307062149048, - "301": 1.1584244966506958, - "302": 1.1509478092193604, - "303": 1.1777397394180298, - "304": 1.2179162502288818, - "305": 1.2852668762207031, - "306": 1.4277710914611816, - "307": 1.4504923820495605, - "308": 1.411252737045288, - "309": 1.5104649066925049, - "310": 1.4985687732696533, - "311": 1.4138259887695312, - "312": 1.1221050024032593, - "313": 1.0279879570007324, - "314": 1.0947867631912231, - "315": 1.0967639684677124, - "316": 1.1049673557281494, - "317": 1.1171448230743408, - "318": 1.1055734157562256, - "319": 1.1026877164840698, - "320": 1.1551287174224854, - "321": 1.1338034868240356, - "322": 1.0455329418182373, - "323": 1.0183618068695068 - }, - "loss": { - "270": 2.4969587326049805, - "271": 2.52333927154541, - "272": 2.5029678344726562, - "273": 2.524637222290039, - "274": 2.50390362739563, - "275": 2.500753164291382, - "276": 2.4904160499572754, - "277": 2.5181005001068115, - "278": 2.4987549781799316, - "279": 2.5213732719421387, - "280": 2.492311954498291, - "281": 2.514549732208252, - "282": 2.5207369327545166, - "283": 2.5474534034729004, - "284": 2.4867730140686035, - "285": 2.4976253509521484, - "286": 2.485607147216797, - "287": 2.4887099266052246, - "288": 2.4849839210510254, - "289": 2.5085396766662598, - "290": 2.5221471786499023, - "291": 2.508707046508789, - "292": 2.4703402519226074, - "293": 2.491426944732666, - "294": 2.488715171813965, - "295": 2.5134172439575195, - "296": 2.5023412704467773, - "297": 2.501113176345825, - "298": 2.5273776054382324, - "299": 2.4985909461975098, - "300": 2.4877607822418213, - "301": 2.499028444290161, - "302": 2.478795051574707, - "303": 2.516446113586426, - "304": 2.4692130088806152, - "305": 2.5044076442718506, - "306": 2.4741435050964355, - "307": 2.5357775688171387, - "308": 2.5134716033935547, - "309": 2.4990007877349854, - "310": 2.5130791664123535, - "311": 2.5232057571411133, - "312": 2.489683151245117, - "313": 2.4821839332580566, - "314": 2.4768636226654053, - "315": 2.492902994155884, - "316": 2.4939475059509277, - "317": 2.501979351043701, - "318": 2.4935226440429688, - "319": 2.491177797317505, - "320": 2.4911839962005615, - "321": 2.4907495975494385, - "322": 2.4883058071136475, - "323": 2.4766368865966797 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "train_epoch_time": 4.792854309082031, - "train_loss": 2.47357450776668, - "train_score": 0.2708919923907544, - "val_loss": 2.522531079643766, - "val_score": 0.2605213112658666 - }, - { - "epoch": 6, - "grad_norm": 1.3232122659683228, - "learning_rate": 0.1, - "model_norm": 87.48561096191406, - "step_logs": { - "grad_norm": { - "324": 1.0678391456604004, - "325": 1.0858285427093506, - "326": 1.1414928436279297, - "327": 1.0667126178741455, - "328": 0.9203104972839355, - "329": 0.9948205947875977, - "330": 1.0342060327529907, - "331": 1.042789340019226, - "332": 1.193058967590332, - "333": 1.26388680934906, - "334": 1.3626110553741455, - "335": 1.4746543169021606, - "336": 1.3356393575668335, - "337": 1.1898353099822998, - "338": 1.0583469867706299, - "339": 0.9968602657318115, - "340": 1.0603820085525513, - "341": 1.1961801052093506, - "342": 1.2735388278961182, - "343": 1.2184572219848633, - "344": 1.113504409790039, - "345": 1.0792896747589111, - "346": 1.1413064002990723, - "347": 1.1297205686569214, - "348": 1.1726166009902954, - "349": 1.2580716609954834, - "350": 1.2107887268066406, - "351": 1.2144241333007812, - "352": 1.2301344871520996, - "353": 1.2117292881011963, - "354": 1.2339168787002563, - "355": 1.1750441789627075, - "356": 1.1156620979309082, - "357": 1.152209758758545, - "358": 1.1627806425094604, - "359": 1.2509145736694336, - "360": 1.2125744819641113, - "361": 1.1409679651260376, - "362": 1.047756314277649, - "363": 1.0098140239715576, - "364": 0.9484673142433167, - "365": 0.9900673031806946, - "366": 1.0080896615982056, - "367": 1.075703740119934, - "368": 1.2004772424697876, - "369": 1.35337233543396, - "370": 1.3664360046386719, - "371": 1.485229730606079, - "372": 1.5003336668014526, - "373": 1.4629193544387817, - "374": 1.4541159868240356, - "375": 1.5671086311340332, - "376": 1.491943597793579, - "377": 1.3232122659683228 - }, - "loss": { - "324": 2.473811626434326, - "325": 2.481210708618164, - "326": 2.483485221862793, - "327": 2.490971565246582, - "328": 2.4594228267669678, - "329": 2.471400022506714, - "330": 2.4843342304229736, - "331": 2.4998817443847656, - "332": 2.4674744606018066, - "333": 2.4877407550811768, - "334": 2.4874391555786133, - "335": 2.4866015911102295, - "336": 2.4862873554229736, - "337": 2.4790971279144287, - "338": 2.4758708477020264, - "339": 2.4860565662384033, - "340": 2.471980571746826, - "341": 2.491982936859131, - "342": 2.4847707748413086, - "343": 2.4865832328796387, - "344": 2.4886763095855713, - "345": 2.4785633087158203, - "346": 2.4921278953552246, - "347": 2.475213050842285, - "348": 2.4717774391174316, - "349": 2.4874706268310547, - "350": 2.473141670227051, - "351": 2.4926629066467285, - "352": 2.4693734645843506, - "353": 2.470925807952881, - "354": 2.4947831630706787, - "355": 2.4795145988464355, - "356": 2.454616069793701, - "357": 2.4772300720214844, - "358": 2.4731054306030273, - "359": 2.4879140853881836, - "360": 2.471583843231201, - "361": 2.473353624343872, - "362": 2.462623119354248, - "363": 2.4717414379119873, - "364": 2.4528353214263916, - "365": 2.4707860946655273, - "366": 2.463679552078247, - "367": 2.465583086013794, - "368": 2.468745708465576, - "369": 2.4872140884399414, - "370": 2.486074924468994, - "371": 2.482853412628174, - "372": 2.5016226768493652, - "373": 2.473024368286133, - "374": 2.478849411010742, - "375": 2.512293815612793, - "376": 2.5045816898345947, - "377": 2.4941835403442383 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "train_epoch_time": 4.792715072631836, - "train_loss": 2.4831072682116604, - "train_score": 0.26030868902439025, - "val_loss": 2.5313797309122186, - "val_score": 0.25334116700185566 - }, - { - "epoch": 7, - "grad_norm": 1.5443918704986572, - "learning_rate": 0.1, - "model_norm": 87.49939727783203, - "step_logs": { - "grad_norm": { - "378": 1.3162884712219238, - "379": 1.2067433595657349, - "380": 1.1712816953659058, - "381": 1.1231498718261719, - "382": 1.2187566757202148, - "383": 1.237554669380188, - "384": 1.1868064403533936, - "385": 1.070710301399231, - "386": 1.0229023694992065, - "387": 1.0611571073532104, - "388": 1.0525788068771362, - "389": 1.016467571258545, - "390": 1.0857120752334595, - "391": 1.1862807273864746, - "392": 1.128239393234253, - "393": 1.1056164503097534, - "394": 1.1890345811843872, - "395": 1.272507905960083, - "396": 1.2444751262664795, - "397": 1.2104840278625488, - "398": 1.2719988822937012, - "399": 1.2732030153274536, - "400": 1.1537237167358398, - "401": 1.0243120193481445, - "402": 0.9922335147857666, - "403": 0.9944742321968079, - "404": 1.075513243675232, - "405": 1.200055480003357, - "406": 1.2577424049377441, - "407": 1.3575090169906616, - "408": 1.3999048471450806, - "409": 1.4631588459014893, - "410": 1.5729089975357056, - "411": 1.538995385169983, - "412": 1.3967065811157227, - "413": 1.1819331645965576, - "414": 1.031980276107788, - "415": 0.9106211066246033, - "416": 0.9348329901695251, - "417": 1.0886101722717285, - "418": 1.254741907119751, - "419": 1.6187708377838135, - "420": 1.7849868535995483, - "421": 1.2913455963134766, - "422": 0.9638165235519409, - "423": 0.939207911491394, - "424": 1.0187772512435913, - "425": 1.1491801738739014, - "426": 1.2341713905334473, - "427": 1.4123573303222656, - "428": 1.4255586862564087, - "429": 1.4977840185165405, - "430": 1.5621732473373413, - "431": 1.5443918704986572 - }, - "loss": { - "378": 2.4901351928710938, - "379": 2.4646198749542236, - "380": 2.4778575897216797, - "381": 2.469677209854126, - "382": 2.4916670322418213, - "383": 2.461810827255249, - "384": 2.4723567962646484, - "385": 2.45829176902771, - "386": 2.457439422607422, - "387": 2.4681992530822754, - "388": 2.4569380283355713, - "389": 2.460292339324951, - "390": 2.4383909702301025, - "391": 2.464303493499756, - "392": 2.470714569091797, - "393": 2.450083017349243, - "394": 2.4778363704681396, - "395": 2.454130172729492, - "396": 2.479257106781006, - "397": 2.446122646331787, - "398": 2.4874773025512695, - "399": 2.4751386642456055, - "400": 2.4647936820983887, - "401": 2.4522576332092285, - "402": 2.442202091217041, - "403": 2.4207894802093506, - "404": 2.443239688873291, - "405": 2.4430959224700928, - "406": 2.4443068504333496, - "407": 2.44044828414917, - "408": 2.4579362869262695, - "409": 2.4598844051361084, - "410": 2.487255334854126, - "411": 2.4754133224487305, - "412": 2.474102735519409, - "413": 2.442674398422241, - "414": 2.44766902923584, - "415": 2.423190116882324, - "416": 2.4155349731445312, - "417": 2.4214839935302734, - "418": 2.422520875930786, - "419": 2.441464424133301, - "420": 2.4788379669189453, - "421": 2.4609122276306152, - "422": 2.4330925941467285, - "423": 2.4144163131713867, - "424": 2.4115402698516846, - "425": 2.41140079498291, - "426": 2.431873321533203, - "427": 2.4388933181762695, - "428": 2.433537483215332, - "429": 2.4347527027130127, - "430": 2.4340856075286865, - "431": 2.4404280185699463 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "train_epoch_time": 4.792433261871338, - "train_loss": 2.441129124831606, - "train_score": 0.266009460092446, - "val_loss": 2.488670668289938, - "val_score": 0.25693796614418074 - }, - { - "epoch": 8, - "grad_norm": 1.1139146089553833, - "learning_rate": 0.1, - "model_norm": 87.51409149169922, - "step_logs": { - "grad_norm": { - "432": 1.4680956602096558, - "433": 1.3616770505905151, - "434": 1.3802167177200317, - "435": 1.3216320276260376, - "436": 1.3114744424819946, - "437": 1.3092608451843262, - "438": 1.2603095769882202, - "439": 1.228329062461853, - "440": 1.1659294366836548, - "441": 1.0559760332107544, - "442": 1.1393606662750244, - "443": 1.2201030254364014, - "444": 1.3578343391418457, - "445": 1.3618934154510498, - "446": 1.3702973127365112, - "447": 1.4715440273284912, - "448": 1.8345134258270264, - "449": 1.824266791343689, - "450": 1.6442898511886597, - "451": 1.4994401931762695, - "452": 1.2396150827407837, - "453": 1.2815325260162354, - "454": 1.4068149328231812, - "455": 1.4612693786621094, - "456": 1.4176119565963745, - "457": 1.2853500843048096, - "458": 1.1139631271362305, - "459": 1.110557198524475, - "460": 1.296318531036377, - "461": 1.2464932203292847, - "462": 1.0801687240600586, - "463": 1.0897548198699951, - "464": 1.2457433938980103, - "465": 1.220635175704956, - "466": 1.0722063779830933, - "467": 1.0892789363861084, - "468": 1.1401158571243286, - "469": 1.1175123453140259, - "470": 1.1574429273605347, - "471": 1.2341843843460083, - "472": 1.1629436016082764, - "473": 1.1734954118728638, - "474": 1.2790582180023193, - "475": 1.3268572092056274, - "476": 1.3662433624267578, - "477": 1.3509211540222168, - "478": 1.3142013549804688, - "479": 1.226273536682129, - "480": 1.1497875452041626, - "481": 1.2131487131118774, - "482": 1.2446680068969727, - "483": 1.229183316230774, - "484": 1.1781516075134277, - "485": 1.1139146089553833 - }, - "loss": { - "432": 2.4437899589538574, - "433": 2.424687385559082, - "434": 2.4247090816497803, - "435": 2.410892963409424, - "436": 2.421461820602417, - "437": 2.403177261352539, - "438": 2.393446445465088, - "439": 2.3813343048095703, - "440": 2.402475595474243, - "441": 2.38427734375, - "442": 2.3911595344543457, - "443": 2.381458282470703, - "444": 2.41695237159729, - "445": 2.382270097732544, - "446": 2.384190559387207, - "447": 2.401710033416748, - "448": 2.4225590229034424, - "449": 2.476780891418457, - "450": 2.424055814743042, - "451": 2.422682046890259, - "452": 2.3920207023620605, - "453": 2.4021143913269043, - "454": 2.383765697479248, - "455": 2.4077653884887695, - "456": 2.377314329147339, - "457": 2.380800485610962, - "458": 2.3656201362609863, - "459": 2.37318754196167, - "460": 2.369588851928711, - "461": 2.3998665809631348, - "462": 2.369065761566162, - "463": 2.3675756454467773, - "464": 2.356987476348877, - "465": 2.3940508365631104, - "466": 2.352421522140503, - "467": 2.3660664558410645, - "468": 2.373870372772217, - "469": 2.375763416290283, - "470": 2.3583180904388428, - "471": 2.4042859077453613, - "472": 2.3731842041015625, - "473": 2.3786895275115967, - "474": 2.3778910636901855, - "475": 2.380321502685547, - "476": 2.3639659881591797, - "477": 2.400440216064453, - "478": 2.3682382106781006, - "479": 2.3443045616149902, - "480": 2.3650808334350586, - "481": 2.353428840637207, - "482": 2.377392053604126, - "483": 2.3923966884613037, - "484": 2.367537021636963, - "485": 2.3758602142333984 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "train_epoch_time": 4.792607307434082, - "train_loss": 2.3498305032037763, - "train_score": 0.3071209200656568, - "val_loss": 2.394291750462266, - "val_score": 0.2975522028250207 - }, - { - "epoch": 9, - "grad_norm": 1.1305856704711914, - "learning_rate": 0.1, - "model_norm": 87.52873992919922, - "step_logs": { - "grad_norm": { - "486": 1.1678463220596313, - "487": 1.1428059339523315, - "488": 1.0742453336715698, - "489": 1.1098260879516602, - "490": 1.2321646213531494, - "491": 1.35171639919281, - "492": 1.4551805257797241, - "493": 1.3502675294876099, - "494": 1.155698537826538, - "495": 1.1915760040283203, - "496": 1.2877423763275146, - "497": 1.3450067043304443, - "498": 1.28325617313385, - "499": 1.1674617528915405, - "500": 1.1079142093658447, - "501": 1.1063071489334106, - "502": 1.0905908346176147, - "503": 1.164880394935608, - "504": 1.2494115829467773, - "505": 1.2591413259506226, - "506": 1.3494834899902344, - "507": 1.3859891891479492, - "508": 1.3739418983459473, - "509": 1.3578109741210938, - "510": 1.335060954093933, - "511": 1.3035852909088135, - "512": 1.2293521165847778, - "513": 1.2602473497390747, - "514": 1.3364858627319336, - "515": 1.342375636100769, - "516": 1.1868771314620972, - "517": 1.070024013519287, - "518": 1.0482691526412964, - "519": 1.089015245437622, - "520": 1.2586957216262817, - "521": 1.3905738592147827, - "522": 1.5018579959869385, - "523": 1.577476143836975, - "524": 1.4661107063293457, - "525": 1.3028267621994019, - "526": 1.1227914094924927, - "527": 1.041851282119751, - "528": 1.1096352338790894, - "529": 1.17355215549469, - "530": 1.167136311531067, - "531": 1.151084303855896, - "532": 1.0863591432571411, - "533": 1.0845279693603516, - "534": 1.1236761808395386, - "535": 1.1533441543579102, - "536": 1.123315453529358, - "537": 1.0647786855697632, - "538": 1.0899544954299927, - "539": 1.1305856704711914 - }, - "loss": { - "486": 2.3578438758850098, - "487": 2.3592309951782227, - "488": 2.3549351692199707, - "489": 2.3621273040771484, - "490": 2.34975004196167, - "491": 2.368321180343628, - "492": 2.3742294311523438, - "493": 2.383321762084961, - "494": 2.345890998840332, - "495": 2.3425047397613525, - "496": 2.355344772338867, - "497": 2.3632798194885254, - "498": 2.3482437133789062, - "499": 2.353148937225342, - "500": 2.348996162414551, - "501": 2.339188575744629, - "502": 2.323655605316162, - "503": 2.349989891052246, - "504": 2.3400676250457764, - "505": 2.351712226867676, - "506": 2.3578600883483887, - "507": 2.350525379180908, - "508": 2.3507909774780273, - "509": 2.35506010055542, - "510": 2.3364036083221436, - "511": 2.357384204864502, - "512": 2.3415093421936035, - "513": 2.354396343231201, - "514": 2.3620967864990234, - "515": 2.3538894653320312, - "516": 2.3279030323028564, - "517": 2.30672025680542, - "518": 2.32662296295166, - "519": 2.3261046409606934, - "520": 2.326359748840332, - "521": 2.333083152770996, - "522": 2.329730749130249, - "523": 2.346832275390625, - "524": 2.361660957336426, - "525": 2.361846446990967, - "526": 2.3219640254974365, - "527": 2.3159098625183105, - "528": 2.334743022918701, - "529": 2.3389053344726562, - "530": 2.342283248901367, - "531": 2.3142433166503906, - "532": 2.300330400466919, - "533": 2.3037781715393066, - "534": 2.3184280395507812, - "535": 2.323700428009033, - "536": 2.326554775238037, - "537": 2.3354570865631104, - "538": 2.3264319896698, - "539": 2.3157200813293457 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "train_epoch_time": 4.792439699172974, - "train_loss": 2.309127076558095, - "train_score": 0.3164589310393614, - "val_loss": 2.35623586602107, - "val_score": 0.30715861878904216 - }, - { - "epoch": 10, - "grad_norm": 1.1796280145645142, - "learning_rate": 0.1, - "model_norm": 87.544189453125, - "step_logs": { - "grad_norm": { - "540": 1.161305546760559, - "541": 1.2283931970596313, - "542": 1.317402720451355, - "543": 1.3057178258895874, - "544": 1.2032058238983154, - "545": 1.14738929271698, - "546": 1.1586660146713257, - "547": 1.1149215698242188, - "548": 1.1587846279144287, - "549": 1.2024911642074585, - "550": 1.255492091178894, - "551": 1.1892679929733276, - "552": 1.1933448314666748, - "553": 1.3068780899047852, - "554": 1.3164958953857422, - "555": 1.3122694492340088, - "556": 1.2884422540664673, - "557": 1.221727728843689, - "558": 1.1230424642562866, - "559": 1.0691163539886475, - "560": 1.0665291547775269, - "561": 1.2649699449539185, - "562": 1.3877015113830566, - "563": 1.4894722700119019, - "564": 1.588516354560852, - "565": 1.434905767440796, - "566": 1.312994122505188, - "567": 1.328588843345642, - "568": 1.2876503467559814, - "569": 1.170223593711853, - "570": 1.1069793701171875, - "571": 1.1823673248291016, - "572": 1.4810973405838013, - "573": 1.5807106494903564, - "574": 1.4761574268341064, - "575": 1.3216222524642944, - "576": 1.1939986944198608, - "577": 1.1861799955368042, - "578": 1.235101342201233, - "579": 1.3082369565963745, - "580": 1.3869751691818237, - "581": 1.5206942558288574, - "582": 1.6433238983154297, - "583": 1.858879804611206, - "584": 1.9497158527374268, - "585": 1.6261554956436157, - "586": 1.1508755683898926, - "587": 1.0212258100509644, - "588": 0.9676312208175659, - "589": 1.0243943929672241, - "590": 1.1439132690429688, - "591": 1.2129299640655518, - "592": 1.2235816717147827, - "593": 1.1796280145645142 - }, - "loss": { - "540": 2.3171210289001465, - "541": 2.308004379272461, - "542": 2.315589427947998, - "543": 2.3284170627593994, - "544": 2.294079542160034, - "545": 2.32733154296875, - "546": 2.3148040771484375, - "547": 2.3080525398254395, - "548": 2.285910129547119, - "549": 2.3130908012390137, - "550": 2.3004648685455322, - "551": 2.328625202178955, - "552": 2.2756528854370117, - "553": 2.3235809803009033, - "554": 2.3086225986480713, - "555": 2.2931509017944336, - "556": 2.327099323272705, - "557": 2.2975168228149414, - "558": 2.2790915966033936, - "559": 2.289891242980957, - "560": 2.2908947467803955, - "561": 2.2981479167938232, - "562": 2.3118560314178467, - "563": 2.3111345767974854, - "564": 2.3224036693573, - "565": 2.306122303009033, - "566": 2.3056812286376953, - "567": 2.306670904159546, - "568": 2.3035149574279785, - "569": 2.2847745418548584, - "570": 2.2881369590759277, - "571": 2.282701253890991, - "572": 2.3043665885925293, - "573": 2.30226469039917, - "574": 2.3024446964263916, - "575": 2.2997875213623047, - "576": 2.3021397590637207, - "577": 2.295419216156006, - "578": 2.2787160873413086, - "579": 2.3125319480895996, - "580": 2.286283016204834, - "581": 2.335730791091919, - "582": 2.3143105506896973, - "583": 2.3098740577697754, - "584": 2.3531603813171387, - "585": 2.3374156951904297, - "586": 2.3087737560272217, - "587": 2.2460923194885254, - "588": 2.2396931648254395, - "589": 2.286303997039795, - "590": 2.27905011177063, - "591": 2.2785191535949707, - "592": 2.275733232498169, - "593": 2.2709147930145264 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "train_epoch_time": 4.792525291442871, - "train_loss": 2.263752429351916, - "train_score": 0.3371917592829235, - "val_loss": 2.308534987895278, - "val_score": 0.32576151765309846 - }, - { - "epoch": 11, - "grad_norm": 1.3685996532440186, - "learning_rate": 0.1, - "model_norm": 87.5596694946289, - "step_logs": { - "grad_norm": { - "594": 1.0865164995193481, - "595": 1.1329095363616943, - "596": 1.3010756969451904, - "597": 1.328014850616455, - "598": 1.3275071382522583, - "599": 1.3220125436782837, - "600": 1.1146235466003418, - "601": 1.0263105630874634, - "602": 1.0661321878433228, - "603": 1.1506239175796509, - "604": 1.1343413591384888, - "605": 1.104317545890808, - "606": 1.261232614517212, - "607": 1.3966481685638428, - "608": 1.2947789430618286, - "609": 1.2160342931747437, - "610": 1.1448036432266235, - "611": 1.0876213312149048, - "612": 0.9403430223464966, - "613": 0.954963207244873, - "614": 1.0463155508041382, - "615": 1.185558795928955, - "616": 1.34348464012146, - "617": 1.4730113744735718, - "618": 1.4631580114364624, - "619": 1.4056711196899414, - "620": 1.1971206665039062, - "621": 1.133603572845459, - "622": 1.261331558227539, - "623": 1.2839696407318115, - "624": 1.3165996074676514, - "625": 1.2759069204330444, - "626": 1.237928032875061, - "627": 1.2392375469207764, - "628": 1.3599416017532349, - "629": 1.5133132934570312, - "630": 1.6796135902404785, - "631": 2.099416732788086, - "632": 1.7489235401153564, - "633": 1.3765060901641846, - "634": 1.2964673042297363, - "635": 1.2260470390319824, - "636": 1.1240711212158203, - "637": 1.0787630081176758, - "638": 1.0385583639144897, - "639": 1.084102749824524, - "640": 1.1306864023208618, - "641": 1.2291929721832275, - "642": 1.3389650583267212, - "643": 1.4280952215194702, - "644": 1.4507155418395996, - "645": 1.438639760017395, - "646": 1.4191477298736572, - "647": 1.3685996532440186 - }, - "loss": { - "594": 2.279766321182251, - "595": 2.262509822845459, - "596": 2.25968861579895, - "597": 2.303629159927368, - "598": 2.248288154602051, - "599": 2.2764763832092285, - "600": 2.2789015769958496, - "601": 2.2551932334899902, - "602": 2.2632646560668945, - "603": 2.2653019428253174, - "604": 2.2451963424682617, - "605": 2.2576050758361816, - "606": 2.2759532928466797, - "607": 2.2747607231140137, - "608": 2.2684969902038574, - "609": 2.262915849685669, - "610": 2.242417335510254, - "611": 2.262439727783203, - "612": 2.2382800579071045, - "613": 2.2459542751312256, - "614": 2.2378830909729004, - "615": 2.2267580032348633, - "616": 2.27055287361145, - "617": 2.2495005130767822, - "618": 2.265409231185913, - "619": 2.2847390174865723, - "620": 2.252580165863037, - "621": 2.2570343017578125, - "622": 2.260960102081299, - "623": 2.264223098754883, - "624": 2.264275550842285, - "625": 2.252000331878662, - "626": 2.242096424102783, - "627": 2.2638721466064453, - "628": 2.2463698387145996, - "629": 2.2558932304382324, - "630": 2.2472801208496094, - "631": 2.301028251647949, - "632": 2.3061435222625732, - "633": 2.2686140537261963, - "634": 2.248030662536621, - "635": 2.2571120262145996, - "636": 2.221653938293457, - "637": 2.231825590133667, - "638": 2.227450132369995, - "639": 2.2359278202056885, - "640": 2.2498085498809814, - "641": 2.2468912601470947, - "642": 2.258052349090576, - "643": 2.229947566986084, - "644": 2.249079704284668, - "645": 2.262083053588867, - "646": 2.243039131164551, - "647": 2.2438366413116455 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "train_epoch_time": 4.792309522628784, - "train_loss": 2.23927902587004, - "train_score": 0.34341485840689334, - "val_loss": 2.2831107362950847, - "val_score": 0.33204918891088836 - }, - { - "epoch": 12, - "grad_norm": 0.6759992241859436, - "learning_rate": 0.1, - "model_norm": 87.57398986816406, - "step_logs": { - "grad_norm": { - "648": 1.3157293796539307, - "649": 1.2831898927688599, - "650": 1.242431879043579, - "651": 1.1514537334442139, - "652": 1.016585350036621, - "653": 0.9744150042533875, - "654": 1.1358592510223389, - "655": 1.2114596366882324, - "656": 1.149278163909912, - "657": 1.1524993181228638, - "658": 1.1944392919540405, - "659": 1.1589891910552979, - "660": 0.9806221127510071, - "661": 0.8652613162994385, - "662": 0.8761599063873291, - "663": 0.765261173248291, - "664": 0.7722328901290894, - "665": 0.782863199710846, - "666": 0.7870795130729675, - "667": 0.8094477653503418, - "668": 0.7380000352859497, - "669": 0.7013874053955078, - "670": 0.7496308088302612, - "671": 0.7957020401954651, - "672": 0.8425513505935669, - "673": 0.8213631510734558, - "674": 0.9047280550003052, - "675": 0.9603910446166992, - "676": 0.9602252244949341, - "677": 1.0297188758850098, - "678": 1.132676362991333, - "679": 1.2204123735427856, - "680": 1.1892904043197632, - "681": 1.067742943763733, - "682": 1.0063056945800781, - "683": 0.9673774242401123, - "684": 0.9493207335472107, - "685": 0.9398552775382996, - "686": 0.8061246871948242, - "687": 0.7620973587036133, - "688": 0.7281740307807922, - "689": 0.7557730078697205, - "690": 0.8111618757247925, - "691": 0.7774203419685364, - "692": 0.7169901728630066, - "693": 0.7558302879333496, - "694": 0.8023281097412109, - "695": 0.7568250894546509, - "696": 0.6141039729118347, - "697": 0.6380651593208313, - "698": 0.6953994035720825, - "699": 0.7113130688667297, - "700": 0.704087495803833, - "701": 0.6759992241859436 - }, - "loss": { - "648": 2.261910915374756, - "649": 2.2563204765319824, - "650": 2.244753837585449, - "651": 2.2408347129821777, - "652": 2.2249274253845215, - "653": 2.2067465782165527, - "654": 2.2221713066101074, - "655": 2.235966682434082, - "656": 2.222716808319092, - "657": 2.214606761932373, - "658": 2.2392733097076416, - "659": 2.2052741050720215, - "660": 2.205606698989868, - "661": 2.208386182785034, - "662": 2.2034988403320312, - "663": 2.1935505867004395, - "664": 2.197619915008545, - "665": 2.2146215438842773, - "666": 2.194058418273926, - "667": 2.215623378753662, - "668": 2.1855344772338867, - "669": 2.190002918243408, - "670": 2.205420970916748, - "671": 2.1891214847564697, - "672": 2.1700940132141113, - "673": 2.1814677715301514, - "674": 2.188023090362549, - "675": 2.1961395740509033, - "676": 2.1827468872070312, - "677": 2.205850601196289, - "678": 2.208791971206665, - "679": 2.1849048137664795, - "680": 2.1808977127075195, - "681": 2.1970322132110596, - "682": 2.2265472412109375, - "683": 2.189401388168335, - "684": 2.190937042236328, - "685": 2.194042682647705, - "686": 2.1837193965911865, - "687": 2.188961982727051, - "688": 2.1881442070007324, - "689": 2.1811366081237793, - "690": 2.1552915573120117, - "691": 2.179030418395996, - "692": 2.1814942359924316, - "693": 2.18145489692688, - "694": 2.1861605644226074, - "695": 2.2093653678894043, - "696": 2.1812477111816406, - "697": 2.1637752056121826, - "698": 2.185196876525879, - "699": 2.1589841842651367, - "700": 2.18300461769104, - "701": 2.1738545894622803 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "train_epoch_time": 4.791497468948364, - "train_loss": 2.169372323116921, - "train_score": 0.36573260401721663, - "val_loss": 2.2238867980878196, - "val_score": 0.3526747271538328 - }, - { - "epoch": 13, - "grad_norm": 0.49907785654067993, - "learning_rate": 0.06666666666666668, - "model_norm": 87.5830307006836, - "step_logs": { - "grad_norm": { - "702": 0.6368564963340759, - "703": 0.6076768040657043, - "704": 0.5592228770256042, - "705": 0.5513622760772705, - "706": 0.6080222725868225, - "707": 0.577868640422821, - "708": 0.6604942679405212, - "709": 0.6498759388923645, - "710": 0.665377140045166, - "711": 0.7255287766456604, - "712": 0.7604535818099976, - "713": 0.8307657837867737, - "714": 0.8735730648040771, - "715": 0.8782442212104797, - "716": 0.7650600075721741, - "717": 0.6790304780006409, - "718": 0.6845667362213135, - "719": 0.66389000415802, - "720": 0.6423768997192383, - "721": 0.5914143919944763, - "722": 0.532931923866272, - "723": 0.4888806641101837, - "724": 0.5077181458473206, - "725": 0.5264216661453247, - "726": 0.5784741640090942, - "727": 0.5645992159843445, - "728": 0.5294567942619324, - "729": 0.48763424158096313, - "730": 0.5541208386421204, - "731": 0.5630053877830505, - "732": 0.5812532901763916, - "733": 0.5510789752006531, - "734": 0.5447343587875366, - "735": 0.5688351988792419, - "736": 0.4924420118331909, - "737": 0.4757325053215027, - "738": 0.5324620604515076, - "739": 0.5173011422157288, - "740": 0.5379522442817688, - "741": 0.592404842376709, - "742": 0.5272946953773499, - "743": 0.5140841603279114, - "744": 0.5539537668228149, - "745": 0.5049985647201538, - "746": 0.5906404852867126, - "747": 0.5667707920074463, - "748": 0.5557032823562622, - "749": 0.5358092784881592, - "750": 0.49783727526664734, - "751": 0.5133266448974609, - "752": 0.5335384011268616, - "753": 0.5760951042175293, - "754": 0.5938552021980286, - "755": 0.49907785654067993 - }, - "loss": { - "702": 2.1720659732818604, - "703": 2.162989854812622, - "704": 2.1660091876983643, - "705": 2.1644506454467773, - "706": 2.175673484802246, - "707": 2.1532578468322754, - "708": 2.160410165786743, - "709": 2.147061824798584, - "710": 2.158083915710449, - "711": 2.156714916229248, - "712": 2.14506196975708, - "713": 2.169018268585205, - "714": 2.182749032974243, - "715": 2.1624374389648438, - "716": 2.1641082763671875, - "717": 2.172196388244629, - "718": 2.1627159118652344, - "719": 2.181286573410034, - "720": 2.1652560234069824, - "721": 2.151679754257202, - "722": 2.1626691818237305, - "723": 2.167447090148926, - "724": 2.1818296909332275, - "725": 2.151371955871582, - "726": 2.1428914070129395, - "727": 2.1450066566467285, - "728": 2.178149700164795, - "729": 2.150766372680664, - "730": 2.1568174362182617, - "731": 2.1730165481567383, - "732": 2.164250373840332, - "733": 2.153724193572998, - "734": 2.1604862213134766, - "735": 2.1655867099761963, - "736": 2.1356310844421387, - "737": 2.1577720642089844, - "738": 2.1455540657043457, - "739": 2.1675660610198975, - "740": 2.1476006507873535, - "741": 2.1399364471435547, - "742": 2.159788131713867, - "743": 2.153350830078125, - "744": 2.1600308418273926, - "745": 2.1429643630981445, - "746": 2.1565303802490234, - "747": 2.1383442878723145, - "748": 2.1562609672546387, - "749": 2.1520001888275146, - "750": 2.158112049102783, - "751": 2.134209394454956, - "752": 2.1595122814178467, - "753": 2.1421687602996826, - "754": 2.135690927505493, - "755": 2.1421546936035156 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "train_epoch_time": 4.79168963432312, - "train_loss": 2.143818064844249, - "train_score": 0.37153761659667345, - "val_loss": 2.199015952142043, - "val_score": 0.3556705655104531 - }, - { - "epoch": 14, - "grad_norm": 0.4956870675086975, - "learning_rate": 0.03333333333333334, - "model_norm": 87.58612823486328, - "step_logs": { - "grad_norm": { - "756": 0.49513155221939087, - "757": 0.5022679567337036, - "758": 0.5577079057693481, - "759": 0.515231728553772, - "760": 0.5745097398757935, - "761": 0.5916004776954651, - "762": 0.536749541759491, - "763": 0.5285218954086304, - "764": 0.541519284248352, - "765": 0.5248390436172485, - "766": 0.512719988822937, - "767": 0.5196036696434021, - "768": 0.4581640362739563, - "769": 0.5233047604560852, - "770": 0.5259172916412354, - "771": 0.5370526909828186, - "772": 0.4967109262943268, - "773": 0.5193302035331726, - "774": 0.5008170008659363, - "775": 0.5132573246955872, - "776": 0.5467756390571594, - "777": 0.5054141283035278, - "778": 0.5527999997138977, - "779": 0.5361348986625671, - "780": 0.49893510341644287, - "781": 0.5206290483474731, - "782": 0.5152865648269653, - "783": 0.5393097400665283, - "784": 0.5165121555328369, - "785": 0.48669740557670593, - "786": 0.5691501498222351, - "787": 0.45931774377822876, - "788": 0.5266578793525696, - "789": 0.5444711446762085, - "790": 0.5368290543556213, - "791": 0.5440704822540283, - "792": 0.4691430330276489, - "793": 0.47556206583976746, - "794": 0.5098633766174316, - "795": 0.5082263350486755, - "796": 0.48889461159706116, - "797": 0.5263949036598206, - "798": 0.458205908536911, - "799": 0.47433793544769287, - "800": 0.5163528323173523, - "801": 0.5056624412536621, - "802": 0.512822687625885, - "803": 0.46433424949645996, - "804": 0.5228627920150757, - "805": 0.4945848882198334, - "806": 0.47502943873405457, - "807": 0.5427560210227966, - "808": 0.48139405250549316, - "809": 0.4956870675086975 - }, - "loss": { - "756": 2.1292901039123535, - "757": 2.13224458694458, - "758": 2.156043767929077, - "759": 2.1326234340667725, - "760": 2.1132423877716064, - "761": 2.1552364826202393, - "762": 2.1544415950775146, - "763": 2.134969472885132, - "764": 2.147430419921875, - "765": 2.1315479278564453, - "766": 2.146841049194336, - "767": 2.156494140625, - "768": 2.139413356781006, - "769": 2.1242666244506836, - "770": 2.160593271255493, - "771": 2.150193214416504, - "772": 2.155601739883423, - "773": 2.139258861541748, - "774": 2.14971923828125, - "775": 2.1444602012634277, - "776": 2.1487326622009277, - "777": 2.1447412967681885, - "778": 2.1246066093444824, - "779": 2.1411261558532715, - "780": 2.1247427463531494, - "781": 2.1267518997192383, - "782": 2.150043249130249, - "783": 2.1429920196533203, - "784": 2.158691883087158, - "785": 2.129425287246704, - "786": 2.132561445236206, - "787": 2.1354730129241943, - "788": 2.1702489852905273, - "789": 2.1353976726531982, - "790": 2.1300811767578125, - "791": 2.145681381225586, - "792": 2.147080898284912, - "793": 2.1334328651428223, - "794": 2.1277124881744385, - "795": 2.1520814895629883, - "796": 2.121870517730713, - "797": 2.1433753967285156, - "798": 2.135822296142578, - "799": 2.1297056674957275, - "800": 2.145019769668579, - "801": 2.147047758102417, - "802": 2.1173782348632812, - "803": 2.122710943222046, - "804": 2.139861583709717, - "805": 2.134819507598877, - "806": 2.1377744674682617, - "807": 2.142080307006836, - "808": 2.116997241973877, - "809": 2.141411542892456 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "train_epoch_time": 4.79171347618103, - "train_loss": 2.135158681527443, - "train_score": 0.3732671270839111, - "val_loss": 2.1931466248771763, - "val_score": 0.35748690503609853 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:24:48.598058", - "final_model_norm": 87.58612823486328, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:23:07.792803", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.1, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 3.928489923477173, - "learning_rate": 1.0000000000000001e-11, - "model_norm": 87.40995025634766, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 8.492406845092773, - "3": 5.133657455444336, - "4": 4.3554368019104, - "5": 8.35051441192627, - "6": 20.943737030029297, - "7": 8.242918014526367, - "8": 5.08157205581665, - "9": 3.359898567199707, - "10": 3.038463592529297, - "11": 3.604124069213867, - "12": 5.520622253417969, - "13": 6.1087188720703125, - "14": 4.094149112701416, - "15": 41.904884338378906, - "16": 3.4324355125427246, - "17": 13.723320007324219, - "18": 3.9538044929504395, - "19": 5.832334518432617, - "20": 3.944932699203491, - "21": 2.1189889907836914, - "22": 2.870297908782959, - "23": 3.1965341567993164, - "24": 5.530642986297607, - "25": 3.9083855152130127, - "26": 2.926786422729492, - "27": 4.4148430824279785, - "28": 4.216004848480225, - "29": 3.2344162464141846, - "30": 4.084825038909912, - "31": 19.7644100189209, - "32": 6.071712017059326, - "33": 2.593540668487549, - "34": 3.893601655960083, - "35": 4.604806900024414, - "36": 2.552725076675415, - "37": 3.1886115074157715, - "38": 8.129566192626953, - "39": 3.764469861984253, - "40": 4.238050937652588, - "41": 2.458381175994873, - "42": 9.120431900024414, - "43": 6.467569351196289, - "44": 4.2845048904418945, - "45": 4.342476844787598, - "46": 9.561224937438965, - "47": 5.701088905334473, - "48": 4.729899883270264, - "49": 3.9542646408081055, - "50": 11.983695030212402, - "51": 5.041728973388672, - "52": 5.196239948272705, - "53": 3.928489923477173 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.9338831901550293, - "3": 3.7350575923919678, - "4": 3.591062068939209, - "5": 3.5876054763793945, - "6": 4.030679702758789, - "7": 4.3120293617248535, - "8": 3.7534420490264893, - "9": 3.4793989658355713, - "10": 3.3799009323120117, - "11": 3.3830275535583496, - "12": 3.336364269256592, - "13": 3.4640421867370605, - "14": 3.2779111862182617, - "15": 3.5040996074676514, - "16": 3.3695948123931885, - "17": 6.5667901039123535, - "18": 3.649301528930664, - "19": 3.5469374656677246, - "20": 3.8657937049865723, - "21": 3.4264955520629883, - "22": 3.388160228729248, - "23": 3.492974281311035, - "24": 3.455472469329834, - "25": 3.8936333656311035, - "26": 3.418292999267578, - "27": 3.435255527496338, - "28": 3.6847920417785645, - "29": 3.361020088195801, - "30": 3.3433456420898438, - "31": 6.951195240020752, - "32": 5.358872413635254, - "33": 3.6707730293273926, - "34": 3.5337109565734863, - "35": 3.961641311645508, - "36": 3.6996350288391113, - "37": 3.5062618255615234, - "38": 3.81649112701416, - "39": 4.745336532592773, - "40": 4.115941047668457, - "41": 3.507040500640869, - "42": 3.8155136108398438, - "43": 5.306589126586914, - "44": 5.048976898193359, - "45": 4.098546981811523, - "46": 4.041048049926758, - "47": 5.62568473815918, - "48": 5.116580963134766, - "49": 4.063141822814941, - "50": 4.201789379119873, - "51": 6.328237533569336, - "52": 5.198306560516357, - "53": 3.7437965869903564 - }, - "lr": { - "0": 1.0000000000000001e-11, - "1": 0.0020000000098000003, - "2": 0.0040000000096000006, - "3": 0.0060000000094, - "4": 0.0080000000092, - "5": 0.010000000009, - "6": 0.0120000000088, - "7": 0.014000000008600001, - "8": 0.0160000000084, - "9": 0.018000000008200002, - "10": 0.020000000008000004, - "11": 0.022000000007800002, - "12": 0.0240000000076, - "13": 0.0260000000074, - "14": 0.0280000000072, - "15": 0.030000000007, - "16": 0.0320000000068, - "17": 0.034000000006599994, - "18": 0.0360000000064, - "19": 0.038000000006200005, - "20": 0.04000000000600001, - "21": 0.0420000000058, - "22": 0.044000000005600004, - "23": 0.046000000005400006, - "24": 0.0480000000052, - "25": 0.05000000000499999, - "26": 0.0520000000048, - "27": 0.05400000000460001, - "28": 0.0560000000044, - "29": 0.0580000000042, - "30": 0.060000000004, - "31": 0.06200000000380001, - "32": 0.0640000000036, - "33": 0.06600000000339999, - "34": 0.06800000000319999, - "35": 0.07000000000300001, - "36": 0.0720000000028, - "37": 0.0740000000026, - "38": 0.07600000000240001, - "39": 0.0780000000022, - "40": 0.08000000000200001, - "41": 0.08200000000180001, - "42": 0.0840000000016, - "43": 0.08600000000140001, - "44": 0.08800000000120001, - "45": 0.090000000001, - "46": 0.09200000000080001, - "47": 0.0940000000006, - "48": 0.0960000000004, - "49": 0.0980000000002, - "50": 0.1, - "51": 0.1, - "52": 0.1, - "53": 0.1 - } - }, - "train_epoch_time": 4.793660640716553, - "train_loss": 7.0373294337750165, - "train_score": 0.1526093973619182, - "val_loss": 7.05570218647936, - "val_score": 0.15117322052182067 - }, - { - "epoch": 1, - "grad_norm": 3.1967504024505615, - "learning_rate": 0.1, - "model_norm": 87.35136413574219, - "step_logs": { - "grad_norm": { - "54": 20.8322696685791, - "55": 5.69713020324707, - "56": 4.769029140472412, - "57": 2.6155433654785156, - "58": 21.443958282470703, - "59": 4.483689785003662, - "60": 4.094512462615967, - "61": 4.553788661956787, - "62": 3.568303346633911, - "63": 3.3428092002868652, - "64": 3.310009241104126, - "65": 10.45778751373291, - "66": 3.8152976036071777, - "67": 4.708316326141357, - "68": 3.467155933380127, - "69": 1.7733898162841797, - "70": 2.766688585281372, - "71": 2.9359614849090576, - "72": 4.071628570556641, - "73": 3.1312408447265625, - "74": 2.2443907260894775, - "75": 8.656401634216309, - "76": 3.2381207942962646, - "77": 3.167780876159668, - "78": 3.455178737640381, - "79": 9.005704879760742, - "80": 3.1383450031280518, - "81": 3.4697580337524414, - "82": 3.2605271339416504, - "83": 5.5265421867370605, - "84": 3.0896403789520264, - "85": 3.085052251815796, - "86": 5.85503625869751, - "87": 3.1433417797088623, - "88": 3.538795232772827, - "89": 4.574154376983643, - "90": 2.9779162406921387, - "91": 2.368004560470581, - "92": 9.186367988586426, - "93": 3.1846976280212402, - "94": 2.9605798721313477, - "95": 2.9102890491485596, - "96": 3.2285008430480957, - "97": 3.100440740585327, - "98": 1.6425262689590454, - "99": 2.2215452194213867, - "100": 9.593896865844727, - "101": 3.614487648010254, - "102": 3.2893424034118652, - "103": 2.878619432449341, - "104": 0.9948781728744507, - "105": 1.8634400367736816, - "106": 7.743080139160156, - "107": 3.1967504024505615 - }, - "loss": { - "54": 7.0251545906066895, - "55": 6.5295729637146, - "56": 5.340699195861816, - "57": 3.5550639629364014, - "58": 4.719113826751709, - "59": 8.22577953338623, - "60": 6.953283786773682, - "61": 6.079184532165527, - "62": 5.362835884094238, - "63": 4.218271732330322, - "64": 3.491568088531494, - "65": 4.205788612365723, - "66": 5.975528717041016, - "67": 5.122035503387451, - "68": 4.456905364990234, - "69": 3.4620778560638428, - "70": 3.4128365516662598, - "71": 3.813060998916626, - "72": 3.4397783279418945, - "73": 4.167916297912598, - "74": 3.4135727882385254, - "75": 3.848520517349243, - "76": 5.363252639770508, - "77": 4.492628574371338, - "78": 3.6933460235595703, - "79": 3.979837417602539, - "80": 5.384007453918457, - "81": 4.589946269989014, - "82": 3.8715908527374268, - "83": 3.53947377204895, - "84": 4.503907203674316, - "85": 3.727509021759033, - "86": 3.5863893032073975, - "87": 4.5610175132751465, - "88": 3.7871623039245605, - "89": 3.5952672958374023, - "90": 4.161242961883545, - "91": 3.4317400455474854, - "92": 3.900672435760498, - "93": 5.428202152252197, - "94": 4.576373100280762, - "95": 3.8025903701782227, - "96": 3.4560348987579346, - "97": 3.8929941654205322, - "98": 3.376396894454956, - "99": 3.422096014022827, - "100": 3.8941197395324707, - "101": 5.518061637878418, - "102": 4.7969970703125, - "103": 3.895348310470581, - "104": 3.356802225112915, - "105": 3.353306770324707, - "106": 3.750880241394043, - "107": 5.0138325691223145 - }, - "lr": { - "54": 0.1, - "55": 0.1, - "56": 0.1, - "57": 0.1, - "58": 0.1, - "59": 0.1, - "60": 0.1, - "61": 0.1, - "62": 0.1, - "63": 0.1, - "64": 0.1, - "65": 0.1, - "66": 0.1, - "67": 0.1, - "68": 0.1, - "69": 0.1, - "70": 0.1, - "71": 0.1, - "72": 0.1, - "73": 0.1, - "74": 0.1, - "75": 0.1, - "76": 0.1, - "77": 0.1, - "78": 0.1, - "79": 0.1, - "80": 0.1, - "81": 0.1, - "82": 0.1, - "83": 0.1, - "84": 0.1, - "85": 0.1, - "86": 0.1, - "87": 0.1, - "88": 0.1, - "89": 0.1, - "90": 0.1, - "91": 0.1, - "92": 0.1, - "93": 0.1, - "94": 0.1, - "95": 0.1, - "96": 0.1, - "97": 0.1, - "98": 0.1, - "99": 0.1, - "100": 0.1, - "101": 0.1, - "102": 0.1, - "103": 0.1, - "104": 0.1, - "105": 0.1, - "106": 0.1, - "107": 0.1 - } - }, - "train_epoch_time": 4.791884183883667, - "train_loss": 4.236121246222957, - "train_score": 0.058572453354489344, - "val_loss": 4.240466636850421, - "val_score": 0.061486438019404316 - }, - { - "epoch": 2, - "grad_norm": 3.2641994953155518, - "learning_rate": 0.1, - "model_norm": 87.329833984375, - "step_logs": { - "grad_norm": { - "108": 3.091353178024292, - "109": 2.8348827362060547, - "110": 9.003742218017578, - "111": 2.9350898265838623, - "112": 2.8313305377960205, - "113": 2.802385091781616, - "114": 2.3809354305267334, - "115": 2.76755428314209, - "116": 5.0462212562561035, - "117": 2.8333096504211426, - "118": 2.7225873470306396, - "119": 6.37400484085083, - "120": 2.846667528152466, - "121": 2.7285897731781006, - "122": 1.2651418447494507, - "123": 2.024562120437622, - "124": 2.5707547664642334, - "125": 6.322628021240234, - "126": 2.8623361587524414, - "127": 2.727850914001465, - "128": 0.9342372417449951, - "129": 1.2566787004470825, - "130": 2.0622317790985107, - "131": 6.882542610168457, - "132": 3.343644380569458, - "133": 2.787292957305908, - "134": 1.7388594150543213, - "135": 5.157271385192871, - "136": 2.7840070724487305, - "137": 2.4878382682800293, - "138": 3.4821557998657227, - "139": 2.5549707412719727, - "140": 1.742350459098816, - "141": 2.621386766433716, - "142": 2.7173802852630615, - "143": 2.51566481590271, - "144": 2.563798666000366, - "145": 3.376544237136841, - "146": 2.679647922515869, - "147": 2.15069317817688, - "148": 2.3656318187713623, - "149": 1.2963240146636963, - "150": 3.189444065093994, - "151": 2.649932384490967, - "152": 1.1049697399139404, - "153": 1.5990302562713623, - "154": 1.2787617444992065, - "155": 1.2961159944534302, - "156": 3.5000383853912354, - "157": 2.621171236038208, - "158": 1.2562835216522217, - "159": 3.0938045978546143, - "160": 2.4794678688049316, - "161": 3.2641994953155518 - }, - "loss": { - "108": 4.230429649353027, - "109": 3.4804975986480713, - "110": 3.9522042274475098, - "111": 5.217118263244629, - "112": 4.460448741912842, - "113": 3.7666289806365967, - "114": 3.398249864578247, - "115": 3.635561466217041, - "116": 3.5293471813201904, - "117": 4.248546123504639, - "118": 3.544987440109253, - "119": 3.6407437324523926, - "120": 4.547821044921875, - "121": 3.848416805267334, - "122": 3.346789836883545, - "123": 3.369158983230591, - "124": 3.5375986099243164, - "125": 3.647054672241211, - "126": 4.53694486618042, - "127": 3.825772285461426, - "128": 3.331212282180786, - "129": 3.3430371284484863, - "130": 3.3759121894836426, - "131": 3.6432371139526367, - "132": 4.652709484100342, - "133": 3.9427027702331543, - "134": 3.3405842781066895, - "135": 3.507436513900757, - "136": 4.2222466468811035, - "137": 3.5759730339050293, - "138": 3.399026393890381, - "139": 3.8177061080932617, - "140": 3.261749267578125, - "141": 3.3000082969665527, - "142": 3.634598970413208, - "143": 3.3618576526641846, - "144": 3.549241542816162, - "145": 3.3747048377990723, - "146": 3.745640277862549, - "147": 3.2384514808654785, - "148": 3.2761054039001465, - "149": 3.289032459259033, - "150": 3.3667030334472656, - "151": 3.7193541526794434, - "152": 3.256500244140625, - "153": 3.2474305629730225, - "154": 3.222397804260254, - "155": 3.2512006759643555, - "156": 3.3207297325134277, - "157": 3.6948719024658203, - "158": 3.165006160736084, - "159": 3.186288833618164, - "160": 3.476701498031616, - "161": 3.25138258934021 - }, - "lr": { - "108": 0.1, - "109": 0.1, - "110": 0.1, - "111": 0.1, - "112": 0.1, - "113": 0.1, - "114": 0.1, - "115": 0.1, - "116": 0.1, - "117": 0.1, - "118": 0.1, - "119": 0.1, - "120": 0.1, - "121": 0.1, - "122": 0.1, - "123": 0.1, - "124": 0.1, - "125": 0.1, - "126": 0.1, - "127": 0.1, - "128": 0.1, - "129": 0.1, - "130": 0.1, - "131": 0.1, - "132": 0.1, - "133": 0.1, - "134": 0.1, - "135": 0.1, - "136": 0.1, - "137": 0.1, - "138": 0.1, - "139": 0.1, - "140": 0.1, - "141": 0.1, - "142": 0.1, - "143": 0.1, - "144": 0.1, - "145": 0.1, - "146": 0.1, - "147": 0.1, - "148": 0.1, - "149": 0.1, - "150": 0.1, - "151": 0.1, - "152": 0.1, - "153": 0.1, - "154": 0.1, - "155": 0.1, - "156": 0.1, - "157": 0.1, - "158": 0.1, - "159": 0.1, - "160": 0.1, - "161": 0.1 - } - }, - "train_epoch_time": 4.7920238971710205, - "train_loss": 3.588182293428068, - "train_score": 0.11997063305319809, - "val_loss": 3.607787262832256, - "val_score": 0.11555503753583822 - }, - { - "epoch": 3, - "grad_norm": 1.3324508666992188, - "learning_rate": 0.1, - "model_norm": 87.85812377929688, - "step_logs": { - "grad_norm": { - "162": 2.824432134628296, - "163": 0.671918511390686, - "164": 0.6593815684318542, - "165": 1.7013866901397705, - "166": 0.708214521408081, - "167": 0.6305429935455322, - "168": 0.8797547817230225, - "169": 2.1628949642181396, - "170": 2.7668163776397705, - "171": 2.6776111125946045, - "172": 8.3070068359375, - "173": 1.7021747827529907, - "174": 2.527327537536621, - "175": 3.311972141265869, - "176": 4.841958999633789, - "177": 3.6141247749328613, - "178": 3.2142221927642822, - "179": 4.1299824714660645, - "180": 2.6084084510803223, - "181": 28.179595947265625, - "182": 1.637022852897644, - "183": 89.59008026123047, - "184": 3.3377041816711426, - "185": 3.0138769149780273, - "186": 5.9667534828186035, - "187": 3.6820621490478516, - "188": 3.018510103225708, - "189": 2.218217372894287, - "190": 2.8095602989196777, - "191": 4.044013977050781, - "192": 2.6811869144439697, - "193": 1.3078272342681885, - "194": 2.956500768661499, - "195": 2.7388901710510254, - "196": 1.413649559020996, - "197": 1.7727783918380737, - "198": 4.475114822387695, - "199": 2.8780641555786133, - "200": 2.163017511367798, - "201": 3.7061965465545654, - "202": 2.895972967147827, - "203": 1.4888063669204712, - "204": 1.859369158744812, - "205": 2.2908899784088135, - "206": 3.5580227375030518, - "207": 2.626107931137085, - "208": 1.303713321685791, - "209": 1.16728937625885, - "210": 1.6314259767532349, - "211": 3.220306634902954, - "212": 2.8753163814544678, - "213": 1.4265707731246948, - "214": 0.87506502866745, - "215": 1.3324508666992188 - }, - "loss": { - "162": 3.6087803840637207, - "163": 3.1152126789093018, - "164": 3.039452075958252, - "165": 3.073056697845459, - "166": 3.0707786083221436, - "167": 3.107726812362671, - "168": 3.0354068279266357, - "169": 3.02984619140625, - "170": 3.068946361541748, - "171": 3.366084575653076, - "172": 3.1058127880096436, - "173": 3.1386094093322754, - "174": 3.136183738708496, - "175": 3.2795944213867188, - "176": 3.295351505279541, - "177": 3.6594529151916504, - "178": 3.1454052925109863, - "179": 3.2168116569519043, - "180": 3.4305734634399414, - "181": 3.4626173973083496, - "182": 3.180250644683838, - "183": 6.397340774536133, - "184": 3.869750499725342, - "185": 3.7022652626037598, - "186": 3.723776340484619, - "187": 4.366543769836426, - "188": 3.800497055053711, - "189": 3.2572691440582275, - "190": 3.449812889099121, - "191": 3.408133029937744, - "192": 3.788762092590332, - "193": 3.215562343597412, - "194": 3.25341796875, - "195": 3.5887646675109863, - "196": 3.187283992767334, - "197": 3.2056093215942383, - "198": 3.386817455291748, - "199": 3.8443965911865234, - "200": 3.2804689407348633, - "201": 3.272469997406006, - "202": 3.664766788482666, - "203": 3.2230629920959473, - "204": 3.1280860900878906, - "205": 3.25093674659729, - "206": 3.2604851722717285, - "207": 3.567448616027832, - "208": 3.109919548034668, - "209": 3.0726699829101562, - "210": 3.1369528770446777, - "211": 3.1474156379699707, - "212": 3.4623429775238037, - "213": 3.111325979232788, - "214": 3.050870895385742, - "215": 3.0334606170654297 - }, - "lr": { - "162": 0.1, - "163": 0.1, - "164": 0.1, - "165": 0.1, - "166": 0.1, - "167": 0.1, - "168": 0.1, - "169": 0.1, - "170": 0.1, - "171": 0.1, - "172": 0.1, - "173": 0.1, - "174": 0.1, - "175": 0.1, - "176": 0.1, - "177": 0.1, - "178": 0.1, - "179": 0.1, - "180": 0.1, - "181": 0.1, - "182": 0.1, - "183": 0.1, - "184": 0.1, - "185": 0.1, - "186": 0.1, - "187": 0.1, - "188": 0.1, - "189": 0.1, - "190": 0.1, - "191": 0.1, - "192": 0.1, - "193": 0.1, - "194": 0.1, - "195": 0.1, - "196": 0.1, - "197": 0.1, - "198": 0.1, - "199": 0.1, - "200": 0.1, - "201": 0.1, - "202": 0.1, - "203": 0.1, - "204": 0.1, - "205": 0.1, - "206": 0.1, - "207": 0.1, - "208": 0.1, - "209": 0.1, - "210": 0.1, - "211": 0.1, - "212": 0.1, - "213": 0.1, - "214": 0.1, - "215": 0.1 - } - }, - "train_epoch_time": 4.788628578186035, - "train_loss": 3.0937729036448847, - "train_score": 0.14534164279314504, - "val_loss": 3.116196415591322, - "val_score": 0.14355805131310573 - }, - { - "epoch": 4, - "grad_norm": 1.835491418838501, - "learning_rate": 0.1, - "model_norm": 87.8695297241211, - "step_logs": { - "grad_norm": { - "216": 1.8210769891738892, - "217": 3.1851627826690674, - "218": 2.7567389011383057, - "219": 1.3343204259872437, - "220": 1.0685020685195923, - "221": 1.6193506717681885, - "222": 2.0600414276123047, - "223": 2.947606086730957, - "224": 2.5700862407684326, - "225": 1.65345299243927, - "226": 1.679834246635437, - "227": 2.455029249191284, - "228": 2.3733439445495605, - "229": 1.955888271331787, - "230": 1.9781748056411743, - "231": 2.608536720275879, - "232": 2.326899766921997, - "233": 1.7234989404678345, - "234": 1.6420572996139526, - "235": 2.279515266418457, - "236": 2.1274642944335938, - "237": 1.75114107131958, - "238": 1.8417428731918335, - "239": 2.201796531677246, - "240": 2.1251626014709473, - "241": 1.738152265548706, - "242": 1.7395765781402588, - "243": 1.9702775478363037, - "244": 2.001551628112793, - "245": 2.015592575073242, - "246": 2.0610828399658203, - "247": 2.0354769229888916, - "248": 1.9832501411437988, - "249": 1.9828920364379883, - "250": 1.8790068626403809, - "251": 1.7271981239318848, - "252": 1.811844825744629, - "253": 1.9391674995422363, - "254": 1.960034966468811, - "255": 1.9865453243255615, - "256": 1.986283540725708, - "257": 1.9122625589370728, - "258": 1.892019510269165, - "259": 1.8487002849578857, - "260": 1.751713514328003, - "261": 1.7176132202148438, - "262": 1.8376508951187134, - "263": 2.0424368381500244, - "264": 1.9755114316940308, - "265": 1.726958990097046, - "266": 1.7347079515457153, - "267": 1.7232612371444702, - "268": 1.720772624015808, - "269": 1.835491418838501 - }, - "loss": { - "216": 3.0981698036193848, - "217": 3.1354355812072754, - "218": 3.3637356758117676, - "219": 3.0626955032348633, - "220": 3.0227108001708984, - "221": 3.0190038681030273, - "222": 3.092155933380127, - "223": 3.1007096767425537, - "224": 3.285460948944092, - "225": 2.9957432746887207, - "226": 3.0422234535217285, - "227": 3.03903865814209, - "228": 3.1799209117889404, - "229": 3.0033092498779297, - "230": 3.0493030548095703, - "231": 3.0282745361328125, - "232": 3.1725730895996094, - "233": 2.973391056060791, - "234": 2.9965219497680664, - "235": 2.9718401432037354, - "236": 3.095028877258301, - "237": 2.9446334838867188, - "238": 2.9990029335021973, - "239": 2.9554476737976074, - "240": 3.0799272060394287, - "241": 2.9496467113494873, - "242": 2.9689407348632812, - "243": 2.9472358226776123, - "244": 3.0029308795928955, - "245": 2.9216151237487793, - "246": 3.0016770362854004, - "247": 2.920720338821411, - "248": 3.013965606689453, - "249": 2.942023754119873, - "250": 2.96042799949646, - "251": 2.8988468647003174, - "252": 2.9738144874572754, - "253": 2.889207124710083, - "254": 2.954322338104248, - "255": 2.903627395629883, - "256": 2.9620203971862793, - "257": 2.906479835510254, - "258": 2.93631649017334, - "259": 2.8859314918518066, - "260": 2.903928756713867, - "261": 2.8633675575256348, - "262": 2.911755084991455, - "263": 2.8941762447357178, - "264": 2.9624555110931396, - "265": 2.855073928833008, - "266": 2.8961992263793945, - "267": 2.866568088531494, - "268": 2.891225576400757, - "269": 2.8544578552246094 - }, - "lr": { - "216": 0.1, - "217": 0.1, - "218": 0.1, - "219": 0.1, - "220": 0.1, - "221": 0.1, - "222": 0.1, - "223": 0.1, - "224": 0.1, - "225": 0.1, - "226": 0.1, - "227": 0.1, - "228": 0.1, - "229": 0.1, - "230": 0.1, - "231": 0.1, - "232": 0.1, - "233": 0.1, - "234": 0.1, - "235": 0.1, - "236": 0.1, - "237": 0.1, - "238": 0.1, - "239": 0.1, - "240": 0.1, - "241": 0.1, - "242": 0.1, - "243": 0.1, - "244": 0.1, - "245": 0.1, - "246": 0.1, - "247": 0.1, - "248": 0.1, - "249": 0.1, - "250": 0.1, - "251": 0.1, - "252": 0.1, - "253": 0.1, - "254": 0.1, - "255": 0.1, - "256": 0.1, - "257": 0.1, - "258": 0.1, - "259": 0.1, - "260": 0.1, - "261": 0.1, - "262": 0.1, - "263": 0.1, - "264": 0.1, - "265": 0.1, - "266": 0.1, - "267": 0.1, - "268": 0.1, - "269": 0.1 - } - }, - "train_epoch_time": 4.789099454879761, - "train_loss": 2.8964361739466487, - "train_score": 0.1781754394087251, - "val_loss": 2.9152807860916434, - "val_score": 0.17769176977441725 - }, - { - "epoch": 5, - "grad_norm": 1.263764500617981, - "learning_rate": 0.1, - "model_norm": 87.87779235839844, - "step_logs": { - "grad_norm": { - "270": 1.8306066989898682, - "271": 1.689879059791565, - "272": 1.6475777626037598, - "273": 1.7084754705429077, - "274": 1.7558374404907227, - "275": 1.646780014038086, - "276": 1.6305023431777954, - "277": 1.7640175819396973, - "278": 1.7945770025253296, - "279": 1.5457614660263062, - "280": 1.477850317955017, - "281": 1.6258735656738281, - "282": 1.700717806816101, - "283": 1.5992242097854614, - "284": 1.53714120388031, - "285": 1.4478787183761597, - "286": 1.4500188827514648, - "287": 1.526047706604004, - "288": 1.487168550491333, - "289": 1.3725082874298096, - "290": 1.361341953277588, - "291": 1.4069572687149048, - "292": 1.4598197937011719, - "293": 1.640318512916565, - "294": 1.6395676136016846, - "295": 1.4420651197433472, - "296": 1.4571055173873901, - "297": 1.5050864219665527, - "298": 1.4598448276519775, - "299": 1.4728896617889404, - "300": 1.5455671548843384, - "301": 1.7616255283355713, - "302": 1.6545928716659546, - "303": 1.3951133489608765, - "304": 1.4193669557571411, - "305": 1.414574384689331, - "306": 1.4064468145370483, - "307": 1.388370394706726, - "308": 1.4153351783752441, - "309": 1.5999380350112915, - "310": 1.6131445169448853, - "311": 1.439826250076294, - "312": 1.421922206878662, - "313": 1.4740266799926758, - "314": 1.471842646598816, - "315": 1.5004557371139526, - "316": 1.4952611923217773, - "317": 1.4506856203079224, - "318": 1.4390641450881958, - "319": 1.3931269645690918, - "320": 1.4123622179031372, - "321": 1.3545799255371094, - "322": 1.3484094142913818, - "323": 1.263764500617981 - }, - "loss": { - "270": 2.9038162231445312, - "271": 2.847886562347412, - "272": 2.8594350814819336, - "273": 2.8455896377563477, - "274": 2.875256061553955, - "275": 2.827949047088623, - "276": 2.830427885055542, - "277": 2.841245174407959, - "278": 2.8708105087280273, - "279": 2.8325061798095703, - "280": 2.8047170639038086, - "281": 2.814955711364746, - "282": 2.865180253982544, - "283": 2.8153252601623535, - "284": 2.822352886199951, - "285": 2.7974014282226562, - "286": 2.809227466583252, - "287": 2.782064437866211, - "288": 2.813408851623535, - "289": 2.772481679916382, - "290": 2.7815897464752197, - "291": 2.772855758666992, - "292": 2.793900489807129, - "293": 2.775840997695923, - "294": 2.843992233276367, - "295": 2.772657871246338, - "296": 2.777970552444458, - "297": 2.790752410888672, - "298": 2.803579568862915, - "299": 2.7560462951660156, - "300": 2.7929444313049316, - "301": 2.8014888763427734, - "302": 2.831510543823242, - "303": 2.758927345275879, - "304": 2.78273606300354, - "305": 2.7754967212677, - "306": 2.7944374084472656, - "307": 2.7461819648742676, - "308": 2.778132200241089, - "309": 2.7810895442962646, - "310": 2.79830002784729, - "311": 2.773873805999756, - "312": 2.770893096923828, - "313": 2.755842685699463, - "314": 2.7996699810028076, - "315": 2.762727975845337, - "316": 2.759260654449463, - "317": 2.7637434005737305, - "318": 2.7653982639312744, - "319": 2.744609832763672, - "320": 2.7802443504333496, - "321": 2.743741750717163, - "322": 2.754742383956909, - "323": 2.7466845512390137 - }, - "lr": { - "270": 0.1, - "271": 0.1, - "272": 0.1, - "273": 0.1, - "274": 0.1, - "275": 0.1, - "276": 0.1, - "277": 0.1, - "278": 0.1, - "279": 0.1, - "280": 0.1, - "281": 0.1, - "282": 0.1, - "283": 0.1, - "284": 0.1, - "285": 0.1, - "286": 0.1, - "287": 0.1, - "288": 0.1, - "289": 0.1, - "290": 0.1, - "291": 0.1, - "292": 0.1, - "293": 0.1, - "294": 0.1, - "295": 0.1, - "296": 0.1, - "297": 0.1, - "298": 0.1, - "299": 0.1, - "300": 0.1, - "301": 0.1, - "302": 0.1, - "303": 0.1, - "304": 0.1, - "305": 0.1, - "306": 0.1, - "307": 0.1, - "308": 0.1, - "309": 0.1, - "310": 0.1, - "311": 0.1, - "312": 0.1, - "313": 0.1, - "314": 0.1, - "315": 0.1, - "316": 0.1, - "317": 0.1, - "318": 0.1, - "319": 0.1, - "320": 0.1, - "321": 0.1, - "322": 0.1, - "323": 0.1 - } - }, - "train_epoch_time": 4.7897584438323975, - "train_loss": 2.7485697853000812, - "train_score": 0.23732738516252044, - "val_loss": 2.765530650569158, - "val_score": 0.23109662033249673 - }, - { - "epoch": 6, - "grad_norm": 1.1934714317321777, - "learning_rate": 0.1, - "model_norm": 87.8837890625, - "step_logs": { - "grad_norm": { - "324": 1.212211012840271, - "325": 1.3444231748580933, - "326": 1.3861666917800903, - "327": 1.4266451597213745, - "328": 1.470704197883606, - "329": 1.569632649421692, - "330": 1.5838505029678345, - "331": 1.5241219997406006, - "332": 1.497305989265442, - "333": 1.4816832542419434, - "334": 1.462012767791748, - "335": 1.363344430923462, - "336": 1.3326537609100342, - "337": 1.3990378379821777, - "338": 1.3750602006912231, - "339": 1.2829521894454956, - "340": 1.2796564102172852, - "341": 1.3192286491394043, - "342": 1.3997341394424438, - "343": 1.3237347602844238, - "344": 1.2693843841552734, - "345": 1.2844293117523193, - "346": 1.3697872161865234, - "347": 1.5104541778564453, - "348": 1.4591901302337646, - "349": 1.3731815814971924, - "350": 1.3856337070465088, - "351": 1.3679394721984863, - "352": 1.299026370048523, - "353": 1.2451119422912598, - "354": 1.2331608533859253, - "355": 1.387474536895752, - "356": 1.4008287191390991, - "357": 1.3250088691711426, - "358": 1.2733795642852783, - "359": 1.1614477634429932, - "360": 1.172279953956604, - "361": 1.2362662553787231, - "362": 1.28485107421875, - "363": 1.3219964504241943, - "364": 1.359808325767517, - "365": 1.4506912231445312, - "366": 1.502313256263733, - "367": 1.53559410572052, - "368": 1.5011998414993286, - "369": 1.4331663846969604, - "370": 1.4088222980499268, - "371": 1.4210195541381836, - "372": 1.372637152671814, - "373": 1.2787890434265137, - "374": 1.3293178081512451, - "375": 1.4793283939361572, - "376": 1.3737612962722778, - "377": 1.1934714317321777 - }, - "loss": { - "324": 2.7605576515197754, - "325": 2.768967390060425, - "326": 2.7654802799224854, - "327": 2.748854398727417, - "328": 2.7728044986724854, - "329": 2.743345260620117, - "330": 2.7956578731536865, - "331": 2.756959915161133, - "332": 2.7566184997558594, - "333": 2.7495181560516357, - "334": 2.7496824264526367, - "335": 2.7624990940093994, - "336": 2.744152069091797, - "337": 2.737061023712158, - "338": 2.7730588912963867, - "339": 2.727717161178589, - "340": 2.7479336261749268, - "341": 2.7435193061828613, - "342": 2.738661289215088, - "343": 2.731874465942383, - "344": 2.736388683319092, - "345": 2.722999095916748, - "346": 2.7389166355133057, - "347": 2.747027635574341, - "348": 2.76369309425354, - "349": 2.735213041305542, - "350": 2.745793342590332, - "351": 2.737874984741211, - "352": 2.736140727996826, - "353": 2.7206907272338867, - "354": 2.719184398651123, - "355": 2.7062129974365234, - "356": 2.752830982208252, - "357": 2.724489688873291, - "358": 2.7392067909240723, - "359": 2.7085227966308594, - "360": 2.713923454284668, - "361": 2.7098302841186523, - "362": 2.7200355529785156, - "363": 2.7300682067871094, - "364": 2.7280852794647217, - "365": 2.7351455688476562, - "366": 2.733548164367676, - "367": 2.7467041015625, - "368": 2.7625515460968018, - "369": 2.743311882019043, - "370": 2.7539706230163574, - "371": 2.7306618690490723, - "372": 2.736286163330078, - "373": 2.7176260948181152, - "374": 2.7371134757995605, - "375": 2.718600273132324, - "376": 2.752563953399658, - "377": 2.703352451324463 - }, - "lr": { - "324": 0.1, - "325": 0.1, - "326": 0.1, - "327": 0.1, - "328": 0.1, - "329": 0.1, - "330": 0.1, - "331": 0.1, - "332": 0.1, - "333": 0.1, - "334": 0.1, - "335": 0.1, - "336": 0.1, - "337": 0.1, - "338": 0.1, - "339": 0.1, - "340": 0.1, - "341": 0.1, - "342": 0.1, - "343": 0.1, - "344": 0.1, - "345": 0.1, - "346": 0.1, - "347": 0.1, - "348": 0.1, - "349": 0.1, - "350": 0.1, - "351": 0.1, - "352": 0.1, - "353": 0.1, - "354": 0.1, - "355": 0.1, - "356": 0.1, - "357": 0.1, - "358": 0.1, - "359": 0.1, - "360": 0.1, - "361": 0.1, - "362": 0.1, - "363": 0.1, - "364": 0.1, - "365": 0.1, - "366": 0.1, - "367": 0.1, - "368": 0.1, - "369": 0.1, - "370": 0.1, - "371": 0.1, - "372": 0.1, - "373": 0.1, - "374": 0.1, - "375": 0.1, - "376": 0.1, - "377": 0.1 - } - }, - "train_epoch_time": 4.7895894050598145, - "train_loss": 2.720357865685199, - "train_score": 0.24113948169021387, - "val_loss": 2.739208456843099, - "val_score": 0.23535268352581903 - }, - { - "epoch": 7, - "grad_norm": 1.2527161836624146, - "learning_rate": 0.1, - "model_norm": 87.88997650146484, - "step_logs": { - "grad_norm": { - "378": 1.1807222366333008, - "379": 1.0899643898010254, - "380": 1.0149189233779907, - "381": 1.024212121963501, - "382": 1.056444764137268, - "383": 1.1337379217147827, - "384": 1.2161318063735962, - "385": 1.2631144523620605, - "386": 1.2693703174591064, - "387": 1.309577226638794, - "388": 1.4319877624511719, - "389": 1.4681731462478638, - "390": 1.3460105657577515, - "391": 1.213484764099121, - "392": 1.1490771770477295, - "393": 1.1601296663284302, - "394": 1.2059359550476074, - "395": 1.244826078414917, - "396": 1.192064881324768, - "397": 1.1367443799972534, - "398": 1.1892452239990234, - "399": 1.3832355737686157, - "400": 1.4787708520889282, - "401": 1.5027453899383545, - "402": 1.4790802001953125, - "403": 1.4165902137756348, - "404": 1.4307981729507446, - "405": 1.406058430671692, - "406": 1.4000561237335205, - "407": 1.5383082628250122, - "408": 1.4830294847488403, - "409": 1.154307246208191, - "410": 1.07868492603302, - "411": 1.0703961849212646, - "412": 1.0097122192382812, - "413": 0.9366562366485596, - "414": 0.9800765514373779, - "415": 1.0790947675704956, - "416": 1.1231735944747925, - "417": 1.156981348991394, - "418": 1.1893287897109985, - "419": 1.2710999250411987, - "420": 1.3180030584335327, - "421": 1.2941913604736328, - "422": 1.3021388053894043, - "423": 1.4313437938690186, - "424": 1.3214612007141113, - "425": 1.124120831489563, - "426": 1.1646090745925903, - "427": 1.2193467617034912, - "428": 1.1915640830993652, - "429": 1.2326442003250122, - "430": 1.2742655277252197, - "431": 1.2527161836624146 - }, - "loss": { - "378": 2.7315673828125, - "379": 2.6979758739471436, - "380": 2.7075541019439697, - "381": 2.6885218620300293, - "382": 2.697160243988037, - "383": 2.706719398498535, - "384": 2.719973087310791, - "385": 2.691807270050049, - "386": 2.728854179382324, - "387": 2.708617687225342, - "388": 2.716881275177002, - "389": 2.7154035568237305, - "390": 2.751321792602539, - "391": 2.706925868988037, - "392": 2.703634738922119, - "393": 2.7008211612701416, - "394": 2.69789719581604, - "395": 2.689706802368164, - "396": 2.716561794281006, - "397": 2.6960606575012207, - "398": 2.7129335403442383, - "399": 2.7062387466430664, - "400": 2.7242417335510254, - "401": 2.724435329437256, - "402": 2.740177631378174, - "403": 2.713056802749634, - "404": 2.7444705963134766, - "405": 2.7041895389556885, - "406": 2.7081923484802246, - "407": 2.706737518310547, - "408": 2.7492432594299316, - "409": 2.7043237686157227, - "410": 2.6978580951690674, - "411": 2.698662757873535, - "412": 2.7073848247528076, - "413": 2.6918282508850098, - "414": 2.6825029850006104, - "415": 2.6828842163085938, - "416": 2.6918365955352783, - "417": 2.688469171524048, - "418": 2.7077364921569824, - "419": 2.6961145401000977, - "420": 2.710808277130127, - "421": 2.6898560523986816, - "422": 2.7140374183654785, - "423": 2.717118263244629, - "424": 2.709707736968994, - "425": 2.677877902984619, - "426": 2.682298183441162, - "427": 2.6855239868164062, - "428": 2.684274673461914, - "429": 2.6895804405212402, - "430": 2.6932625770568848, - "431": 2.704105854034424 - }, - "lr": { - "378": 0.1, - "379": 0.1, - "380": 0.1, - "381": 0.1, - "382": 0.1, - "383": 0.1, - "384": 0.1, - "385": 0.1, - "386": 0.1, - "387": 0.1, - "388": 0.1, - "389": 0.1, - "390": 0.1, - "391": 0.1, - "392": 0.1, - "393": 0.1, - "394": 0.1, - "395": 0.1, - "396": 0.1, - "397": 0.1, - "398": 0.1, - "399": 0.1, - "400": 0.1, - "401": 0.1, - "402": 0.1, - "403": 0.1, - "404": 0.1, - "405": 0.1, - "406": 0.1, - "407": 0.1, - "408": 0.1, - "409": 0.1, - "410": 0.1, - "411": 0.1, - "412": 0.1, - "413": 0.1, - "414": 0.1, - "415": 0.1, - "416": 0.1, - "417": 0.1, - "418": 0.1, - "419": 0.1, - "420": 0.1, - "421": 0.1, - "422": 0.1, - "423": 0.1, - "424": 0.1, - "425": 0.1, - "426": 0.1, - "427": 0.1, - "428": 0.1, - "429": 0.1, - "430": 0.1, - "431": 0.1 - } - }, - "train_epoch_time": 4.789607048034668, - "train_loss": 2.698752693671578, - "train_score": 0.24680662656275065, - "val_loss": 2.717042813481748, - "val_score": 0.24142060147429442 - }, - { - "epoch": 8, - "grad_norm": 1.1504567861557007, - "learning_rate": 0.1, - "model_norm": 87.89695739746094, - "step_logs": { - "grad_norm": { - "432": 1.2470049858093262, - "433": 1.289585828781128, - "434": 1.2592664957046509, - "435": 1.1862597465515137, - "436": 1.1151883602142334, - "437": 0.989433765411377, - "438": 1.0614880323410034, - "439": 1.133050560951233, - "440": 1.1500543355941772, - "441": 1.1999155282974243, - "442": 1.3118451833724976, - "443": 1.3277556896209717, - "444": 1.2784500122070312, - "445": 1.2585184574127197, - "446": 1.187974452972412, - "447": 1.1775364875793457, - "448": 1.1956790685653687, - "449": 1.1144866943359375, - "450": 0.97844398021698, - "451": 0.8708642721176147, - "452": 0.8820230960845947, - "453": 0.9199129939079285, - "454": 1.0073786973953247, - "455": 1.1868746280670166, - "456": 1.2747303247451782, - "457": 1.4342340230941772, - "458": 1.608930230140686, - "459": 1.78340744972229, - "460": 1.8361918926239014, - "461": 1.6384437084197998, - "462": 1.328623652458191, - "463": 1.1527019739151, - "464": 1.1041613817214966, - "465": 1.1172404289245605, - "466": 1.0322593450546265, - "467": 0.9617031216621399, - "468": 0.9166104197502136, - "469": 0.9106884598731995, - "470": 0.9628452062606812, - "471": 1.0301531553268433, - "472": 0.9843339323997498, - "473": 0.9334664344787598, - "474": 0.9433060884475708, - "475": 0.9796757102012634, - "476": 1.0614765882492065, - "477": 1.0917699337005615, - "478": 1.0538979768753052, - "479": 1.1657098531723022, - "480": 1.2200924158096313, - "481": 1.1883965730667114, - "482": 1.1627167463302612, - "483": 1.1036138534545898, - "484": 1.1021698713302612, - "485": 1.1504567861557007 - }, - "loss": { - "432": 2.713374614715576, - "433": 2.671968460083008, - "434": 2.7096199989318848, - "435": 2.6906604766845703, - "436": 2.68302321434021, - "437": 2.6825802326202393, - "438": 2.670771837234497, - "439": 2.689758777618408, - "440": 2.6748640537261963, - "441": 2.6890697479248047, - "442": 2.692525625228882, - "443": 2.696845531463623, - "444": 2.7210817337036133, - "445": 2.6859681606292725, - "446": 2.6838626861572266, - "447": 2.6948540210723877, - "448": 2.692143440246582, - "449": 2.6692957878112793, - "450": 2.6747798919677734, - "451": 2.663140058517456, - "452": 2.659381151199341, - "453": 2.6430397033691406, - "454": 2.6683623790740967, - "455": 2.675079822540283, - "456": 2.6917381286621094, - "457": 2.6812214851379395, - "458": 2.715550422668457, - "459": 2.704169750213623, - "460": 2.742527484893799, - "461": 2.7332534790039062, - "462": 2.68533992767334, - "463": 2.67154860496521, - "464": 2.663553476333618, - "465": 2.6680541038513184, - "466": 2.658794641494751, - "467": 2.66171932220459, - "468": 2.677323341369629, - "469": 2.660294771194458, - "470": 2.6684353351593018, - "471": 2.659008741378784, - "472": 2.65541934967041, - "473": 2.661707878112793, - "474": 2.662236452102661, - "475": 2.660902500152588, - "476": 2.6765389442443848, - "477": 2.6452293395996094, - "478": 2.660407781600952, - "479": 2.6676993370056152, - "480": 2.6760501861572266, - "481": 2.648056745529175, - "482": 2.684999704360962, - "483": 2.657564878463745, - "484": 2.650477409362793, - "485": 2.6608033180236816 - }, - "lr": { - "432": 0.1, - "433": 0.1, - "434": 0.1, - "435": 0.1, - "436": 0.1, - "437": 0.1, - "438": 0.1, - "439": 0.1, - "440": 0.1, - "441": 0.1, - "442": 0.1, - "443": 0.1, - "444": 0.1, - "445": 0.1, - "446": 0.1, - "447": 0.1, - "448": 0.1, - "449": 0.1, - "450": 0.1, - "451": 0.1, - "452": 0.1, - "453": 0.1, - "454": 0.1, - "455": 0.1, - "456": 0.1, - "457": 0.1, - "458": 0.1, - "459": 0.1, - "460": 0.1, - "461": 0.1, - "462": 0.1, - "463": 0.1, - "464": 0.1, - "465": 0.1, - "466": 0.1, - "467": 0.1, - "468": 0.1, - "469": 0.1, - "470": 0.1, - "471": 0.1, - "472": 0.1, - "473": 0.1, - "474": 0.1, - "475": 0.1, - "476": 0.1, - "477": 0.1, - "478": 0.1, - "479": 0.1, - "480": 0.1, - "481": 0.1, - "482": 0.1, - "483": 0.1, - "484": 0.1, - "485": 0.1 - } - }, - "train_epoch_time": 4.79047703742981, - "train_loss": 2.6679309994794718, - "train_score": 0.2502947901080294, - "val_loss": 2.6905030358398823, - "val_score": 0.24513849013845354 - }, - { - "epoch": 9, - "grad_norm": 1.0001332759857178, - "learning_rate": 0.1, - "model_norm": 87.90438842773438, - "step_logs": { - "grad_norm": { - "486": 1.1612510681152344, - "487": 1.062957525253296, - "488": 1.078001618385315, - "489": 1.1932837963104248, - "490": 1.1981325149536133, - "491": 1.1555547714233398, - "492": 1.1137977838516235, - "493": 1.1161729097366333, - "494": 1.1411128044128418, - "495": 1.2163426876068115, - "496": 1.2052350044250488, - "497": 1.0639222860336304, - "498": 0.9671124815940857, - "499": 0.9194662570953369, - "500": 0.9014149308204651, - "501": 0.8693838119506836, - "502": 0.910906195640564, - "503": 0.8739429712295532, - "504": 0.8550460934638977, - "505": 0.9850298166275024, - "506": 1.0261270999908447, - "507": 1.009405493736267, - "508": 0.9884840250015259, - "509": 1.0212205648422241, - "510": 1.177075982093811, - "511": 1.287395715713501, - "512": 1.207298994064331, - "513": 1.0814963579177856, - "514": 1.014443039894104, - "515": 1.0257301330566406, - "516": 1.0380085706710815, - "517": 1.0529162883758545, - "518": 1.0382617712020874, - "519": 1.0139249563217163, - "520": 0.9397279620170593, - "521": 0.9486550688743591, - "522": 0.9386362433433533, - "523": 0.8969469666481018, - "524": 0.8797743916511536, - "525": 0.8516401052474976, - "526": 0.8083978891372681, - "527": 0.7750649452209473, - "528": 0.7665368914604187, - "529": 0.8149704933166504, - "530": 0.8175081014633179, - "531": 0.8571537137031555, - "532": 0.8864110708236694, - "533": 0.9360243678092957, - "534": 1.0125856399536133, - "535": 1.0238449573516846, - "536": 1.0681442022323608, - "537": 1.0233536958694458, - "538": 0.9476478099822998, - "539": 1.0001332759857178 - }, - "loss": { - "486": 2.6660804748535156, - "487": 2.6628355979919434, - "488": 2.666457414627075, - "489": 2.6530027389526367, - "490": 2.646252393722534, - "491": 2.6584932804107666, - "492": 2.654371738433838, - "493": 2.6585822105407715, - "494": 2.6463801860809326, - "495": 2.648942708969116, - "496": 2.662529468536377, - "497": 2.64328932762146, - "498": 2.6459012031555176, - "499": 2.6344614028930664, - "500": 2.6539177894592285, - "501": 2.6388461589813232, - "502": 2.6546387672424316, - "503": 2.635089874267578, - "504": 2.638031482696533, - "505": 2.646827459335327, - "506": 2.6532065868377686, - "507": 2.6290059089660645, - "508": 2.6561617851257324, - "509": 2.64959979057312, - "510": 2.648609161376953, - "511": 2.654461622238159, - "512": 2.674318313598633, - "513": 2.6441245079040527, - "514": 2.6411540508270264, - "515": 2.6309056282043457, - "516": 2.6483817100524902, - "517": 2.6443352699279785, - "518": 2.6486361026763916, - "519": 2.6380200386047363, - "520": 2.62508487701416, - "521": 2.643843650817871, - "522": 2.6435084342956543, - "523": 2.6433770656585693, - "524": 2.670839309692383, - "525": 2.6234757900238037, - "526": 2.6334450244903564, - "527": 2.6266393661499023, - "528": 2.6260743141174316, - "529": 2.6110000610351562, - "530": 2.621337413787842, - "531": 2.614391803741455, - "532": 2.635751485824585, - "533": 2.6307735443115234, - "534": 2.6422643661499023, - "535": 2.640986919403076, - "536": 2.653642177581787, - "537": 2.622880458831787, - "538": 2.6292803287506104, - "539": 2.642066717147827 - }, - "lr": { - "486": 0.1, - "487": 0.1, - "488": 0.1, - "489": 0.1, - "490": 0.1, - "491": 0.1, - "492": 0.1, - "493": 0.1, - "494": 0.1, - "495": 0.1, - "496": 0.1, - "497": 0.1, - "498": 0.1, - "499": 0.1, - "500": 0.1, - "501": 0.1, - "502": 0.1, - "503": 0.1, - "504": 0.1, - "505": 0.1, - "506": 0.1, - "507": 0.1, - "508": 0.1, - "509": 0.1, - "510": 0.1, - "511": 0.1, - "512": 0.1, - "513": 0.1, - "514": 0.1, - "515": 0.1, - "516": 0.1, - "517": 0.1, - "518": 0.1, - "519": 0.1, - "520": 0.1, - "521": 0.1, - "522": 0.1, - "523": 0.1, - "524": 0.1, - "525": 0.1, - "526": 0.1, - "527": 0.1, - "528": 0.1, - "529": 0.1, - "530": 0.1, - "531": 0.1, - "532": 0.1, - "533": 0.1, - "534": 0.1, - "535": 0.1, - "536": 0.1, - "537": 0.1, - "538": 0.1, - "539": 0.1 - } - }, - "train_epoch_time": 4.789014577865601, - "train_loss": 2.636936020885342, - "train_score": 0.25737647956378834, - "val_loss": 2.6576797067092564, - "val_score": 0.251358890779804 - }, - { - "epoch": 10, - "grad_norm": 1.3132293224334717, - "learning_rate": 0.1, - "model_norm": 87.91010284423828, - "step_logs": { - "grad_norm": { - "540": 1.0130642652511597, - "541": 1.026869535446167, - "542": 1.059008002281189, - "543": 1.1466665267944336, - "544": 1.166563868522644, - "545": 1.1260216236114502, - "546": 1.1005512475967407, - "547": 1.0809563398361206, - "548": 1.065432071685791, - "549": 0.8896036148071289, - "550": 0.764809787273407, - "551": 0.7022968530654907, - "552": 0.8188730478286743, - "553": 0.9863724708557129, - "554": 1.1375645399093628, - "555": 1.349520206451416, - "556": 1.4614402055740356, - "557": 1.4614065885543823, - "558": 1.5128509998321533, - "559": 1.4895579814910889, - "560": 1.3585960865020752, - "561": 1.3409488201141357, - "562": 1.341147541999817, - "563": 1.2098031044006348, - "564": 1.110071063041687, - "565": 1.2046527862548828, - "566": 1.2403206825256348, - "567": 1.3031007051467896, - "568": 1.327358365058899, - "569": 1.2581506967544556, - "570": 1.1368353366851807, - "571": 1.0242462158203125, - "572": 1.040698528289795, - "573": 1.0046101808547974, - "574": 0.9578930139541626, - "575": 0.9442002177238464, - "576": 0.9903170466423035, - "577": 0.9429300427436829, - "578": 0.946691632270813, - "579": 0.9224731922149658, - "580": 0.8999906778335571, - "581": 0.8905678391456604, - "582": 0.888059675693512, - "583": 0.9678447246551514, - "584": 1.045638084411621, - "585": 1.0738847255706787, - "586": 1.1046018600463867, - "587": 1.049461841583252, - "588": 0.9720369577407837, - "589": 0.988822877407074, - "590": 1.061402440071106, - "591": 1.174731969833374, - "592": 1.2586510181427002, - "593": 1.3132293224334717 - }, - "loss": { - "540": 2.638904333114624, - "541": 2.6240127086639404, - "542": 2.6381092071533203, - "543": 2.6202807426452637, - "544": 2.6436290740966797, - "545": 2.6638031005859375, - "546": 2.64585542678833, - "547": 2.6289055347442627, - "548": 2.6339850425720215, - "549": 2.632993221282959, - "550": 2.6294941902160645, - "551": 2.6152708530426025, - "552": 2.6239538192749023, - "553": 2.6080827713012695, - "554": 2.635983943939209, - "555": 2.6620101928710938, - "556": 2.659130334854126, - "557": 2.6474387645721436, - "558": 2.649404525756836, - "559": 2.6575660705566406, - "560": 2.647495746612549, - "561": 2.6552162170410156, - "562": 2.656172752380371, - "563": 2.6511611938476562, - "564": 2.621337652206421, - "565": 2.610480546951294, - "566": 2.6637675762176514, - "567": 2.63095760345459, - "568": 2.660881996154785, - "569": 2.635842800140381, - "570": 2.6520895957946777, - "571": 2.6140847206115723, - "572": 2.6337008476257324, - "573": 2.582170248031616, - "574": 2.617434024810791, - "575": 2.6204416751861572, - "576": 2.615417003631592, - "577": 2.604065418243408, - "578": 2.621476650238037, - "579": 2.6046745777130127, - "580": 2.6206703186035156, - "581": 2.607785701751709, - "582": 2.598924160003662, - "583": 2.608166217803955, - "584": 2.630323886871338, - "585": 2.616121530532837, - "586": 2.642490863800049, - "587": 2.6267971992492676, - "588": 2.629271984100342, - "589": 2.6087536811828613, - "590": 2.621145725250244, - "591": 2.6333248615264893, - "592": 2.625886917114258, - "593": 2.6297783851623535 - }, - "lr": { - "540": 0.1, - "541": 0.1, - "542": 0.1, - "543": 0.1, - "544": 0.1, - "545": 0.1, - "546": 0.1, - "547": 0.1, - "548": 0.1, - "549": 0.1, - "550": 0.1, - "551": 0.1, - "552": 0.1, - "553": 0.1, - "554": 0.1, - "555": 0.1, - "556": 0.1, - "557": 0.1, - "558": 0.1, - "559": 0.1, - "560": 0.1, - "561": 0.1, - "562": 0.1, - "563": 0.1, - "564": 0.1, - "565": 0.1, - "566": 0.1, - "567": 0.1, - "568": 0.1, - "569": 0.1, - "570": 0.1, - "571": 0.1, - "572": 0.1, - "573": 0.1, - "574": 0.1, - "575": 0.1, - "576": 0.1, - "577": 0.1, - "578": 0.1, - "579": 0.1, - "580": 0.1, - "581": 0.1, - "582": 0.1, - "583": 0.1, - "584": 0.1, - "585": 0.1, - "586": 0.1, - "587": 0.1, - "588": 0.1, - "589": 0.1, - "590": 0.1, - "591": 0.1, - "592": 0.1, - "593": 0.1 - } - }, - "train_epoch_time": 4.7885589599609375, - "train_loss": 2.6399824152717972, - "train_score": 0.24474757884119983, - "val_loss": 2.6626385922546913, - "val_score": 0.2361061278426962 - }, - { - "epoch": 11, - "grad_norm": 0.9859684109687805, - "learning_rate": 0.1, - "model_norm": 87.91594696044922, - "step_logs": { - "grad_norm": { - "594": 1.232816219329834, - "595": 1.0501583814620972, - "596": 0.9624819755554199, - "597": 0.8268987536430359, - "598": 0.8406144976615906, - "599": 0.9287706613540649, - "600": 0.9681162238121033, - "601": 0.9388473629951477, - "602": 0.9350690245628357, - "603": 0.9709102511405945, - "604": 1.012136459350586, - "605": 0.9263124465942383, - "606": 0.8129522204399109, - "607": 0.7992943525314331, - "608": 0.8032410740852356, - "609": 0.7831953167915344, - "610": 0.7809920310974121, - "611": 0.7741714119911194, - "612": 0.8221547603607178, - "613": 0.9485293626785278, - "614": 1.0550944805145264, - "615": 1.223487138748169, - "616": 1.3416365385055542, - "617": 1.3670084476470947, - "618": 1.4177489280700684, - "619": 1.445788025856018, - "620": 1.424302101135254, - "621": 1.3975396156311035, - "622": 1.313917636871338, - "623": 1.2091777324676514, - "624": 1.088362693786621, - "625": 0.9875993728637695, - "626": 0.860623300075531, - "627": 0.7900123000144958, - "628": 0.6933360695838928, - "629": 0.6035811305046082, - "630": 0.5959075093269348, - "631": 0.569287121295929, - "632": 0.5972772836685181, - "633": 0.6479539275169373, - "634": 0.6718242764472961, - "635": 0.708493709564209, - "636": 0.7453250885009766, - "637": 0.8379040956497192, - "638": 0.9716309905052185, - "639": 1.1598995923995972, - "640": 1.145552635192871, - "641": 1.0906648635864258, - "642": 1.1012121438980103, - "643": 1.130255937576294, - "644": 1.1508382558822632, - "645": 1.1003869771957397, - "646": 1.045069932937622, - "647": 0.9859684109687805 - }, - "loss": { - "594": 2.6449217796325684, - "595": 2.603557825088501, - "596": 2.6232662200927734, - "597": 2.5892586708068848, - "598": 2.616605281829834, - "599": 2.5903077125549316, - "600": 2.6083695888519287, - "601": 2.6061222553253174, - "602": 2.6089248657226562, - "603": 2.6281466484069824, - "604": 2.6093266010284424, - "605": 2.604705810546875, - "606": 2.6213996410369873, - "607": 2.6040000915527344, - "608": 2.6044650077819824, - "609": 2.5897090435028076, - "610": 2.597846031188965, - "611": 2.6044387817382812, - "612": 2.5937461853027344, - "613": 2.6094236373901367, - "614": 2.620765209197998, - "615": 2.635242462158203, - "616": 2.6388814449310303, - "617": 2.633263349533081, - "618": 2.62026047706604, - "619": 2.647897481918335, - "620": 2.6325912475585938, - "621": 2.622979164123535, - "622": 2.64648175239563, - "623": 2.6118288040161133, - "624": 2.628385543823242, - "625": 2.6140294075012207, - "626": 2.607260227203369, - "627": 2.5733721256256104, - "628": 2.5911970138549805, - "629": 2.5724167823791504, - "630": 2.598172426223755, - "631": 2.573568820953369, - "632": 2.5882842540740967, - "633": 2.5935275554656982, - "634": 2.6073246002197266, - "635": 2.5996737480163574, - "636": 2.589401960372925, - "637": 2.5997109413146973, - "638": 2.611051082611084, - "639": 2.6028225421905518, - "640": 2.623549461364746, - "641": 2.5914037227630615, - "642": 2.6109440326690674, - "643": 2.6036734580993652, - "644": 2.6183109283447266, - "645": 2.618478775024414, - "646": 2.618661880493164, - "647": 2.6153457164764404 - }, - "lr": { - "594": 0.1, - "595": 0.1, - "596": 0.1, - "597": 0.1, - "598": 0.1, - "599": 0.1, - "600": 0.1, - "601": 0.1, - "602": 0.1, - "603": 0.1, - "604": 0.1, - "605": 0.1, - "606": 0.1, - "607": 0.1, - "608": 0.1, - "609": 0.1, - "610": 0.1, - "611": 0.1, - "612": 0.1, - "613": 0.1, - "614": 0.1, - "615": 0.1, - "616": 0.1, - "617": 0.1, - "618": 0.1, - "619": 0.1, - "620": 0.1, - "621": 0.1, - "622": 0.1, - "623": 0.1, - "624": 0.1, - "625": 0.1, - "626": 0.1, - "627": 0.1, - "628": 0.1, - "629": 0.1, - "630": 0.1, - "631": 0.1, - "632": 0.1, - "633": 0.1, - "634": 0.1, - "635": 0.1, - "636": 0.1, - "637": 0.1, - "638": 0.1, - "639": 0.1, - "640": 0.1, - "641": 0.1, - "642": 0.1, - "643": 0.1, - "644": 0.1, - "645": 0.1, - "646": 0.1, - "647": 0.1 - } - }, - "train_epoch_time": 4.7887489795684814, - "train_loss": 2.604097114095045, - "train_score": 0.260620292255834, - "val_loss": 2.6265195924296854, - "val_score": 0.2520719714608176 - }, - { - "epoch": 12, - "grad_norm": 0.3209591507911682, - "learning_rate": 0.1, - "model_norm": 87.92118072509766, - "step_logs": { - "grad_norm": { - "648": 1.000657558441162, - "649": 1.0662273168563843, - "650": 1.115991473197937, - "651": 1.1540179252624512, - "652": 1.1653028726577759, - "653": 1.1926831007003784, - "654": 1.2208694219589233, - "655": 1.0660916566848755, - "656": 1.0124804973602295, - "657": 0.9115723967552185, - "658": 0.8166388869285583, - "659": 0.8163293600082397, - "660": 0.806090772151947, - "661": 0.6859253644943237, - "662": 0.5863920450210571, - "663": 0.5947525501251221, - "664": 0.5988678336143494, - "665": 0.5380848050117493, - "666": 0.5694063901901245, - "667": 0.5971958041191101, - "668": 0.5856954455375671, - "669": 0.681048572063446, - "670": 0.7289005517959595, - "671": 0.7114271521568298, - "672": 0.7229717969894409, - "673": 0.6649057269096375, - "674": 0.6589520573616028, - "675": 0.606964111328125, - "676": 0.5510775446891785, - "677": 0.5911911725997925, - "678": 0.5577713251113892, - "679": 0.46715670824050903, - "680": 0.5220733880996704, - "681": 0.49557799100875854, - "682": 0.4635457694530487, - "683": 0.46117931604385376, - "684": 0.366115003824234, - "685": 0.3324587941169739, - "686": 0.30120137333869934, - "687": 0.3033061623573303, - "688": 0.27584969997406006, - "689": 0.3256341218948364, - "690": 0.306515634059906, - "691": 0.27305927872657776, - "692": 0.32025620341300964, - "693": 0.30496740341186523, - "694": 0.32456541061401367, - "695": 0.3657022714614868, - "696": 0.28745922446250916, - "697": 0.3022083342075348, - "698": 0.28058817982673645, - "699": 0.298888236284256, - "700": 0.38229817152023315, - "701": 0.3209591507911682 - }, - "loss": { - "648": 2.592336654663086, - "649": 2.6028923988342285, - "650": 2.6087141036987305, - "651": 2.626216173171997, - "652": 2.6205203533172607, - "653": 2.601165771484375, - "654": 2.613736629486084, - "655": 2.6018567085266113, - "656": 2.6031391620635986, - "657": 2.589202880859375, - "658": 2.589087724685669, - "659": 2.5789053440093994, - "660": 2.5687503814697266, - "661": 2.599064826965332, - "662": 2.584836006164551, - "663": 2.5851101875305176, - "664": 2.5766913890838623, - "665": 2.580737352371216, - "666": 2.5948710441589355, - "667": 2.557612657546997, - "668": 2.5727901458740234, - "669": 2.593672275543213, - "670": 2.5946192741394043, - "671": 2.5791306495666504, - "672": 2.5738112926483154, - "673": 2.5791189670562744, - "674": 2.580991506576538, - "675": 2.5737242698669434, - "676": 2.594982147216797, - "677": 2.581864833831787, - "678": 2.581547737121582, - "679": 2.5753448009490967, - "680": 2.5702717304229736, - "681": 2.56905460357666, - "682": 2.5909414291381836, - "683": 2.578022003173828, - "684": 2.5715603828430176, - "685": 2.572442054748535, - "686": 2.5702497959136963, - "687": 2.574857711791992, - "688": 2.5609073638916016, - "689": 2.5848307609558105, - "690": 2.562727928161621, - "691": 2.5779733657836914, - "692": 2.5541539192199707, - "693": 2.5602164268493652, - "694": 2.5699291229248047, - "695": 2.5719079971313477, - "696": 2.5735368728637695, - "697": 2.565096855163574, - "698": 2.555540084838867, - "699": 2.5925405025482178, - "700": 2.568173885345459, - "701": 2.562692403793335 - }, - "lr": { - "648": 0.1, - "649": 0.09938271604938272, - "650": 0.09876543209876543, - "651": 0.09814814814814815, - "652": 0.09753086419753088, - "653": 0.09691358024691359, - "654": 0.09629629629629631, - "655": 0.09567901234567902, - "656": 0.09506172839506173, - "657": 0.09444444444444444, - "658": 0.09382716049382717, - "659": 0.09320987654320989, - "660": 0.0925925925925926, - "661": 0.09197530864197531, - "662": 0.09135802469135804, - "663": 0.09074074074074075, - "664": 0.09012345679012346, - "665": 0.08950617283950618, - "666": 0.08888888888888889, - "667": 0.08827160493827162, - "668": 0.08765432098765433, - "669": 0.08703703703703704, - "670": 0.08641975308641976, - "671": 0.08580246913580247, - "672": 0.0851851851851852, - "673": 0.08456790123456791, - "674": 0.08395061728395062, - "675": 0.08333333333333334, - "676": 0.08271604938271605, - "677": 0.08209876543209876, - "678": 0.08148148148148149, - "679": 0.08086419753086421, - "680": 0.08024691358024692, - "681": 0.07962962962962963, - "682": 0.07901234567901234, - "683": 0.07839506172839507, - "684": 0.07777777777777778, - "685": 0.0771604938271605, - "686": 0.07654320987654323, - "687": 0.07592592592592594, - "688": 0.07530864197530865, - "689": 0.07469135802469136, - "690": 0.07407407407407407, - "691": 0.07345679012345678, - "692": 0.0728395061728395, - "693": 0.07222222222222223, - "694": 0.07160493827160495, - "695": 0.07098765432098766, - "696": 0.07037037037037037, - "697": 0.06975308641975309, - "698": 0.0691358024691358, - "699": 0.06851851851851852, - "700": 0.06790123456790124, - "701": 0.06728395061728396 - } - }, - "train_epoch_time": 4.789390802383423, - "train_loss": 2.5664006419982255, - "train_score": 0.26575838418054787, - "val_loss": 2.590764033123765, - "val_score": 0.25639979217958503 - }, - { - "epoch": 13, - "grad_norm": 0.23563528060913086, - "learning_rate": 0.06666666666666668, - "model_norm": 87.9243392944336, - "step_logs": { - "grad_norm": { - "702": 0.3117081820964813, - "703": 0.28111910820007324, - "704": 0.29745182394981384, - "705": 0.36087968945503235, - "706": 0.2960343658924103, - "707": 0.2654924690723419, - "708": 0.2641964554786682, - "709": 0.30734601616859436, - "710": 0.3318740129470825, - "711": 0.3192457854747772, - "712": 0.3369113504886627, - "713": 0.30742621421813965, - "714": 0.2744104564189911, - "715": 0.3230270445346832, - "716": 0.31964409351348877, - "717": 0.2801240384578705, - "718": 0.35099443793296814, - "719": 0.3327133059501648, - "720": 0.3149246573448181, - "721": 0.3623065650463104, - "722": 0.30663007497787476, - "723": 0.2702566981315613, - "724": 0.32905909419059753, - "725": 0.2621152400970459, - "726": 0.25548678636550903, - "727": 0.24502606689929962, - "728": 0.34794163703918457, - "729": 0.25389766693115234, - "730": 0.28882837295532227, - "731": 0.28239139914512634, - "732": 0.3363243341445923, - "733": 0.33072951436042786, - "734": 0.2719234228134155, - "735": 0.2361488938331604, - "736": 0.25554588437080383, - "737": 0.2818159759044647, - "738": 0.3646887242794037, - "739": 0.34500768780708313, - "740": 0.30105453729629517, - "741": 0.277146577835083, - "742": 0.26946109533309937, - "743": 0.279104620218277, - "744": 0.2642884850502014, - "745": 0.3399738073348999, - "746": 0.27590250968933105, - "747": 0.25552427768707275, - "748": 0.2481745183467865, - "749": 0.250038206577301, - "750": 0.24432861804962158, - "751": 0.24622569978237152, - "752": 0.2436780482530594, - "753": 0.25488924980163574, - "754": 0.29004406929016113, - "755": 0.23563528060913086 - }, - "loss": { - "702": 2.565304756164551, - "703": 2.542484998703003, - "704": 2.554048538208008, - "705": 2.5680859088897705, - "706": 2.568817615509033, - "707": 2.5685794353485107, - "708": 2.569775104522705, - "709": 2.5595221519470215, - "710": 2.5770986080169678, - "711": 2.57230806350708, - "712": 2.538276433944702, - "713": 2.5701746940612793, - "714": 2.5652430057525635, - "715": 2.5728981494903564, - "716": 2.5630252361297607, - "717": 2.553982734680176, - "718": 2.564180850982666, - "719": 2.5603227615356445, - "720": 2.573831081390381, - "721": 2.5586624145507812, - "722": 2.565708637237549, - "723": 2.5375452041625977, - "724": 2.5520243644714355, - "725": 2.5846681594848633, - "726": 2.5612869262695312, - "727": 2.5715951919555664, - "728": 2.544262647628784, - "729": 2.55293345451355, - "730": 2.5561492443084717, - "731": 2.584156036376953, - "732": 2.5557851791381836, - "733": 2.572971820831299, - "734": 2.5559208393096924, - "735": 2.574418067932129, - "736": 2.549515724182129, - "737": 2.5522515773773193, - "738": 2.575105667114258, - "739": 2.562318801879883, - "740": 2.560502767562866, - "741": 2.581686019897461, - "742": 2.5498270988464355, - "743": 2.549985885620117, - "744": 2.5514426231384277, - "745": 2.549697160720825, - "746": 2.5701656341552734, - "747": 2.575108766555786, - "748": 2.5700650215148926, - "749": 2.556898593902588, - "750": 2.5626792907714844, - "751": 2.5578603744506836, - "752": 2.566579818725586, - "753": 2.565284490585327, - "754": 2.5585641860961914, - "755": 2.562068223953247 - }, - "lr": { - "702": 0.06666666666666668, - "703": 0.06604938271604939, - "704": 0.0654320987654321, - "705": 0.06481481481481481, - "706": 0.06419753086419754, - "707": 0.06358024691358025, - "708": 0.06296296296296297, - "709": 0.06234567901234568, - "710": 0.061728395061728406, - "711": 0.061111111111111116, - "712": 0.060493827160493834, - "713": 0.059876543209876544, - "714": 0.05925925925925926, - "715": 0.05864197530864197, - "716": 0.058024691358024696, - "717": 0.05740740740740741, - "718": 0.05679012345679013, - "719": 0.05617283950617285, - "720": 0.05555555555555556, - "721": 0.05493827160493828, - "722": 0.05432098765432099, - "723": 0.0537037037037037, - "724": 0.05308641975308642, - "725": 0.05246913580246914, - "726": 0.051851851851851864, - "727": 0.051234567901234575, - "728": 0.05061728395061729, - "729": 0.05, - "730": 0.04938271604938271, - "731": 0.04876543209876543, - "732": 0.048148148148148155, - "733": 0.047530864197530866, - "734": 0.04691358024691358, - "735": 0.046296296296296294, - "736": 0.04567901234567902, - "737": 0.04506172839506173, - "738": 0.044444444444444446, - "739": 0.04382716049382716, - "740": 0.04320987654320988, - "741": 0.0425925925925926, - "742": 0.04197530864197531, - "743": 0.04135802469135802, - "744": 0.040740740740740744, - "745": 0.04012345679012346, - "746": 0.03950617283950617, - "747": 0.03888888888888889, - "748": 0.038271604938271614, - "749": 0.037654320987654324, - "750": 0.037037037037037035, - "751": 0.03641975308641975, - "752": 0.03580246913580248, - "753": 0.03518518518518519, - "754": 0.0345679012345679, - "755": 0.033950617283950615 - } - }, - "train_epoch_time": 4.788929224014282, - "train_loss": 2.5584420603009175, - "train_score": 0.2657057029530513, - "val_loss": 2.583310950911004, - "val_score": 0.2565433047669079 - }, - { - "epoch": 14, - "grad_norm": 0.2518008351325989, - "learning_rate": 0.03333333333333334, - "model_norm": 87.92538452148438, - "step_logs": { - "grad_norm": { - "756": 0.2775435149669647, - "757": 0.29300960898399353, - "758": 0.3221818506717682, - "759": 0.24240811169147491, - "760": 0.26610898971557617, - "761": 0.30956459045410156, - "762": 0.25148969888687134, - "763": 0.2645169496536255, - "764": 0.328706294298172, - "765": 0.24311308562755585, - "766": 0.24176357686519623, - "767": 0.21268054842948914, - "768": 0.27396246790885925, - "769": 0.254978746175766, - "770": 0.2865082621574402, - "771": 0.24055102467536926, - "772": 0.23608236014842987, - "773": 0.23270298540592194, - "774": 0.22181636095046997, - "775": 0.27683961391448975, - "776": 0.25761187076568604, - "777": 0.20783394575119019, - "778": 0.24660271406173706, - "779": 0.3313913941383362, - "780": 0.279691219329834, - "781": 0.25982221961021423, - "782": 0.22880993783473969, - "783": 0.2237672507762909, - "784": 0.2610894739627838, - "785": 0.2801312506198883, - "786": 0.22010964155197144, - "787": 0.2497442215681076, - "788": 0.2843281626701355, - "789": 0.23611755669116974, - "790": 0.28263595700263977, - "791": 0.23977956175804138, - "792": 0.24657556414604187, - "793": 0.24455679953098297, - "794": 0.2648468613624573, - "795": 0.22714628279209137, - "796": 0.24095964431762695, - "797": 0.22900207340717316, - "798": 0.25532257556915283, - "799": 0.22062742710113525, - "800": 0.246209055185318, - "801": 0.24342700839042664, - "802": 0.25300362706184387, - "803": 0.29027682542800903, - "804": 0.22983090579509735, - "805": 0.23163969814777374, - "806": 0.23459257185459137, - "807": 0.23948901891708374, - "808": 0.2343757599592209, - "809": 0.2518008351325989 - }, - "loss": { - "756": 2.5660817623138428, - "757": 2.5577492713928223, - "758": 2.5660839080810547, - "759": 2.5521488189697266, - "760": 2.554875612258911, - "761": 2.570044994354248, - "762": 2.559504747390747, - "763": 2.546598434448242, - "764": 2.5554494857788086, - "765": 2.552934169769287, - "766": 2.5620369911193848, - "767": 2.5694453716278076, - "768": 2.5737318992614746, - "769": 2.546405553817749, - "770": 2.5901038646698, - "771": 2.5474853515625, - "772": 2.5522875785827637, - "773": 2.557750701904297, - "774": 2.548905849456787, - "775": 2.547020435333252, - "776": 2.5597705841064453, - "777": 2.5656325817108154, - "778": 2.552678346633911, - "779": 2.5623927116394043, - "780": 2.5527384281158447, - "781": 2.5446033477783203, - "782": 2.5598649978637695, - "783": 2.5354132652282715, - "784": 2.5684633255004883, - "785": 2.5687317848205566, - "786": 2.5529067516326904, - "787": 2.5547072887420654, - "788": 2.5824577808380127, - "789": 2.5537703037261963, - "790": 2.547931432723999, - "791": 2.5651164054870605, - "792": 2.5557239055633545, - "793": 2.5407819747924805, - "794": 2.5403971672058105, - "795": 2.5537753105163574, - "796": 2.5584399700164795, - "797": 2.537245273590088, - "798": 2.558483600616455, - "799": 2.5595879554748535, - "800": 2.54250431060791, - "801": 2.546520471572876, - "802": 2.557851791381836, - "803": 2.5625834465026855, - "804": 2.5644705295562744, - "805": 2.568164348602295, - "806": 2.562441349029541, - "807": 2.5585927963256836, - "808": 2.563767910003662, - "809": 2.542076349258423 - }, - "lr": { - "756": 0.03333333333333334, - "757": 0.03271604938271605, - "758": 0.03209876543209877, - "759": 0.03148148148148148, - "760": 0.030864197530864203, - "761": 0.030246913580246917, - "762": 0.02962962962962963, - "763": 0.02901234567901234, - "764": 0.028395061728395066, - "765": 0.02777777777777778, - "766": 0.027160493827160494, - "767": 0.026543209876543208, - "768": 0.025925925925925932, - "769": 0.025308641975308646, - "770": 0.024691358024691357, - "771": 0.02407407407407407, - "772": 0.023456790123456795, - "773": 0.02283950617283951, - "774": 0.022222222222222223, - "775": 0.021604938271604937, - "776": 0.02098765432098766, - "777": 0.020370370370370372, - "778": 0.019753086419753086, - "779": 0.0191358024691358, - "780": 0.018518518518518524, - "781": 0.01790123456790124, - "782": 0.01728395061728395, - "783": 0.016666666666666663, - "784": 0.016049382716049387, - "785": 0.015432098765432101, - "786": 0.014814814814814815, - "787": 0.014197530864197528, - "788": 0.013580246913580252, - "789": 0.012962962962962966, - "790": 0.012345679012345678, - "791": 0.011728395061728392, - "792": 0.011111111111111117, - "793": 0.01049382716049383, - "794": 0.009876543209876543, - "795": 0.009259259259259257, - "796": 0.008641975308641981, - "797": 0.008024691358024694, - "798": 0.007407407407407408, - "799": 0.006790123456790121, - "800": 0.006172839506172845, - "801": 0.005555555555555558, - "802": 0.0049382716049382715, - "803": 0.004320987654320985, - "804": 0.003703703703703709, - "805": 0.0030864197530864226, - "806": 0.0024691358024691358, - "807": 0.001851851851851849, - "808": 0.0012345679012345735, - "809": 0.0006172839506172868 - } - }, - "train_epoch_time": 4.789141893386841, - "train_loss": 2.5557412305555522, - "train_score": 0.2661686243422575, - "val_loss": 2.5808811688669513, - "val_score": 0.25705008597105705 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:26:29.478094", - "final_model_norm": 87.92538452148438, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:24:48.737793", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 4.768748760223389, - "learning_rate": 2.15e-11, - "model_norm": 91.1038818359375, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.512977123260498, - "3": 7.69253396987915, - "4": 21.81309700012207, - "5": 5.747983455657959, - "6": 5.2405686378479, - "7": 4.423764705657959, - "8": 4.456271648406982, - "9": 3.984340190887451, - "10": 4.62080192565918, - "11": 4.915428638458252, - "12": 56.408966064453125, - "13": 467.4852294921875, - "14": 8.032468795776367, - "15": 3.1247873306274414, - "16": 3.5135746002197266, - "17": 4.337405681610107, - "18": 7.781325817108154, - "19": 4.247313499450684, - "20": 5.036606311798096, - "21": 3.811758279800415, - "22": 2.94175386428833, - "23": 8.17628288269043, - "24": 3.4523274898529053, - "25": 5.4133124351501465, - "26": 3.348008632659912, - "27": 14.790365219116211, - "28": 4.12067985534668, - "29": 8.541983604431152, - "30": 3.94637393951416, - "31": 2.8650941848754883, - "32": 4.193454742431641, - "33": 17.27073097229004, - "34": 7.146099090576172, - "35": 4.75208044052124, - "36": 11.106724739074707, - "37": 4.310473918914795, - "38": 2.791454792022705, - "39": 3.749058961868286, - "40": 4.987452030181885, - "41": 3.3146536350250244, - "42": 4.122133731842041, - "43": 12.054593086242676, - "44": 6.384027004241943, - "45": 12.094640731811523, - "46": 4.997376441955566, - "47": 5.145849227905273, - "48": 9.49466609954834, - "49": 4.682456016540527, - "50": 4.668734550476074, - "51": 3.8795957565307617, - "52": 2.3955156803131104, - "53": 4.768748760223389 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.8393378257751465, - "3": 3.6987521648406982, - "4": 4.2365193367004395, - "5": 4.385725975036621, - "6": 3.8917675018310547, - "7": 3.6118927001953125, - "8": 3.4665393829345703, - "9": 3.543761730194092, - "10": 3.3938088417053223, - "11": 3.585111141204834, - "12": 3.5599217414855957, - "13": 5.370888710021973, - "14": 4.655491828918457, - "15": 3.761195659637451, - "16": 3.7823736667633057, - "17": 3.613765239715576, - "18": 4.473066329956055, - "19": 4.02977991104126, - "20": 3.686736583709717, - "21": 4.340450286865234, - "22": 3.4855732917785645, - "23": 3.9735124111175537, - "24": 5.338529586791992, - "25": 4.520724773406982, - "26": 4.07056999206543, - "27": 5.221320152282715, - "28": 7.9278364181518555, - "29": 6.7389140129089355, - "30": 5.49710750579834, - "31": 3.967289924621582, - "32": 4.2562665939331055, - "33": 5.819311141967773, - "34": 9.422431945800781, - "35": 8.984328269958496, - "36": 7.551095485687256, - "37": 7.422612190246582, - "38": 5.4401750564575195, - "39": 5.10221529006958, - "40": 5.74896240234375, - "41": 4.661088943481445, - "42": 4.535565376281738, - "43": 4.940070152282715, - "44": 9.825352668762207, - "45": 9.690672874450684, - "46": 9.897573471069336, - "47": 7.447362899780273, - "48": 6.910978317260742, - "49": 10.103110313415527, - "50": 8.139686584472656, - "51": 6.164586067199707, - "52": 3.775649309158325, - "53": 3.7716851234436035 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "train_epoch_time": 4.790753364562988, - "train_loss": 5.288014858524974, - "train_score": 0.04657236370371609, - "val_loss": 5.269995704173494, - "val_score": 0.04376255736251102 - }, - { - "epoch": 1, - "grad_norm": 1.4773788452148438, - "learning_rate": 0.215, - "model_norm": 90.87535095214844, - "step_logs": { - "grad_norm": { - "54": 3.777482509613037, - "55": 12.372153282165527, - "56": 5.544676303863525, - "57": 4.050521373748779, - "58": 3.751547336578369, - "59": 3.5061357021331787, - "60": 5.444124221801758, - "61": 10.985054969787598, - "62": 4.180367469787598, - "63": 3.997178077697754, - "64": 3.327848434448242, - "65": 5.355532169342041, - "66": 2.1705005168914795, - "67": 11.513355255126953, - "68": 3.6953213214874268, - "69": 2.9278368949890137, - "70": 4.160811901092529, - "71": 2.607463836669922, - "72": 2.622835874557495, - "73": 5.121217250823975, - "74": 3.033362865447998, - "75": 2.4544143676757812, - "76": 1.2142291069030762, - "77": 4.076670169830322, - "78": 2.422085762023926, - "79": 3.275275945663452, - "80": 4.8507914543151855, - "81": 2.527409076690674, - "82": 2.189275026321411, - "83": 0.9653934240341187, - "84": 1.6801722049713135, - "85": 2.077773332595825, - "86": 7.9734320640563965, - "87": 3.171640396118164, - "88": 2.356574296951294, - "89": 2.090884208679199, - "90": 2.095611572265625, - "91": 3.5968568325042725, - "92": 1.9283243417739868, - "93": 1.6904977560043335, - "94": 3.743948459625244, - "95": 2.1623547077178955, - "96": 1.9533060789108276, - "97": 2.341980457305908, - "98": 1.8053877353668213, - "99": 0.530434250831604, - "100": 1.4637936353683472, - "101": 1.803134799003601, - "102": 3.1357672214508057, - "103": 1.8519048690795898, - "104": 1.4711562395095825, - "105": 3.2019054889678955, - "106": 1.7563107013702393, - "107": 1.4773788452148438 - }, - "loss": { - "54": 5.308948993682861, - "55": 5.660882949829102, - "56": 10.037200927734375, - "57": 8.961925506591797, - "58": 7.4425435066223145, - "59": 5.633369445800781, - "60": 4.434139251708984, - "61": 5.658693313598633, - "62": 9.487836837768555, - "63": 7.790412902832031, - "64": 6.259358882904053, - "65": 5.018208980560303, - "66": 4.425844192504883, - "67": 5.726444244384766, - "68": 8.743014335632324, - "69": 7.259713649749756, - "70": 6.063974380493164, - "71": 5.477700233459473, - "72": 4.26163911819458, - "73": 3.714019298553467, - "74": 5.2956132888793945, - "75": 4.524656772613525, - "76": 3.433161735534668, - "77": 3.573251962661743, - "78": 4.77878475189209, - "79": 3.908001184463501, - "80": 3.990009307861328, - "81": 5.180383682250977, - "82": 4.075482368469238, - "83": 3.406607151031494, - "84": 3.387862205505371, - "85": 3.63771390914917, - "86": 4.154426574707031, - "87": 6.24276065826416, - "88": 5.5883002281188965, - "89": 4.471851825714111, - "90": 3.7065000534057617, - "91": 3.6209402084350586, - "92": 4.350677013397217, - "93": 3.6024441719055176, - "94": 3.5279650688171387, - "95": 4.4583210945129395, - "96": 3.7381820678710938, - "97": 3.440885066986084, - "98": 3.8653812408447266, - "99": 3.3540751934051514, - "100": 3.383711338043213, - "101": 3.5982909202575684, - "102": 3.517383575439453, - "103": 4.132829189300537, - "104": 3.5085182189941406, - "105": 3.5050244331359863, - "106": 4.13048791885376, - "107": 3.5065300464630127 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "train_epoch_time": 4.7885589599609375, - "train_loss": 3.4609267206753, - "train_score": 0.1526105183055103, - "val_loss": 3.480834239659709, - "val_score": 0.15114182700861745 - }, - { - "epoch": 2, - "grad_norm": 0.8592105507850647, - "learning_rate": 0.215, - "model_norm": 90.87315368652344, - "step_logs": { - "grad_norm": { - "108": 2.629751443862915, - "109": 1.7028534412384033, - "110": 1.0531786680221558, - "111": 2.445246696472168, - "112": 1.618475317955017, - "113": 0.8111779093742371, - "114": 1.7762572765350342, - "115": 1.5149307250976562, - "116": 0.5596305727958679, - "117": 0.8351070284843445, - "118": 1.8120121955871582, - "119": 1.493711233139038, - "120": 0.4410189986228943, - "121": 0.5326366424560547, - "122": 1.0765730142593384, - "123": 1.1864361763000488, - "124": 1.5656208992004395, - "125": 1.3925890922546387, - "126": 0.7641895413398743, - "127": 0.9468544721603394, - "128": 1.540255069732666, - "129": 1.360662579536438, - "130": 0.7268709540367126, - "131": 0.904043972492218, - "132": 1.4499551057815552, - "133": 1.2613534927368164, - "134": 0.6526756882667542, - "135": 0.7883232235908508, - "136": 1.2192714214324951, - "137": 1.1647212505340576, - "138": 0.9443559646606445, - "139": 0.9905188679695129, - "140": 1.0919480323791504, - "141": 1.0841748714447021, - "142": 1.0498263835906982, - "143": 1.015977144241333, - "144": 0.8942776322364807, - "145": 0.9320101141929626, - "146": 1.0307154655456543, - "147": 0.9822448492050171, - "148": 0.8373734951019287, - "149": 0.8981814384460449, - "150": 1.056370496749878, - "151": 0.9678896069526672, - "152": 0.7213351726531982, - "153": 0.7626000642776489, - "154": 0.8951994776725769, - "155": 0.9377069473266602, - "156": 0.9571329951286316, - "157": 0.9304360151290894, - "158": 0.8673065900802612, - "159": 0.858120858669281, - "160": 0.8175886273384094, - "161": 0.8592105507850647 - }, - "loss": { - "108": 3.4423701763153076, - "109": 3.8930225372314453, - "110": 3.380098819732666, - "111": 3.4664440155029297, - "112": 3.824242115020752, - "113": 3.367769718170166, - "114": 3.4301443099975586, - "115": 3.6164727210998535, - "116": 3.3217084407806396, - "117": 3.3393912315368652, - "118": 3.4138548374176025, - "119": 3.62984561920166, - "120": 3.296369791030884, - "121": 3.2922067642211914, - "122": 3.375063180923462, - "123": 3.457331657409668, - "124": 3.425962448120117, - "125": 3.554396629333496, - "126": 3.363281011581421, - "127": 3.384377956390381, - "128": 3.377870559692383, - "129": 3.49696683883667, - "130": 3.3201117515563965, - "131": 3.3843016624450684, - "132": 3.382411003112793, - "133": 3.4777114391326904, - "134": 3.3196568489074707, - "135": 3.3683416843414307, - "136": 3.3693923950195312, - "137": 3.434567451477051, - "138": 3.3395345211029053, - "139": 3.402350664138794, - "140": 3.3532817363739014, - "141": 3.3957369327545166, - "142": 3.3668744564056396, - "143": 3.4562594890594482, - "144": 3.3420727252960205, - "145": 3.389443874359131, - "146": 3.3524951934814453, - "147": 3.4206604957580566, - "148": 3.3603012561798096, - "149": 3.378995180130005, - "150": 3.3784937858581543, - "151": 3.403463125228882, - "152": 3.3319382667541504, - "153": 3.3676114082336426, - "154": 3.319920539855957, - "155": 3.4096570014953613, - "156": 3.3418140411376953, - "157": 3.3852076530456543, - "158": 3.3415472507476807, - "159": 3.381582260131836, - "160": 3.3125195503234863, - "161": 3.35613751411438 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "train_epoch_time": 4.788424730300903, - "train_loss": 3.3468939742193675, - "train_score": 0.15264974893678723, - "val_loss": 3.365585728709925, - "val_score": 0.15115528137141884 - }, - { - "epoch": 3, - "grad_norm": 0.6087993383407593, - "learning_rate": 0.215, - "model_norm": 90.8847427368164, - "step_logs": { - "grad_norm": { - "162": 0.9361050724983215, - "163": 0.880911111831665, - "164": 0.776106595993042, - "165": 0.7700256705284119, - "166": 0.7481832504272461, - "167": 0.7968769669532776, - "168": 0.8789586424827576, - "169": 0.8540317416191101, - "170": 0.7815343141555786, - "171": 0.7702912092208862, - "172": 0.7692718505859375, - "173": 0.769106924533844, - "174": 0.7554870247840881, - "175": 0.7641161680221558, - "176": 0.7776530981063843, - "177": 0.7733656167984009, - "178": 0.7354782819747925, - "179": 0.7410027384757996, - "180": 0.7778562307357788, - "181": 0.7700233459472656, - "182": 0.7346639633178711, - "183": 0.7208408117294312, - "184": 0.6835162043571472, - "185": 0.6808939576148987, - "186": 0.6911190748214722, - "187": 0.7192779183387756, - "188": 0.731620728969574, - "189": 0.7152380347251892, - "190": 0.7036978006362915, - "191": 0.6851203441619873, - "192": 0.64600670337677, - "193": 0.6500484943389893, - "194": 0.6319776177406311, - "195": 0.6150934100151062, - "196": 0.5809333920478821, - "197": 0.5438845753669739, - "198": 0.43556106090545654, - "199": 0.4299978017807007, - "200": 0.47716495394706726, - "201": 0.4903109073638916, - "202": 0.48185867071151733, - "203": 0.5080997347831726, - "204": 0.5930094122886658, - "205": 0.6333677172660828, - "206": 0.700766384601593, - "207": 0.706038236618042, - "208": 0.6878578066825867, - "209": 0.6737412810325623, - "210": 0.6068543195724487, - "211": 0.5982890129089355, - "212": 0.617000937461853, - "213": 0.6061030626296997, - "214": 0.5725844502449036, - "215": 0.6087993383407593 - }, - "loss": { - "162": 3.330514430999756, - "163": 3.390855312347412, - "164": 3.3226938247680664, - "165": 3.357847213745117, - "166": 3.3357012271881104, - "167": 3.3582639694213867, - "168": 3.369551181793213, - "169": 3.3661460876464844, - "170": 3.3172967433929443, - "171": 3.3735604286193848, - "172": 3.3187832832336426, - "173": 3.358832836151123, - "174": 3.3696632385253906, - "175": 3.3532443046569824, - "176": 3.3249213695526123, - "177": 3.338181257247925, - "178": 3.315990686416626, - "179": 3.335751533508301, - "180": 3.3583521842956543, - "181": 3.349484443664551, - "182": 3.311713457107544, - "183": 3.3350706100463867, - "184": 3.3260340690612793, - "185": 3.3500959873199463, - "186": 3.3126401901245117, - "187": 3.346989393234253, - "188": 3.3468844890594482, - "189": 3.341794490814209, - "190": 3.3432457447052, - "191": 3.358865737915039, - "192": 3.308229684829712, - "193": 3.341928243637085, - "194": 3.314577579498291, - "195": 3.3355393409729004, - "196": 3.3360981941223145, - "197": 3.359072208404541, - "198": 3.2878284454345703, - "199": 3.304107189178467, - "200": 3.3157496452331543, - "201": 3.3576459884643555, - "202": 3.2921295166015625, - "203": 3.3005096912384033, - "204": 3.3229928016662598, - "205": 3.3249733448028564, - "206": 3.323307752609253, - "207": 3.322645664215088, - "208": 3.3047432899475098, - "209": 3.3143444061279297, - "210": 3.282731533050537, - "211": 3.304281711578369, - "212": 3.322359561920166, - "213": 3.3297743797302246, - "214": 3.271000385284424, - "215": 3.283421277999878 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "train_epoch_time": 4.788079738616943, - "train_loss": 3.2949323500246024, - "train_score": 0.15291315464241434, - "val_loss": 3.311749011859828, - "val_score": 0.15153200334166825 - }, - { - "epoch": 4, - "grad_norm": 0.9821807146072388, - "learning_rate": 0.215, - "model_norm": 90.92161560058594, - "step_logs": { - "grad_norm": { - "216": 0.7106828093528748, - "217": 0.7289441227912903, - "218": 0.7488495707511902, - "219": 0.7892098426818848, - "220": 0.8758535385131836, - "221": 0.8916842341423035, - "222": 0.7863014936447144, - "223": 0.7079423069953918, - "224": 0.644995927810669, - "225": 0.6450706124305725, - "226": 0.6772181391716003, - "227": 0.7630394101142883, - "228": 0.8540375828742981, - "229": 0.8938946723937988, - "230": 0.8704829216003418, - "231": 0.7382178902626038, - "232": 0.5688614845275879, - "233": 0.5606990456581116, - "234": 0.6594306230545044, - "235": 0.8704900741577148, - "236": 1.026406168937683, - "237": 1.0585143566131592, - "238": 1.070932149887085, - "239": 1.240985631942749, - "240": 1.182536005973816, - "241": 1.1327226161956787, - "242": 1.0874539613723755, - "243": 1.2034251689910889, - "244": 1.2215259075164795, - "245": 1.2296887636184692, - "246": 1.3253308534622192, - "247": 1.3344638347625732, - "248": 1.2188246250152588, - "249": 1.0702893733978271, - "250": 1.048462152481079, - "251": 1.0182101726531982, - "252": 1.0540645122528076, - "253": 1.1424111127853394, - "254": 1.3030123710632324, - "255": 1.4511394500732422, - "256": 1.334547758102417, - "257": 1.1782286167144775, - "258": 1.0350970029830933, - "259": 1.0300384759902954, - "260": 1.1178781986236572, - "261": 1.012025237083435, - "262": 0.8595377206802368, - "263": 0.873961329460144, - "264": 0.8866082429885864, - "265": 0.9155511260032654, - "266": 0.9512255191802979, - "267": 0.9299663305282593, - "268": 0.9550856351852417, - "269": 0.9821807146072388 - }, - "loss": { - "216": 3.3120460510253906, - "217": 3.2978439331054688, - "218": 3.3191962242126465, - "219": 3.2930498123168945, - "220": 3.3162074089050293, - "221": 3.280308723449707, - "222": 3.2659242153167725, - "223": 3.270326614379883, - "224": 3.2244462966918945, - "225": 3.248737335205078, - "226": 3.21547532081604, - "227": 3.19978666305542, - "228": 3.222573757171631, - "229": 3.186828851699829, - "230": 3.167494773864746, - "231": 3.1894612312316895, - "232": 3.1455721855163574, - "233": 3.1205992698669434, - "234": 3.141139507293701, - "235": 3.115446090698242, - "236": 3.124189853668213, - "237": 3.144502639770508, - "238": 3.1282873153686523, - "239": 3.1211657524108887, - "240": 3.110682487487793, - "241": 3.0861527919769287, - "242": 3.0409159660339355, - "243": 3.0720391273498535, - "244": 3.0308175086975098, - "245": 3.0496511459350586, - "246": 3.045163631439209, - "247": 3.0699145793914795, - "248": 3.0300710201263428, - "249": 3.0218124389648438, - "250": 2.9954240322113037, - "251": 2.9963502883911133, - "252": 2.971977710723877, - "253": 2.99910306930542, - "254": 2.996168375015259, - "255": 3.0508017539978027, - "256": 3.0176072120666504, - "257": 3.0055220127105713, - "258": 2.955789566040039, - "259": 2.9714255332946777, - "260": 2.964890956878662, - "261": 2.98284912109375, - "262": 2.948190689086914, - "263": 2.9432339668273926, - "264": 2.919694423675537, - "265": 2.944222927093506, - "266": 2.935487747192383, - "267": 2.9397125244140625, - "268": 2.9095535278320312, - "269": 2.956695079803467 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "train_epoch_time": 4.788573503494263, - "train_loss": 2.929616554293092, - "train_score": 0.2061939561854476, - "val_loss": 2.9433626835710283, - "val_score": 0.20191859199861162 - }, - { - "epoch": 5, - "grad_norm": 0.7710897922515869, - "learning_rate": 0.215, - "model_norm": 90.93704223632812, - "step_logs": { - "grad_norm": { - "270": 0.9826538562774658, - "271": 0.9667792320251465, - "272": 0.9048634171485901, - "273": 0.9344331622123718, - "274": 0.9651849269866943, - "275": 1.1015145778656006, - "276": 1.1377453804016113, - "277": 1.186976671218872, - "278": 0.9845960736274719, - "279": 0.8418571352958679, - "280": 0.7251552939414978, - "281": 0.6260077357292175, - "282": 0.6355652809143066, - "283": 0.6870912909507751, - "284": 0.7565816640853882, - "285": 0.8322285413742065, - "286": 0.9116373062133789, - "287": 0.9543244242668152, - "288": 0.9194244742393494, - "289": 0.8983859419822693, - "290": 0.942709743976593, - "291": 0.9613945484161377, - "292": 0.956580400466919, - "293": 0.8938926458358765, - "294": 0.8112130761146545, - "295": 0.7788195610046387, - "296": 0.7702645063400269, - "297": 0.7575488686561584, - "298": 0.7975647449493408, - "299": 0.7941131591796875, - "300": 1.005330204963684, - "301": 1.058396816253662, - "302": 1.203217625617981, - "303": 1.216032862663269, - "304": 1.217220425605774, - "305": 1.472736120223999, - "306": 1.3118972778320312, - "307": 1.011157512664795, - "308": 0.8079589605331421, - "309": 0.6990920901298523, - "310": 0.7661309242248535, - "311": 0.8004953265190125, - "312": 1.0540677309036255, - "313": 1.0774898529052734, - "314": 0.9952903985977173, - "315": 1.055246114730835, - "316": 1.1704283952713013, - "317": 1.2923685312271118, - "318": 1.196557879447937, - "319": 1.065063238143921, - "320": 0.9533786177635193, - "321": 0.8603740930557251, - "322": 0.7936952114105225, - "323": 0.7710897922515869 - }, - "loss": { - "270": 2.921260356903076, - "271": 2.9505672454833984, - "272": 2.942061185836792, - "273": 2.927546977996826, - "274": 2.9226222038269043, - "275": 2.938413143157959, - "276": 2.965902328491211, - "277": 2.9459402561187744, - "278": 2.964310646057129, - "279": 2.9288299083709717, - "280": 2.925896644592285, - "281": 2.8850784301757812, - "282": 2.899108409881592, - "283": 2.900360107421875, - "284": 2.909640312194824, - "285": 2.8972275257110596, - "286": 2.9204258918762207, - "287": 2.9014711380004883, - "288": 2.9194884300231934, - "289": 2.910529613494873, - "290": 2.9192309379577637, - "291": 2.9204721450805664, - "292": 2.9077541828155518, - "293": 2.9187631607055664, - "294": 2.888162612915039, - "295": 2.9116618633270264, - "296": 2.892401933670044, - "297": 2.889254093170166, - "298": 2.897447347640991, - "299": 2.885918617248535, - "300": 2.89274263381958, - "301": 2.937957525253296, - "302": 2.9188385009765625, - "303": 2.963953971862793, - "304": 2.934126853942871, - "305": 2.9722161293029785, - "306": 2.9473910331726074, - "307": 2.921459436416626, - "308": 2.894925832748413, - "309": 2.881929397583008, - "310": 2.8746695518493652, - "311": 2.894906759262085, - "312": 2.867159366607666, - "313": 2.92246413230896, - "314": 2.875049114227295, - "315": 2.908360004425049, - "316": 2.9193291664123535, - "317": 2.9422414302825928, - "318": 2.925055503845215, - "319": 2.9113144874572754, - "320": 2.8835608959198, - "321": 2.886540651321411, - "322": 2.868971109390259, - "323": 2.871776580810547 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "train_epoch_time": 4.788305759429932, - "train_loss": 2.8671649144745968, - "train_score": 0.21053174318507892, - "val_loss": 2.889315739564863, - "val_score": 0.204286559834546 - }, - { - "epoch": 6, - "grad_norm": 1.035821557044983, - "learning_rate": 0.215, - "model_norm": 90.9523696899414, - "step_logs": { - "grad_norm": { - "324": 0.7981342077255249, - "325": 0.8987910747528076, - "326": 1.0302822589874268, - "327": 1.0341377258300781, - "328": 0.9448365569114685, - "329": 0.9385917782783508, - "330": 0.9752305150032043, - "331": 1.0387269258499146, - "332": 1.0405452251434326, - "333": 0.977863609790802, - "334": 0.8374691009521484, - "335": 0.7338889241218567, - "336": 0.7356550693511963, - "337": 0.7309194207191467, - "338": 0.7133876085281372, - "339": 0.7473136186599731, - "340": 0.8495583534240723, - "341": 0.9066926836967468, - "342": 0.8879075050354004, - "343": 0.8375911116600037, - "344": 0.8218190670013428, - "345": 0.8624690175056458, - "346": 0.8988082408905029, - "347": 0.9910452365875244, - "348": 0.9790074229240417, - "349": 0.8766512274742126, - "350": 0.8103423714637756, - "351": 0.7999845147132874, - "352": 0.8938480615615845, - "353": 0.9749035835266113, - "354": 1.0497792959213257, - "355": 1.1329431533813477, - "356": 1.0805407762527466, - "357": 1.132259488105774, - "358": 1.0434494018554688, - "359": 0.9649423956871033, - "360": 1.0336040258407593, - "361": 1.1711769104003906, - "362": 1.0905392169952393, - "363": 0.8944039940834045, - "364": 0.9082133173942566, - "365": 0.9736754298210144, - "366": 0.9713060855865479, - "367": 0.9027257561683655, - "368": 0.8863203525543213, - "369": 0.9012054800987244, - "370": 0.8935128450393677, - "371": 0.8855329751968384, - "372": 0.8607180714607239, - "373": 0.9539946913719177, - "374": 1.0477733612060547, - "375": 1.0993893146514893, - "376": 1.1203137636184692, - "377": 1.035821557044983 - }, - "loss": { - "324": 2.857433319091797, - "325": 2.8956727981567383, - "326": 2.8817408084869385, - "327": 2.9009511470794678, - "328": 2.8811378479003906, - "329": 2.864175319671631, - "330": 2.8641550540924072, - "331": 2.880978584289551, - "332": 2.891833543777466, - "333": 2.8694396018981934, - "334": 2.8575727939605713, - "335": 2.8614745140075684, - "336": 2.860243558883667, - "337": 2.850189685821533, - "338": 2.844123363494873, - "339": 2.8437130451202393, - "340": 2.8458852767944336, - "341": 2.877870559692383, - "342": 2.8635194301605225, - "343": 2.841906785964966, - "344": 2.8393115997314453, - "345": 2.8548948764801025, - "346": 2.852095127105713, - "347": 2.8916561603546143, - "348": 2.874770164489746, - "349": 2.8520350456237793, - "350": 2.843285083770752, - "351": 2.847202777862549, - "352": 2.8495430946350098, - "353": 2.861645460128784, - "354": 2.860210418701172, - "355": 2.8598785400390625, - "356": 2.8754961490631104, - "357": 2.8878116607666016, - "358": 2.9009838104248047, - "359": 2.836493968963623, - "360": 2.843789577484131, - "361": 2.8523473739624023, - "362": 2.9021668434143066, - "363": 2.836028814315796, - "364": 2.8244357109069824, - "365": 2.844738483428955, - "366": 2.8751652240753174, - "367": 2.8390631675720215, - "368": 2.847855567932129, - "369": 2.8226897716522217, - "370": 2.8287134170532227, - "371": 2.8229715824127197, - "372": 2.8411402702331543, - "373": 2.8220102787017822, - "374": 2.872851848602295, - "375": 2.8293468952178955, - "376": 2.8655569553375244, - "377": 2.8372063636779785 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "train_epoch_time": 4.78901481628418, - "train_loss": 2.8365759218098274, - "train_score": 0.2067824157443929, - "val_loss": 2.86207695779242, - "val_score": 0.2005552170642071 - }, - { - "epoch": 7, - "grad_norm": 0.7384330034255981, - "learning_rate": 0.215, - "model_norm": 90.97216033935547, - "step_logs": { - "grad_norm": { - "378": 0.9507484436035156, - "379": 0.810348391532898, - "380": 0.716852605342865, - "381": 0.6468346118927002, - "382": 0.5992889404296875, - "383": 0.58887779712677, - "384": 0.6116159558296204, - "385": 0.7137957215309143, - "386": 0.7953575253486633, - "387": 0.972137987613678, - "388": 1.0486242771148682, - "389": 1.1355589628219604, - "390": 1.0920562744140625, - "391": 1.0073076486587524, - "392": 1.081760287284851, - "393": 1.103289246559143, - "394": 1.0707428455352783, - "395": 0.975640058517456, - "396": 0.9401341080665588, - "397": 0.891375720500946, - "398": 0.7809472680091858, - "399": 0.735560953617096, - "400": 0.6912685036659241, - "401": 0.7386559247970581, - "402": 0.7470145225524902, - "403": 0.8052874207496643, - "404": 0.8702596426010132, - "405": 1.0296536684036255, - "406": 1.056732416152954, - "407": 1.0242395401000977, - "408": 0.9787147045135498, - "409": 0.8223461508750916, - "410": 0.7620035409927368, - "411": 0.7719262838363647, - "412": 0.8005836009979248, - "413": 0.8055107593536377, - "414": 0.7846072912216187, - "415": 0.7065761685371399, - "416": 0.7001236081123352, - "417": 0.7254408597946167, - "418": 0.7898728847503662, - "419": 0.9493966102600098, - "420": 0.9976383447647095, - "421": 1.0153205394744873, - "422": 1.0433634519577026, - "423": 1.0602290630340576, - "424": 1.0212446451187134, - "425": 0.9672016501426697, - "426": 1.1142154932022095, - "427": 1.3501923084259033, - "428": 1.3754948377609253, - "429": 1.0360045433044434, - "430": 0.8087738752365112, - "431": 0.7384330034255981 - }, - "loss": { - "378": 2.824772357940674, - "379": 2.805464744567871, - "380": 2.8015615940093994, - "381": 2.783661127090454, - "382": 2.800049066543579, - "383": 2.7849767208099365, - "384": 2.780452251434326, - "385": 2.786926507949829, - "386": 2.793444871902466, - "387": 2.7944281101226807, - "388": 2.842393398284912, - "389": 2.8049488067626953, - "390": 2.8402552604675293, - "391": 2.796936273574829, - "392": 2.8165361881256104, - "393": 2.8305273056030273, - "394": 2.831249713897705, - "395": 2.7945737838745117, - "396": 2.802100658416748, - "397": 2.797694683074951, - "398": 2.794318199157715, - "399": 2.7647833824157715, - "400": 2.7864327430725098, - "401": 2.766012668609619, - "402": 2.7869274616241455, - "403": 2.777254104614258, - "404": 2.7917895317077637, - "405": 2.770083427429199, - "406": 2.818441867828369, - "407": 2.788850784301758, - "408": 2.7916901111602783, - "409": 2.780367851257324, - "410": 2.7838335037231445, - "411": 2.7763659954071045, - "412": 2.772993564605713, - "413": 2.7483928203582764, - "414": 2.7762808799743652, - "415": 2.7538294792175293, - "416": 2.738293409347534, - "417": 2.7267959117889404, - "418": 2.7461116313934326, - "419": 2.752950668334961, - "420": 2.770277500152588, - "421": 2.736666440963745, - "422": 2.769049882888794, - "423": 2.767879009246826, - "424": 2.765526533126831, - "425": 2.7341904640197754, - "426": 2.7779548168182373, - "427": 2.778820514678955, - "428": 2.8092403411865234, - "429": 2.765235424041748, - "430": 2.736257553100586, - "431": 2.7105939388275146 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "train_epoch_time": 4.788388729095459, - "train_loss": 2.7166390637927282, - "train_score": 0.24429698710889694, - "val_loss": 2.7314381952537596, - "val_score": 0.2374381098113569 - }, - { - "epoch": 8, - "grad_norm": 1.186734914779663, - "learning_rate": 0.215, - "model_norm": 90.99371337890625, - "step_logs": { - "grad_norm": { - "432": 0.682357907295227, - "433": 0.7879934310913086, - "434": 0.839699923992157, - "435": 0.9034207463264465, - "436": 0.9427834749221802, - "437": 1.0318924188613892, - "438": 1.0097386837005615, - "439": 0.8537572026252747, - "440": 0.7883947491645813, - "441": 0.7391544580459595, - "442": 0.7961458563804626, - "443": 0.8726483583450317, - "444": 0.8910840749740601, - "445": 0.9529770612716675, - "446": 1.0102248191833496, - "447": 1.102903962135315, - "448": 1.0989457368850708, - "449": 1.0225383043289185, - "450": 0.9723678231239319, - "451": 0.8849908113479614, - "452": 0.8795887231826782, - "453": 0.9208425879478455, - "454": 0.923948347568512, - "455": 0.9137952327728271, - "456": 0.8775184750556946, - "457": 0.8621120452880859, - "458": 0.9566969871520996, - "459": 1.244726538658142, - "460": 1.4193376302719116, - "461": 1.4288270473480225, - "462": 1.2880305051803589, - "463": 1.0228943824768066, - "464": 0.90586918592453, - "465": 0.8441426753997803, - "466": 0.7748436331748962, - "467": 0.7632634043693542, - "468": 0.9062524437904358, - "469": 0.9382572174072266, - "470": 1.0792484283447266, - "471": 1.1229135990142822, - "472": 1.1008241176605225, - "473": 1.0941704511642456, - "474": 1.0628392696380615, - "475": 1.113686203956604, - "476": 1.03805410861969, - "477": 0.9889453053474426, - "478": 0.99018394947052, - "479": 0.9048968553543091, - "480": 0.762435257434845, - "481": 0.7004373669624329, - "482": 0.6968955993652344, - "483": 0.8532578349113464, - "484": 1.0391627550125122, - "485": 1.186734914779663 - }, - "loss": { - "432": 2.711761713027954, - "433": 2.701874256134033, - "434": 2.7173707485198975, - "435": 2.717919111251831, - "436": 2.7440590858459473, - "437": 2.716951847076416, - "438": 2.7477917671203613, - "439": 2.711336612701416, - "440": 2.734084129333496, - "441": 2.686770439147949, - "442": 2.7162363529205322, - "443": 2.704514503479004, - "444": 2.718574047088623, - "445": 2.7034876346588135, - "446": 2.7333061695098877, - "447": 2.7030677795410156, - "448": 2.744272232055664, - "449": 2.7094311714172363, - "450": 2.731375217437744, - "451": 2.689774990081787, - "452": 2.6910858154296875, - "453": 2.6939282417297363, - "454": 2.726001262664795, - "455": 2.691908121109009, - "456": 2.6913273334503174, - "457": 2.6898093223571777, - "458": 2.703033924102783, - "459": 2.7084052562713623, - "460": 2.780323028564453, - "461": 2.760359287261963, - "462": 2.7674484252929688, - "463": 2.703690528869629, - "464": 2.688377618789673, - "465": 2.705472469329834, - "466": 2.6742730140686035, - "467": 2.677372932434082, - "468": 2.6668734550476074, - "469": 2.688608407974243, - "470": 2.682274103164673, - "471": 2.7363436222076416, - "472": 2.7071564197540283, - "473": 2.720700979232788, - "474": 2.679595470428467, - "475": 2.7048511505126953, - "476": 2.6746039390563965, - "477": 2.6931827068328857, - "478": 2.669553518295288, - "479": 2.6869726181030273, - "480": 2.6779532432556152, - "481": 2.6584787368774414, - "482": 2.6361300945281982, - "483": 2.6422805786132812, - "484": 2.6790413856506348, - "485": 2.7129361629486084 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "train_epoch_time": 4.790045499801636, - "train_loss": 2.6991676622689025, - "train_score": 0.2482413468564429, - "val_loss": 2.714708259814642, - "val_score": 0.2427346445320668 - }, - { - "epoch": 9, - "grad_norm": 1.203169822692871, - "learning_rate": 0.215, - "model_norm": 91.01960754394531, - "step_logs": { - "grad_norm": { - "486": 1.0850011110305786, - "487": 0.9896923899650574, - "488": 0.9088544845581055, - "489": 0.8589906692504883, - "490": 0.7930198907852173, - "491": 0.6774317622184753, - "492": 0.6814072728157043, - "493": 0.7896897792816162, - "494": 0.9204282760620117, - "495": 1.031484842300415, - "496": 1.2368268966674805, - "497": 1.2173197269439697, - "498": 1.0859532356262207, - "499": 1.083764910697937, - "500": 1.0810452699661255, - "501": 1.0741888284683228, - "502": 0.927306592464447, - "503": 0.8310501575469971, - "504": 0.7653688192367554, - "505": 0.7235044836997986, - "506": 0.7619850039482117, - "507": 0.8009397983551025, - "508": 0.8710378408432007, - "509": 0.8227996826171875, - "510": 0.8100309371948242, - "511": 0.8168901801109314, - "512": 0.7745519876480103, - "513": 0.7738197445869446, - "514": 0.6978654265403748, - "515": 0.7017480731010437, - "516": 0.7891150712966919, - "517": 0.8656883835792542, - "518": 0.9118044376373291, - "519": 1.0882283449172974, - "520": 1.1870903968811035, - "521": 1.3700114488601685, - "522": 1.319442629814148, - "523": 1.0366289615631104, - "524": 0.9920868277549744, - "525": 0.9393367171287537, - "526": 0.9754626154899597, - "527": 0.9165051579475403, - "528": 0.8340020179748535, - "529": 0.8424265384674072, - "530": 0.8531990051269531, - "531": 0.7943776249885559, - "532": 0.7697798609733582, - "533": 0.7897421717643738, - "534": 0.815986156463623, - "535": 0.8025452494621277, - "536": 0.7521207928657532, - "537": 0.7967057228088379, - "538": 0.901458740234375, - "539": 1.203169822692871 - }, - "loss": { - "486": 2.709219455718994, - "487": 2.6682960987091064, - "488": 2.6564416885375977, - "489": 2.671679973602295, - "490": 2.6597859859466553, - "491": 2.6467785835266113, - "492": 2.6194868087768555, - "493": 2.6616547107696533, - "494": 2.656613349914551, - "495": 2.6583967208862305, - "496": 2.6574866771698, - "497": 2.7099177837371826, - "498": 2.663578510284424, - "499": 2.6736714839935303, - "500": 2.646918296813965, - "501": 2.6775763034820557, - "502": 2.6462886333465576, - "503": 2.6455416679382324, - "504": 2.642223834991455, - "505": 2.601130485534668, - "506": 2.603966236114502, - "507": 2.6414854526519775, - "508": 2.6106271743774414, - "509": 2.635369300842285, - "510": 2.6214137077331543, - "511": 2.624495029449463, - "512": 2.6106646060943604, - "513": 2.629624366760254, - "514": 2.63250732421875, - "515": 2.5941827297210693, - "516": 2.599414348602295, - "517": 2.603377342224121, - "518": 2.6217551231384277, - "519": 2.630720615386963, - "520": 2.655055522918701, - "521": 2.645128011703491, - "522": 2.7116947174072266, - "523": 2.637521743774414, - "524": 2.634775400161743, - "525": 2.626157283782959, - "526": 2.625753164291382, - "527": 2.6087613105773926, - "528": 2.592386484146118, - "529": 2.5810465812683105, - "530": 2.6116766929626465, - "531": 2.584160327911377, - "532": 2.5971899032592773, - "533": 2.58386492729187, - "534": 2.578446388244629, - "535": 2.567394733428955, - "536": 2.559359312057495, - "537": 2.540008306503296, - "538": 2.5873641967773438, - "539": 2.5817277431488037 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "train_epoch_time": 4.78992486000061, - "train_loss": 2.6492610668007237, - "train_score": 0.22535083389265123, - "val_loss": 2.668723805489962, - "val_score": 0.220593247584032 - }, - { - "epoch": 10, - "grad_norm": 0.7871867418289185, - "learning_rate": 0.215, - "model_norm": 91.04473876953125, - "step_logs": { - "grad_norm": { - "540": 1.2602779865264893, - "541": 1.2112749814987183, - "542": 1.174929141998291, - "543": 0.983267605304718, - "544": 1.0428472757339478, - "545": 0.9895178079605103, - "546": 0.9512880444526672, - "547": 0.9754201769828796, - "548": 0.9579707384109497, - "549": 0.9788139462471008, - "550": 0.9489416480064392, - "551": 0.8465482592582703, - "552": 0.7796411514282227, - "553": 0.8723862171173096, - "554": 1.021549105644226, - "555": 1.3432765007019043, - "556": 1.3512208461761475, - "557": 1.0133731365203857, - "558": 0.9505003690719604, - "559": 1.0170173645019531, - "560": 1.0611085891723633, - "561": 0.9612119197845459, - "562": 0.8281588554382324, - "563": 0.7536771893501282, - "564": 0.672906219959259, - "565": 0.6428030729293823, - "566": 0.7232397794723511, - "567": 0.9086180329322815, - "568": 0.9008992910385132, - "569": 0.832752525806427, - "570": 0.866485595703125, - "571": 0.9066757559776306, - "572": 0.9216229319572449, - "573": 0.955962598323822, - "574": 0.9388243556022644, - "575": 0.8178623914718628, - "576": 0.801074743270874, - "577": 0.8566450476646423, - "578": 1.083534598350525, - "579": 1.1811403036117554, - "580": 1.2144383192062378, - "581": 1.2031227350234985, - "582": 1.0034691095352173, - "583": 0.9011889100074768, - "584": 0.8310750722885132, - "585": 0.7468541264533997, - "586": 0.7037428021430969, - "587": 0.646641194820404, - "588": 0.5984599590301514, - "589": 0.6355694532394409, - "590": 0.6957169771194458, - "591": 0.7257423400878906, - "592": 0.7625904083251953, - "593": 0.7871867418289185 - }, - "loss": { - "540": 2.6534512042999268, - "541": 2.593578815460205, - "542": 2.648163318634033, - "543": 2.620131731033325, - "544": 2.587038278579712, - "545": 2.581254720687866, - "546": 2.5697569847106934, - "547": 2.5983409881591797, - "548": 2.571627616882324, - "549": 2.575474739074707, - "550": 2.579862117767334, - "551": 2.562931776046753, - "552": 2.5702977180480957, - "553": 2.5301671028137207, - "554": 2.5761911869049072, - "555": 2.6057610511779785, - "556": 2.6591172218322754, - "557": 2.569087028503418, - "558": 2.5528788566589355, - "559": 2.587589740753174, - "560": 2.551438093185425, - "561": 2.59968638420105, - "562": 2.567417621612549, - "563": 2.5396628379821777, - "564": 2.5480270385742188, - "565": 2.5135445594787598, - "566": 2.529240608215332, - "567": 2.5173680782318115, - "568": 2.565337657928467, - "569": 2.5562033653259277, - "570": 2.5528626441955566, - "571": 2.5378787517547607, - "572": 2.5373783111572266, - "573": 2.5506067276000977, - "574": 2.535454273223877, - "575": 2.527514934539795, - "576": 2.522322416305542, - "577": 2.531142234802246, - "578": 2.5362415313720703, - "579": 2.5890583992004395, - "580": 2.5655345916748047, - "581": 2.574313163757324, - "582": 2.550959587097168, - "583": 2.52030611038208, - "584": 2.530621290206909, - "585": 2.499192714691162, - "586": 2.503462791442871, - "587": 2.510725259780884, - "588": 2.48935866355896, - "589": 2.4809839725494385, - "590": 2.508025646209717, - "591": 2.504624843597412, - "592": 2.4841713905334473, - "593": 2.5031657218933105 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "train_epoch_time": 4.7895379066467285, - "train_loss": 2.5049408083495655, - "train_score": 0.29937791432677585, - "val_loss": 2.524848353027887, - "val_score": 0.2920314301100445 - }, - { - "epoch": 11, - "grad_norm": 1.018774390220642, - "learning_rate": 0.215, - "model_norm": 91.06373596191406, - "step_logs": { - "grad_norm": { - "594": 0.7413973212242126, - "595": 0.6726373434066772, - "596": 0.6823249459266663, - "597": 0.8069524168968201, - "598": 1.0592670440673828, - "599": 1.1828581094741821, - "600": 1.2749831676483154, - "601": 1.3434191942214966, - "602": 1.2307745218276978, - "603": 0.9597514867782593, - "604": 0.6865532994270325, - "605": 0.5563322305679321, - "606": 0.48271387815475464, - "607": 0.4907934069633484, - "608": 0.5307155251502991, - "609": 0.5439010858535767, - "610": 0.5960483551025391, - "611": 0.7206151485443115, - "612": 0.8573791980743408, - "613": 0.9597889184951782, - "614": 1.0009410381317139, - "615": 1.0134549140930176, - "616": 1.0042377710342407, - "617": 0.9839712381362915, - "618": 0.9389065504074097, - "619": 0.8162915706634521, - "620": 0.7601591944694519, - "621": 0.6697289347648621, - "622": 0.6222559213638306, - "623": 0.5901453495025635, - "624": 0.5396410226821899, - "625": 0.567928671836853, - "626": 0.6483227610588074, - "627": 0.7782958149909973, - "628": 0.9330300092697144, - "629": 1.0496703386306763, - "630": 1.0337681770324707, - "631": 1.0632206201553345, - "632": 1.0099393129348755, - "633": 0.9314202070236206, - "634": 0.9370139241218567, - "635": 0.8868887424468994, - "636": 0.9204682111740112, - "637": 0.958300769329071, - "638": 0.9064491987228394, - "639": 0.790294349193573, - "640": 0.7852020263671875, - "641": 0.7949424386024475, - "642": 0.7599385976791382, - "643": 0.8014470338821411, - "644": 0.8337162733078003, - "645": 0.886461615562439, - "646": 0.9523445963859558, - "647": 1.018774390220642 - }, - "loss": { - "594": 2.5035481452941895, - "595": 2.5226058959960938, - "596": 2.4986114501953125, - "597": 2.476590156555176, - "598": 2.530712127685547, - "599": 2.539583683013916, - "600": 2.5293524265289307, - "601": 2.5718021392822266, - "602": 2.563169002532959, - "603": 2.5441746711730957, - "604": 2.479951858520508, - "605": 2.4913501739501953, - "606": 2.483147382736206, - "607": 2.4557957649230957, - "608": 2.462026596069336, - "609": 2.488463878631592, - "610": 2.495217800140381, - "611": 2.477020740509033, - "612": 2.5005712509155273, - "613": 2.4987125396728516, - "614": 2.522017478942871, - "615": 2.518216609954834, - "616": 2.5110747814178467, - "617": 2.5315303802490234, - "618": 2.5348381996154785, - "619": 2.504847526550293, - "620": 2.493001937866211, - "621": 2.4996390342712402, - "622": 2.4671339988708496, - "623": 2.4714293479919434, - "624": 2.472681999206543, - "625": 2.44960880279541, - "626": 2.457479953765869, - "627": 2.498391628265381, - "628": 2.495114326477051, - "629": 2.4832682609558105, - "630": 2.492354393005371, - "631": 2.5123190879821777, - "632": 2.5073142051696777, - "633": 2.4813597202301025, - "634": 2.4902448654174805, - "635": 2.45639705657959, - "636": 2.5060043334960938, - "637": 2.5054495334625244, - "638": 2.5122714042663574, - "639": 2.452633857727051, - "640": 2.446535587310791, - "641": 2.4790456295013428, - "642": 2.4660730361938477, - "643": 2.4774904251098633, - "644": 2.4870049953460693, - "645": 2.4797635078430176, - "646": 2.5009849071502686, - "647": 2.48178768157959 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "train_epoch_time": 4.790262937545776, - "train_loss": 2.498673621551207, - "train_score": 0.2863712786944046, - "val_loss": 2.522901584305528, - "val_score": 0.27530317147071 - }, - { - "epoch": 12, - "grad_norm": 0.33665525913238525, - "learning_rate": 0.215, - "model_norm": 91.08087158203125, - "step_logs": { - "grad_norm": { - "648": 0.9625231623649597, - "649": 0.8293014168739319, - "650": 0.7890959978103638, - "651": 0.7312183976173401, - "652": 0.7107807397842407, - "653": 0.734476625919342, - "654": 0.7616944909095764, - "655": 0.8186032772064209, - "656": 0.8154640197753906, - "657": 0.6950883865356445, - "658": 0.5980730056762695, - "659": 0.5517351031303406, - "660": 0.49539831280708313, - "661": 0.46470922231674194, - "662": 0.5018994808197021, - "663": 0.4947921633720398, - "664": 0.4292982220649719, - "665": 0.40979939699172974, - "666": 0.43825864791870117, - "667": 0.3937656879425049, - "668": 0.37566396594047546, - "669": 0.4169904589653015, - "670": 0.561857283115387, - "671": 0.536394476890564, - "672": 0.37791213393211365, - "673": 0.35284048318862915, - "674": 0.33954349160194397, - "675": 0.3375079035758972, - "676": 0.35773971676826477, - "677": 0.36039799451828003, - "678": 0.3521564304828644, - "679": 0.3392495810985565, - "680": 0.31043189764022827, - "681": 0.3435986340045929, - "682": 0.37561917304992676, - "683": 0.44684740900993347, - "684": 0.41413599252700806, - "685": 0.3778912425041199, - "686": 0.34594035148620605, - "687": 0.3463926613330841, - "688": 0.31765952706336975, - "689": 0.2990760803222656, - "690": 0.32804200053215027, - "691": 0.3435922861099243, - "692": 0.2888297140598297, - "693": 0.28180789947509766, - "694": 0.24968475103378296, - "695": 0.23767679929733276, - "696": 0.29190051555633545, - "697": 0.3467346429824829, - "698": 0.43864163756370544, - "699": 0.45127299427986145, - "700": 0.36476215720176697, - "701": 0.33665525913238525 - }, - "loss": { - "648": 2.490993022918701, - "649": 2.4783997535705566, - "650": 2.4501852989196777, - "651": 2.4527153968811035, - "652": 2.4886221885681152, - "653": 2.463770866394043, - "654": 2.4613871574401855, - "655": 2.4705238342285156, - "656": 2.463290214538574, - "657": 2.446429491043091, - "658": 2.4535303115844727, - "659": 2.4461350440979004, - "660": 2.4382517337799072, - "661": 2.4183411598205566, - "662": 2.437877893447876, - "663": 2.4607834815979004, - "664": 2.441830635070801, - "665": 2.453338384628296, - "666": 2.4214272499084473, - "667": 2.422410488128662, - "668": 2.4281363487243652, - "669": 2.4038569927215576, - "670": 2.429398536682129, - "671": 2.4363975524902344, - "672": 2.4295310974121094, - "673": 2.4072105884552, - "674": 2.4294848442077637, - "675": 2.414706230163574, - "676": 2.413327217102051, - "677": 2.4182066917419434, - "678": 2.4071362018585205, - "679": 2.4100942611694336, - "680": 2.4204585552215576, - "681": 2.4130163192749023, - "682": 2.4163315296173096, - "683": 2.4212396144866943, - "684": 2.4241511821746826, - "685": 2.422750949859619, - "686": 2.4250311851501465, - "687": 2.4284510612487793, - "688": 2.413712501525879, - "689": 2.398535966873169, - "690": 2.420346736907959, - "691": 2.3918254375457764, - "692": 2.394608497619629, - "693": 2.3774867057800293, - "694": 2.4036495685577393, - "695": 2.425084114074707, - "696": 2.3976783752441406, - "697": 2.418344736099243, - "698": 2.3837039470672607, - "699": 2.4117681980133057, - "700": 2.4184937477111816, - "701": 2.3901455402374268 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "train_epoch_time": 4.788414716720581, - "train_loss": 2.4019142271969547, - "train_score": 0.32063531212006274, - "val_loss": 2.4292206153803932, - "val_score": 0.312114308984901 - }, - { - "epoch": 13, - "grad_norm": 0.25203627347946167, - "learning_rate": 0.14333333333333334, - "model_norm": 91.0897216796875, - "step_logs": { - "grad_norm": { - "702": 0.3300390839576721, - "703": 0.322391152381897, - "704": 0.30782660841941833, - "705": 0.2713398039340973, - "706": 0.3217991590499878, - "707": 0.3222607672214508, - "708": 0.2914176285266876, - "709": 0.27889591455459595, - "710": 0.27837029099464417, - "711": 0.3076034188270569, - "712": 0.28038936853408813, - "713": 0.356416791677475, - "714": 0.4073687791824341, - "715": 0.3918367326259613, - "716": 0.3988860845565796, - "717": 0.3759671747684479, - "718": 0.3604384660720825, - "719": 0.29835426807403564, - "720": 0.24581210315227509, - "721": 0.24378716945648193, - "722": 0.26598456501960754, - "723": 0.2564581036567688, - "724": 0.276718407869339, - "725": 0.2902151644229889, - "726": 0.3073812425136566, - "727": 0.27006322145462036, - "728": 0.24807585775852203, - "729": 0.23630265891551971, - "730": 0.23328597843647003, - "731": 0.2882058918476105, - "732": 0.27470651268959045, - "733": 0.22985504567623138, - "734": 0.224942147731781, - "735": 0.2476918250322342, - "736": 0.22961129248142242, - "737": 0.2383800446987152, - "738": 0.25636932253837585, - "739": 0.25010326504707336, - "740": 0.2347656637430191, - "741": 0.2664845585823059, - "742": 0.23746050894260406, - "743": 0.2609820067882538, - "744": 0.2527933418750763, - "745": 0.21657411754131317, - "746": 0.2503909170627594, - "747": 0.2367532104253769, - "748": 0.23744042217731476, - "749": 0.22273008525371552, - "750": 0.26137059926986694, - "751": 0.2341543436050415, - "752": 0.2515299320220947, - "753": 0.23059368133544922, - "754": 0.25316309928894043, - "755": 0.25203627347946167 - }, - "loss": { - "702": 2.387542247772217, - "703": 2.39223575592041, - "704": 2.3969063758850098, - "705": 2.391695499420166, - "706": 2.4152441024780273, - "707": 2.4017415046691895, - "708": 2.4039173126220703, - "709": 2.407346248626709, - "710": 2.4016051292419434, - "711": 2.3947181701660156, - "712": 2.3945980072021484, - "713": 2.407212972640991, - "714": 2.405872344970703, - "715": 2.414902687072754, - "716": 2.4006147384643555, - "717": 2.420238971710205, - "718": 2.415431022644043, - "719": 2.39057993888855, - "720": 2.3832461833953857, - "721": 2.4011502265930176, - "722": 2.4127063751220703, - "723": 2.3960819244384766, - "724": 2.376065254211426, - "725": 2.3859333992004395, - "726": 2.398648738861084, - "727": 2.369152069091797, - "728": 2.383495807647705, - "729": 2.3993654251098633, - "730": 2.4021952152252197, - "731": 2.3936712741851807, - "732": 2.387683391571045, - "733": 2.3873724937438965, - "734": 2.3726930618286133, - "735": 2.3907036781311035, - "736": 2.3885369300842285, - "737": 2.3717007637023926, - "738": 2.3812522888183594, - "739": 2.3822555541992188, - "740": 2.384267568588257, - "741": 2.3919520378112793, - "742": 2.4204275608062744, - "743": 2.3882110118865967, - "744": 2.385528564453125, - "745": 2.389493227005005, - "746": 2.3675942420959473, - "747": 2.397498369216919, - "748": 2.3713765144348145, - "749": 2.3653931617736816, - "750": 2.3721046447753906, - "751": 2.3684959411621094, - "752": 2.3836441040039062, - "753": 2.388432741165161, - "754": 2.3816676139831543, - "755": 2.379958152770996 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "train_epoch_time": 4.789869785308838, - "train_loss": 2.3827254545739938, - "train_score": 0.3249237803595309, - "val_loss": 2.411594339134225, - "val_score": 0.3167201848240862 - }, - { - "epoch": 14, - "grad_norm": 0.2197117954492569, - "learning_rate": 0.07166666666666667, - "model_norm": 91.09257507324219, - "step_logs": { - "grad_norm": { - "756": 0.2761439085006714, - "757": 0.24706952273845673, - "758": 0.22961826622486115, - "759": 0.26658567786216736, - "760": 0.301471084356308, - "761": 0.2331073135137558, - "762": 0.2244357317686081, - "763": 0.23235604166984558, - "764": 0.23938170075416565, - "765": 0.23986923694610596, - "766": 0.24092619121074677, - "767": 0.27001503109931946, - "768": 0.2521958649158478, - "769": 0.21570946276187897, - "770": 0.2512192130088806, - "771": 0.2271350473165512, - "772": 0.2642590403556824, - "773": 0.25384005904197693, - "774": 0.24231238663196564, - "775": 0.22705785930156708, - "776": 0.23054121434688568, - "777": 0.2104002833366394, - "778": 0.2288323938846588, - "779": 0.2242511510848999, - "780": 0.2163362205028534, - "781": 0.20032843947410583, - "782": 0.24287815392017365, - "783": 0.2637001872062683, - "784": 0.20668497681617737, - "785": 0.25995853543281555, - "786": 0.2332717329263687, - "787": 0.27230343222618103, - "788": 0.2583862543106079, - "789": 0.2421560138463974, - "790": 0.2332403063774109, - "791": 0.22376008331775665, - "792": 0.22356942296028137, - "793": 0.2044358104467392, - "794": 0.23896855115890503, - "795": 0.20602770149707794, - "796": 0.23659393191337585, - "797": 0.21350815892219543, - "798": 0.23285506665706635, - "799": 0.2323119193315506, - "800": 0.20167531073093414, - "801": 0.21216879785060883, - "802": 0.23666132986545563, - "803": 0.22863946855068207, - "804": 0.227693572640419, - "805": 0.2767631709575653, - "806": 0.22245074808597565, - "807": 0.2513234615325928, - "808": 0.25680112838745117, - "809": 0.2197117954492569 - }, - "loss": { - "756": 2.3960371017456055, - "757": 2.393949508666992, - "758": 2.398939609527588, - "759": 2.3947665691375732, - "760": 2.3522768020629883, - "761": 2.397989273071289, - "762": 2.395103693008423, - "763": 2.3695473670959473, - "764": 2.3735008239746094, - "765": 2.3729841709136963, - "766": 2.398881435394287, - "767": 2.3834304809570312, - "768": 2.393691301345825, - "769": 2.378028392791748, - "770": 2.3891730308532715, - "771": 2.375607490539551, - "772": 2.381958484649658, - "773": 2.3937265872955322, - "774": 2.400770902633667, - "775": 2.3694400787353516, - "776": 2.3539044857025146, - "777": 2.3629379272460938, - "778": 2.3816232681274414, - "779": 2.3935296535491943, - "780": 2.3777899742126465, - "781": 2.3724355697631836, - "782": 2.372957468032837, - "783": 2.389726161956787, - "784": 2.374579429626465, - "785": 2.376941204071045, - "786": 2.3771867752075195, - "787": 2.385077714920044, - "788": 2.3737740516662598, - "789": 2.3993072509765625, - "790": 2.3594346046447754, - "791": 2.384274959564209, - "792": 2.375826835632324, - "793": 2.3713130950927734, - "794": 2.387220859527588, - "795": 2.37376070022583, - "796": 2.375234842300415, - "797": 2.3680973052978516, - "798": 2.3630502223968506, - "799": 2.3770058155059814, - "800": 2.354630470275879, - "801": 2.3690831661224365, - "802": 2.36740779876709, - "803": 2.371968984603882, - "804": 2.3626205921173096, - "805": 2.3998422622680664, - "806": 2.3764305114746094, - "807": 2.3635830879211426, - "808": 2.403512716293335, - "809": 2.376469850540161 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "train_epoch_time": 4.789149284362793, - "train_loss": 2.3769338460016454, - "train_score": 0.32556940454834676, - "val_loss": 2.4063635773007275, - "val_score": 0.31698030318923714 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:28:10.393864", - "final_model_norm": 91.09257507324219, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:26:29.625420", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 2.429553508758545, - "learning_rate": 2.15e-11, - "model_norm": 89.89427947998047, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.733994483947754, - "3": 8.656057357788086, - "4": 20.534622192382812, - "5": 6.440240859985352, - "6": 5.055216312408447, - "7": 3.8297367095947266, - "8": 4.107420444488525, - "9": 4.2545857429504395, - "10": 4.459016799926758, - "11": 5.157927513122559, - "12": 4.648646831512451, - "13": 71.50898742675781, - "14": 357.7301330566406, - "15": 3.219618320465088, - "16": 4.466315269470215, - "17": 3.305650472640991, - "18": 1.2785460948944092, - "19": 1.3744328022003174, - "20": 2.1724610328674316, - "21": 8.90324878692627, - "22": 3.4738388061523438, - "23": 3.4021096229553223, - "24": 4.153079032897949, - "25": 13.375604629516602, - "26": 4.175424575805664, - "27": 4.691089630126953, - "28": 3.8858461380004883, - "29": 3.612222909927368, - "30": 11.23609733581543, - "31": 4.208217144012451, - "32": 3.885915756225586, - "33": 3.747910499572754, - "34": 7.2109599113464355, - "35": 15.730438232421875, - "36": 6.669101238250732, - "37": 5.69589376449585, - "38": 4.972746849060059, - "39": 7.909084320068359, - "40": 4.504845142364502, - "41": 13.990959167480469, - "42": 6.469381332397461, - "43": 4.600672721862793, - "44": 4.368273735046387, - "45": 4.056450366973877, - "46": 3.6727654933929443, - "47": 14.389432907104492, - "48": 7.941073894500732, - "49": 4.657362461090088, - "50": 4.3772382736206055, - "51": 3.771397113800049, - "52": 3.2211754322052, - "53": 2.429553508758545 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.8340747356414795, - "3": 3.709585666656494, - "4": 4.2086381912231445, - "5": 4.403763771057129, - "6": 3.881211519241333, - "7": 3.5605177879333496, - "8": 3.4620606899261475, - "9": 3.549063205718994, - "10": 3.461212635040283, - "11": 3.5233871936798096, - "12": 3.3178398609161377, - "13": 3.916867256164551, - "14": 5.120562553405762, - "15": 4.033894062042236, - "16": 3.7446446418762207, - "17": 4.082850933074951, - "18": 3.466677188873291, - "19": 3.383971691131592, - "20": 3.504695415496826, - "21": 3.826809883117676, - "22": 5.354043960571289, - "23": 4.4896559715271, - "24": 3.637166738510132, - "25": 4.800013542175293, - "26": 7.301655292510986, - "27": 6.175267696380615, - "28": 5.3836870193481445, - "29": 4.045472145080566, - "30": 4.214983940124512, - "31": 7.163357734680176, - "32": 6.275996208190918, - "33": 4.783047199249268, - "34": 3.9009814262390137, - "35": 7.055430889129639, - "36": 11.038154602050781, - "37": 9.603250503540039, - "38": 7.779790878295898, - "39": 6.452442169189453, - "40": 5.659243583679199, - "41": 6.63803243637085, - "42": 10.90620231628418, - "43": 9.19009017944336, - "44": 7.223609924316406, - "45": 5.790656089782715, - "46": 4.32966423034668, - "47": 6.517666339874268, - "48": 10.89421272277832, - "49": 10.847827911376953, - "50": 8.655927658081055, - "51": 6.700267791748047, - "52": 4.685117721557617, - "53": 3.6778411865234375 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "train_epoch_time": 4.789998531341553, - "train_loss": 3.778417190007192, - "train_score": 0.13783514165296787, - "val_loss": 3.776106384126245, - "val_score": 0.13452568864097003 - }, - { - "epoch": 1, - "grad_norm": 0.6967712044715881, - "learning_rate": 0.215, - "model_norm": 89.84645080566406, - "step_logs": { - "grad_norm": { - "54": 1.7600692510604858, - "55": 6.95321798324585, - "56": 3.4420149326324463, - "57": 2.800166130065918, - "58": 2.639221668243408, - "59": 2.2791171073913574, - "60": 2.3787615299224854, - "61": 5.642912864685059, - "62": 2.712333917617798, - "63": 2.2643442153930664, - "64": 1.639472246170044, - "65": 4.633570671081543, - "66": 2.0411229133605957, - "67": 1.8406808376312256, - "68": 5.9409379959106445, - "69": 2.588007926940918, - "70": 1.9243590831756592, - "71": 1.6075596809387207, - "72": 0.7534778118133545, - "73": 1.6058335304260254, - "74": 9.571342468261719, - "75": 2.1735141277313232, - "76": 0.8565264344215393, - "77": 1.656238079071045, - "78": 9.02771282196045, - "79": 4.570710182189941, - "80": 2.4140424728393555, - "81": 1.9558131694793701, - "82": 1.6673396825790405, - "83": 1.4261865615844727, - "84": 0.6584086418151855, - "85": 1.119946837425232, - "86": 1.3256529569625854, - "87": 2.158568859100342, - "88": 1.4991145133972168, - "89": 0.5431514382362366, - "90": 0.9848825931549072, - "91": 1.1982654333114624, - "92": 2.015868902206421, - "93": 1.463441014289856, - "94": 0.33475884795188904, - "95": 0.46374404430389404, - "96": 0.6652947664260864, - "97": 1.3676626682281494, - "98": 1.327052354812622, - "99": 1.0718811750411987, - "100": 1.1613391637802124, - "101": 1.4255242347717285, - "102": 1.275913953781128, - "103": 0.7472560405731201, - "104": 0.8954002261161804, - "105": 1.3840980529785156, - "106": 1.2118228673934937, - "107": 0.6967712044715881 - }, - "loss": { - "54": 3.782855987548828, - "55": 3.918456554412842, - "56": 6.271005630493164, - "57": 5.484335899353027, - "58": 4.090142250061035, - "59": 3.589216709136963, - "60": 3.927196979522705, - "61": 3.7309350967407227, - "62": 5.297145843505859, - "63": 4.52878999710083, - "64": 3.600069046020508, - "65": 3.644352674484253, - "66": 4.423394680023193, - "67": 3.611009120941162, - "68": 3.770536184310913, - "69": 5.148294448852539, - "70": 4.328619956970215, - "71": 3.677562713623047, - "72": 3.3822851181030273, - "73": 3.479600429534912, - "74": 4.305522918701172, - "75": 4.096094131469727, - "76": 3.3589093685150146, - "77": 3.4561381340026855, - "78": 4.316786766052246, - "79": 6.879027366638184, - "80": 6.11417818069458, - "81": 5.050774097442627, - "82": 4.30275297164917, - "83": 3.7803187370300293, - "84": 3.4178764820098877, - "85": 3.3938331604003906, - "86": 3.49715518951416, - "87": 3.4492218494415283, - "88": 3.701573371887207, - "89": 3.3722610473632812, - "90": 3.3772826194763184, - "91": 3.4302945137023926, - "92": 3.4313745498657227, - "93": 3.6659979820251465, - "94": 3.3707165718078613, - "95": 3.333737373352051, - "96": 3.3669753074645996, - "97": 3.392742156982422, - "98": 3.5068230628967285, - "99": 3.3893918991088867, - "100": 3.437774658203125, - "101": 3.3910889625549316, - "102": 3.5048890113830566, - "103": 3.3261966705322266, - "104": 3.4144062995910645, - "105": 3.3719491958618164, - "106": 3.473080635070801, - "107": 3.329042911529541 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "train_epoch_time": 4.78846001625061, - "train_loss": 3.3715476909703126, - "train_score": 0.08542974356941377, - "val_loss": 3.3898103467085057, - "val_score": 0.08248869838657116 - }, - { - "epoch": 2, - "grad_norm": 0.6376702785491943, - "learning_rate": 0.215, - "model_norm": 89.86298370361328, - "step_logs": { - "grad_norm": { - "108": 0.863978922367096, - "109": 1.4014583826065063, - "110": 1.223598599433899, - "111": 0.5734009146690369, - "112": 0.6804056167602539, - "113": 1.1065216064453125, - "114": 1.067942500114441, - "115": 0.8983179926872253, - "116": 0.9495359063148499, - "117": 1.0689358711242676, - "118": 1.0224673748016357, - "119": 0.8617774248123169, - "120": 0.854465663433075, - "121": 0.8484242558479309, - "122": 0.8702432513237, - "123": 0.8752347230911255, - "124": 0.9114002585411072, - "125": 1.0401203632354736, - "126": 0.9841242432594299, - "127": 0.8059213757514954, - "128": 0.8353803157806396, - "129": 0.9692856669425964, - "130": 0.9037263989448547, - "131": 0.6930402517318726, - "132": 0.7376375794410706, - "133": 0.862440288066864, - "134": 0.8603819012641907, - "135": 0.8117675185203552, - "136": 0.7948665022850037, - "137": 0.7000136971473694, - "138": 0.7195701003074646, - "139": 0.8498217463493347, - "140": 0.8268479108810425, - "141": 0.7609023451805115, - "142": 0.7767173647880554, - "143": 0.7971246242523193, - "144": 0.7868272066116333, - "145": 0.8084514141082764, - "146": 0.8119480609893799, - "147": 0.808192253112793, - "148": 0.838414192199707, - "149": 0.8663260340690613, - "150": 0.8310330510139465, - "151": 0.7649414539337158, - "152": 0.7240082621574402, - "153": 0.6730470657348633, - "154": 0.6659411787986755, - "155": 0.6326982378959656, - "156": 0.6284421682357788, - "157": 0.5721337795257568, - "158": 0.5678961873054504, - "159": 0.570471465587616, - "160": 0.5975511074066162, - "161": 0.6376702785491943 - }, - "loss": { - "108": 3.346294641494751, - "109": 3.3726882934570312, - "110": 3.5092005729675293, - "111": 3.295762538909912, - "112": 3.3364429473876953, - "113": 3.3828094005584717, - "114": 3.4595792293548584, - "115": 3.351487636566162, - "116": 3.4365453720092773, - "117": 3.3558294773101807, - "118": 3.389944553375244, - "119": 3.3285160064697266, - "120": 3.3939123153686523, - "121": 3.3478825092315674, - "122": 3.3591840267181396, - "123": 3.3183021545410156, - "124": 3.3832719326019287, - "125": 3.3444571495056152, - "126": 3.416065216064453, - "127": 3.3270702362060547, - "128": 3.390368700027466, - "129": 3.3990607261657715, - "130": 3.425734043121338, - "131": 3.3351869583129883, - "132": 3.3583717346191406, - "133": 3.3464856147766113, - "134": 3.365583896636963, - "135": 3.3311767578125, - "136": 3.3878769874572754, - "137": 3.2899458408355713, - "138": 3.3204338550567627, - "139": 3.3566741943359375, - "140": 3.348726749420166, - "141": 3.3130178451538086, - "142": 3.327881336212158, - "143": 3.321107864379883, - "144": 3.3257904052734375, - "145": 3.3359487056732178, - "146": 3.3460803031921387, - "147": 3.312148094177246, - "148": 3.316803455352783, - "149": 3.3127455711364746, - "150": 3.350639581680298, - "151": 3.3072245121002197, - "152": 3.3171613216400146, - "153": 3.307497501373291, - "154": 3.3380799293518066, - "155": 3.293321132659912, - "156": 3.2975564002990723, - "157": 3.255033493041992, - "158": 3.2581634521484375, - "159": 3.2486581802368164, - "160": 3.240307092666626, - "161": 3.2430601119995117 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "train_epoch_time": 4.7881128787994385, - "train_loss": 3.2409612587226855, - "train_score": 0.15480182924691369, - "val_loss": 3.2546099600917846, - "val_score": 0.1502314148177783 - }, - { - "epoch": 3, - "grad_norm": 0.6691382527351379, - "learning_rate": 0.215, - "model_norm": 89.89545440673828, - "step_logs": { - "grad_norm": { - "162": 0.6216800212860107, - "163": 0.5795353651046753, - "164": 0.5379090905189514, - "165": 0.5064986944198608, - "166": 0.5122657418251038, - "167": 0.5290499329566956, - "168": 0.6136422157287598, - "169": 0.6900139451026917, - "170": 0.6494053602218628, - "171": 0.6496060490608215, - "172": 0.8002558946609497, - "173": 0.8278506398200989, - "174": 0.7854248285293579, - "175": 0.7602683305740356, - "176": 0.8265597224235535, - "177": 0.9743447303771973, - "178": 0.9849590063095093, - "179": 0.9550031423568726, - "180": 0.9158548712730408, - "181": 0.8523992300033569, - "182": 0.8869295120239258, - "183": 0.8968014121055603, - "184": 0.8628901839256287, - "185": 0.83315110206604, - "186": 0.7910624146461487, - "187": 0.7288994789123535, - "188": 0.7334252595901489, - "189": 0.7752164006233215, - "190": 0.7915940284729004, - "191": 0.8068891763687134, - "192": 0.8567906022071838, - "193": 0.8967894911766052, - "194": 0.8805527091026306, - "195": 0.8582144975662231, - "196": 0.7758252620697021, - "197": 0.6706097722053528, - "198": 0.6795414686203003, - "199": 0.7499302625656128, - "200": 0.735964834690094, - "201": 0.6751560568809509, - "202": 0.6224300861358643, - "203": 0.6208912134170532, - "204": 0.6566837430000305, - "205": 0.6400348544120789, - "206": 0.7055973410606384, - "207": 0.7964776754379272, - "208": 0.725046694278717, - "209": 0.6219080090522766, - "210": 0.6124748587608337, - "211": 0.6556366086006165, - "212": 0.6675980091094971, - "213": 0.6274852752685547, - "214": 0.611222505569458, - "215": 0.6691382527351379 - }, - "loss": { - "162": 3.2319462299346924, - "163": 3.219452381134033, - "164": 3.2075250148773193, - "165": 3.201167106628418, - "166": 3.1718106269836426, - "167": 3.144150972366333, - "168": 3.1475160121917725, - "169": 3.1607632637023926, - "170": 3.177461624145508, - "171": 3.130967140197754, - "172": 3.136554718017578, - "173": 3.1449737548828125, - "174": 3.1375277042388916, - "175": 3.099252223968506, - "176": 3.1178438663482666, - "177": 3.1382901668548584, - "178": 3.156012535095215, - "179": 3.0988309383392334, - "180": 3.131251811981201, - "181": 3.110795259475708, - "182": 3.096956253051758, - "183": 3.0759496688842773, - "184": 3.0825419425964355, - "185": 3.060507297515869, - "186": 3.076098918914795, - "187": 3.0639452934265137, - "188": 3.046398162841797, - "189": 3.065018653869629, - "190": 3.0406084060668945, - "191": 3.0320305824279785, - "192": 3.0403335094451904, - "193": 3.0507290363311768, - "194": 3.07023024559021, - "195": 3.052114486694336, - "196": 3.0559685230255127, - "197": 3.005636215209961, - "198": 3.0089898109436035, - "199": 3.009814500808716, - "200": 3.015291452407837, - "201": 2.9974365234375, - "202": 2.9953560829162598, - "203": 3.005653142929077, - "204": 2.987767219543457, - "205": 2.9744162559509277, - "206": 2.9824533462524414, - "207": 2.9882657527923584, - "208": 3.0003840923309326, - "209": 2.9617490768432617, - "210": 2.9827628135681152, - "211": 2.975161552429199, - "212": 2.96435546875, - "213": 2.9418067932128906, - "214": 2.985100030899048, - "215": 2.9532151222229004 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "train_epoch_time": 4.788455963134766, - "train_loss": 2.970902385328558, - "train_score": 0.17343077473609655, - "val_loss": 2.990309273471241, - "val_score": 0.17139512791494005 - }, - { - "epoch": 4, - "grad_norm": 0.7655919790267944, - "learning_rate": 0.215, - "model_norm": 89.91508483886719, - "step_logs": { - "grad_norm": { - "216": 0.6569374799728394, - "217": 0.5437264442443848, - "218": 0.48358988761901855, - "219": 0.4807252585887909, - "220": 0.4809775650501251, - "221": 0.4261274039745331, - "222": 0.4274457097053528, - "223": 0.43906083703041077, - "224": 0.4313525855541229, - "225": 0.5159060955047607, - "226": 0.6199490427970886, - "227": 0.6708369851112366, - "228": 0.7206170558929443, - "229": 0.8778797388076782, - "230": 1.0434292554855347, - "231": 1.012017011642456, - "232": 0.8054400086402893, - "233": 0.6809650659561157, - "234": 0.5923705101013184, - "235": 0.589709997177124, - "236": 0.643692672252655, - "237": 0.7189840078353882, - "238": 0.8345604538917542, - "239": 0.821356475353241, - "240": 0.758682370185852, - "241": 0.7388073801994324, - "242": 0.7493603229522705, - "243": 0.7750712633132935, - "244": 0.7239165902137756, - "245": 0.7388054132461548, - "246": 0.7830007672309875, - "247": 0.7371181845664978, - "248": 0.6785892248153687, - "249": 0.7478886842727661, - "250": 0.7603949904441833, - "251": 0.6679902076721191, - "252": 0.6268831491470337, - "253": 0.745229184627533, - "254": 0.8539769053459167, - "255": 0.849469780921936, - "256": 0.8347134590148926, - "257": 0.8746486306190491, - "258": 0.8160930275917053, - "259": 0.69259113073349, - "260": 0.5686430335044861, - "261": 0.5244439244270325, - "262": 0.4675756096839905, - "263": 0.42535269260406494, - "264": 0.39161190390586853, - "265": 0.414936900138855, - "266": 0.5057854652404785, - "267": 0.6051677465438843, - "268": 0.6840121746063232, - "269": 0.7655919790267944 - }, - "loss": { - "216": 2.959449052810669, - "217": 2.928818702697754, - "218": 2.9616847038269043, - "219": 2.955103635787964, - "220": 2.963385581970215, - "221": 2.9368133544921875, - "222": 2.9342844486236572, - "223": 2.9423367977142334, - "224": 2.9325857162475586, - "225": 2.9307456016540527, - "226": 2.9643044471740723, - "227": 2.920894145965576, - "228": 2.9425387382507324, - "229": 2.960052490234375, - "230": 2.969963788986206, - "231": 2.980578899383545, - "232": 2.957820415496826, - "233": 2.952867031097412, - "234": 2.9020159244537354, - "235": 2.9305548667907715, - "236": 2.9248666763305664, - "237": 2.936262369155884, - "238": 2.9247281551361084, - "239": 2.9441075325012207, - "240": 2.9048638343811035, - "241": 2.928342342376709, - "242": 2.9197912216186523, - "243": 2.9355950355529785, - "244": 2.9159674644470215, - "245": 2.9187963008880615, - "246": 2.9043824672698975, - "247": 2.91329288482666, - "248": 2.8938283920288086, - "249": 2.909095287322998, - "250": 2.889674425125122, - "251": 2.898662567138672, - "252": 2.888793468475342, - "253": 2.9009788036346436, - "254": 2.8788414001464844, - "255": 2.9252638816833496, - "256": 2.891234874725342, - "257": 2.902353286743164, - "258": 2.9111204147338867, - "259": 2.9013242721557617, - "260": 2.8581056594848633, - "261": 2.898707628250122, - "262": 2.8596737384796143, - "263": 2.858137845993042, - "264": 2.863687515258789, - "265": 2.8653366565704346, - "266": 2.884807586669922, - "267": 2.874598979949951, - "268": 2.8718433380126953, - "269": 2.878401279449463 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "train_epoch_time": 4.788722276687622, - "train_loss": 2.8829704161524945, - "train_score": 0.20100542499311, - "val_loss": 2.9084927285716673, - "val_score": 0.19775670960152053 - }, - { - "epoch": 5, - "grad_norm": 0.5735481381416321, - "learning_rate": 0.215, - "model_norm": 89.9293212890625, - "step_logs": { - "grad_norm": { - "270": 0.796120285987854, - "271": 0.8118538856506348, - "272": 0.7633218765258789, - "273": 0.6854509711265564, - "274": 0.6247511506080627, - "275": 0.6082665920257568, - "276": 0.6928306221961975, - "277": 0.7025632262229919, - "278": 0.6927878260612488, - "279": 0.7126378417015076, - "280": 0.6892486214637756, - "281": 0.6750990152359009, - "282": 0.7017691135406494, - "283": 0.770732581615448, - "284": 0.7970418334007263, - "285": 0.7432883381843567, - "286": 0.7211617231369019, - "287": 0.7396063804626465, - "288": 0.7051949501037598, - "289": 0.6790603995323181, - "290": 0.6845196485519409, - "291": 0.7265959978103638, - "292": 0.7176300883293152, - "293": 0.6816290020942688, - "294": 0.6067876815795898, - "295": 0.5847397446632385, - "296": 0.6183571219444275, - "297": 0.6058297753334045, - "298": 0.5975099205970764, - "299": 0.5996508002281189, - "300": 0.5979251861572266, - "301": 0.5816647410392761, - "302": 0.578787088394165, - "303": 0.5957953929901123, - "304": 0.6086187958717346, - "305": 0.671294093132019, - "306": 0.7799011468887329, - "307": 0.74623703956604, - "308": 0.6684860587120056, - "309": 0.6856642365455627, - "310": 0.7539567947387695, - "311": 0.6747231483459473, - "312": 0.6263933181762695, - "313": 0.6678944230079651, - "314": 0.7819444537162781, - "315": 0.7640763521194458, - "316": 0.644639790058136, - "317": 0.6193239688873291, - "318": 0.624279260635376, - "319": 0.6400131583213806, - "320": 0.6569095849990845, - "321": 0.661838173866272, - "322": 0.598699688911438, - "323": 0.5735481381416321 - }, - "loss": { - "270": 2.885068893432617, - "271": 2.892613649368286, - "272": 2.874335765838623, - "273": 2.882042407989502, - "274": 2.8603515625, - "275": 2.863640069961548, - "276": 2.8464183807373047, - "277": 2.8889899253845215, - "278": 2.8550949096679688, - "279": 2.880115509033203, - "280": 2.8660616874694824, - "281": 2.8706936836242676, - "282": 2.8713297843933105, - "283": 2.8797903060913086, - "284": 2.8528199195861816, - "285": 2.8667685985565186, - "286": 2.8620505332946777, - "287": 2.860910177230835, - "288": 2.847256660461426, - "289": 2.860405921936035, - "290": 2.8828535079956055, - "291": 2.8549137115478516, - "292": 2.8421316146850586, - "293": 2.835737705230713, - "294": 2.843327045440674, - "295": 2.849231719970703, - "296": 2.857534408569336, - "297": 2.848630905151367, - "298": 2.8949599266052246, - "299": 2.8469786643981934, - "300": 2.8290023803710938, - "301": 2.850537061691284, - "302": 2.8316574096679688, - "303": 2.8586585521698, - "304": 2.845435619354248, - "305": 2.846201181411743, - "306": 2.838244915008545, - "307": 2.85735821723938, - "308": 2.8405256271362305, - "309": 2.830526828765869, - "310": 2.8481059074401855, - "311": 2.8509984016418457, - "312": 2.828366279602051, - "313": 2.840717315673828, - "314": 2.8361728191375732, - "315": 2.869767665863037, - "316": 2.8425450325012207, - "317": 2.833724021911621, - "318": 2.8254990577697754, - "319": 2.840945243835449, - "320": 2.8350791931152344, - "321": 2.832787036895752, - "322": 2.8447256088256836, - "323": 2.8329739570617676 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "train_epoch_time": 4.788723707199097, - "train_loss": 2.822763564562695, - "train_score": 0.229455478807946, - "val_loss": 2.847779504467234, - "val_score": 0.22466543510162734 - }, - { - "epoch": 6, - "grad_norm": 0.721624493598938, - "learning_rate": 0.215, - "model_norm": 89.94625854492188, - "step_logs": { - "grad_norm": { - "324": 0.5964891314506531, - "325": 0.6091309189796448, - "326": 0.6144530773162842, - "327": 0.5864863395690918, - "328": 0.5359792113304138, - "329": 0.5493559241294861, - "330": 0.5981965661048889, - "331": 0.5725377202033997, - "332": 0.5112264752388, - "333": 0.5338640809059143, - "334": 0.595308244228363, - "335": 0.6108565926551819, - "336": 0.6135203242301941, - "337": 0.6320286989212036, - "338": 0.6825681924819946, - "339": 0.7148227095603943, - "340": 0.6866228580474854, - "341": 0.6864261031150818, - "342": 0.6876449584960938, - "343": 0.721110999584198, - "344": 0.7774609923362732, - "345": 0.8076304793357849, - "346": 0.8222858309745789, - "347": 0.8402779698371887, - "348": 0.8327051997184753, - "349": 0.8058454394340515, - "350": 0.723849356174469, - "351": 0.687146782875061, - "352": 0.8715532422065735, - "353": 0.731410801410675, - "354": 0.7635579705238342, - "355": 0.7563762068748474, - "356": 0.7085869312286377, - "357": 0.6681196689605713, - "358": 0.581696093082428, - "359": 0.6046619415283203, - "360": 0.6671164035797119, - "361": 0.6966533660888672, - "362": 0.7446925044059753, - "363": 0.7331202626228333, - "364": 0.696814775466919, - "365": 0.7409701943397522, - "366": 0.7478776574134827, - "367": 0.7537860870361328, - "368": 0.7984800934791565, - "369": 0.7961688041687012, - "370": 0.80507493019104, - "371": 0.7771061658859253, - "372": 0.7710264921188354, - "373": 0.7771677374839783, - "374": 0.7087388634681702, - "375": 0.6863659024238586, - "376": 0.67284095287323, - "377": 0.721624493598938 - }, - "loss": { - "324": 2.8285293579101562, - "325": 2.838834524154663, - "326": 2.8331103324890137, - "327": 2.8147382736206055, - "328": 2.8175084590911865, - "329": 2.8149991035461426, - "330": 2.823151111602783, - "331": 2.8318426609039307, - "332": 2.8063690662384033, - "333": 2.806149959564209, - "334": 2.8080615997314453, - "335": 2.794569492340088, - "336": 2.796664237976074, - "337": 2.8131580352783203, - "338": 2.815187454223633, - "339": 2.848173141479492, - "340": 2.8063628673553467, - "341": 2.833559036254883, - "342": 2.827437400817871, - "343": 2.821094274520874, - "344": 2.8275089263916016, - "345": 2.8373918533325195, - "346": 2.840057849884033, - "347": 2.8302066326141357, - "348": 2.8216428756713867, - "349": 2.822509288787842, - "350": 2.8014183044433594, - "351": 2.8154940605163574, - "352": 2.8080015182495117, - "353": 2.8117311000823975, - "354": 2.810521125793457, - "355": 2.8014698028564453, - "356": 2.785714864730835, - "357": 2.8231570720672607, - "358": 2.808838129043579, - "359": 2.8177099227905273, - "360": 2.7885584831237793, - "361": 2.806155204772949, - "362": 2.8029279708862305, - "363": 2.8152644634246826, - "364": 2.810152053833008, - "365": 2.8166346549987793, - "366": 2.797429323196411, - "367": 2.7932543754577637, - "368": 2.79209566116333, - "369": 2.8221168518066406, - "370": 2.7892518043518066, - "371": 2.804565906524658, - "372": 2.7962522506713867, - "373": 2.7812294960021973, - "374": 2.7683475017547607, - "375": 2.7984366416931152, - "376": 2.7574472427368164, - "377": 2.7779312133789062 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "train_epoch_time": 4.788815021514893, - "train_loss": 2.7804879842563888, - "train_score": 0.24339580349036236, - "val_loss": 2.8054805831164633, - "val_score": 0.23691787501427117 - }, - { - "epoch": 7, - "grad_norm": 0.8514337539672852, - "learning_rate": 0.215, - "model_norm": 89.96620178222656, - "step_logs": { - "grad_norm": { - "378": 0.7090153694152832, - "379": 0.6544874906539917, - "380": 0.6257517337799072, - "381": 0.658334493637085, - "382": 0.7453734874725342, - "383": 0.7668471336364746, - "384": 0.7865362167358398, - "385": 0.8101611137390137, - "386": 0.8240614533424377, - "387": 0.802543580532074, - "388": 0.7697931528091431, - "389": 0.745103120803833, - "390": 0.7492871284484863, - "391": 0.7381245493888855, - "392": 0.6822201013565063, - "393": 0.6685869097709656, - "394": 0.6939062476158142, - "395": 0.8011731505393982, - "396": 0.8890218734741211, - "397": 0.9615203142166138, - "398": 0.9776833057403564, - "399": 0.9332268238067627, - "400": 0.7564985752105713, - "401": 0.6738104820251465, - "402": 0.6722136735916138, - "403": 0.7064204216003418, - "404": 0.8017610311508179, - "405": 0.8405110239982605, - "406": 0.7676529884338379, - "407": 0.7388855218887329, - "408": 0.7953731417655945, - "409": 0.8301306366920471, - "410": 0.8346095085144043, - "411": 0.8095090389251709, - "412": 0.8254018425941467, - "413": 0.8188120722770691, - "414": 0.8567057847976685, - "415": 0.8541874289512634, - "416": 0.8077358603477478, - "417": 0.7974807620048523, - "418": 0.7672068476676941, - "419": 0.7511753439903259, - "420": 0.6982523202896118, - "421": 0.6772213578224182, - "422": 0.7334694862365723, - "423": 0.7740538716316223, - "424": 0.792039155960083, - "425": 0.8051450848579407, - "426": 0.8875683546066284, - "427": 0.8709783554077148, - "428": 0.7662844061851501, - "429": 0.7867828607559204, - "430": 0.8641607761383057, - "431": 0.8514337539672852 - }, - "loss": { - "378": 2.795520305633545, - "379": 2.774301767349243, - "380": 2.7756214141845703, - "381": 2.783689260482788, - "382": 2.8028249740600586, - "383": 2.7717487812042236, - "384": 2.7767343521118164, - "385": 2.800795555114746, - "386": 2.772435426712036, - "387": 2.792219400405884, - "388": 2.7631430625915527, - "389": 2.779628276824951, - "390": 2.7496178150177, - "391": 2.783785343170166, - "392": 2.7655348777770996, - "393": 2.7732787132263184, - "394": 2.762220859527588, - "395": 2.766599655151367, - "396": 2.7918457984924316, - "397": 2.785118579864502, - "398": 2.786693572998047, - "399": 2.8107094764709473, - "400": 2.7763500213623047, - "401": 2.7645668983459473, - "402": 2.738198757171631, - "403": 2.7353878021240234, - "404": 2.7589430809020996, - "405": 2.7586965560913086, - "406": 2.7454304695129395, - "407": 2.741732597351074, - "408": 2.742398738861084, - "409": 2.767534017562866, - "410": 2.760796546936035, - "411": 2.7551798820495605, - "412": 2.7555508613586426, - "413": 2.758802890777588, - "414": 2.7387495040893555, - "415": 2.760998249053955, - "416": 2.7372655868530273, - "417": 2.754990816116333, - "418": 2.7356529235839844, - "419": 2.7401700019836426, - "420": 2.7333812713623047, - "421": 2.7174830436706543, - "422": 2.735703706741333, - "423": 2.7497267723083496, - "424": 2.7364587783813477, - "425": 2.7370429039001465, - "426": 2.742906093597412, - "427": 2.7440524101257324, - "428": 2.7095909118652344, - "429": 2.7307381629943848, - "430": 2.7078957557678223, - "431": 2.7399444580078125 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "train_epoch_time": 4.78853440284729, - "train_loss": 2.717233853634324, - "train_score": 0.24648381458378932, - "val_loss": 2.7367622192636833, - "val_score": 0.24217853074985313 - }, - { - "epoch": 8, - "grad_norm": 0.7189271450042725, - "learning_rate": 0.215, - "model_norm": 89.98765563964844, - "step_logs": { - "grad_norm": { - "432": 0.7926420569419861, - "433": 0.7989777326583862, - "434": 0.8231915235519409, - "435": 0.8223428130149841, - "436": 0.777944803237915, - "437": 0.8000281453132629, - "438": 0.790794312953949, - "439": 0.8025705814361572, - "440": 0.9105353951454163, - "441": 0.9671114683151245, - "442": 0.963681697845459, - "443": 1.0239083766937256, - "444": 0.8694352507591248, - "445": 0.689085066318512, - "446": 0.6770842671394348, - "447": 0.7280488014221191, - "448": 0.7739311456680298, - "449": 0.795528769493103, - "450": 0.8310406804084778, - "451": 0.8493906855583191, - "452": 0.8626106381416321, - "453": 0.8665022850036621, - "454": 0.9031314253807068, - "455": 0.9011504054069519, - "456": 0.8754758834838867, - "457": 0.8766058683395386, - "458": 0.8711204528808594, - "459": 0.8336485624313354, - "460": 0.8183372020721436, - "461": 0.8496360778808594, - "462": 0.8110702633857727, - "463": 0.8053188323974609, - "464": 0.8417520523071289, - "465": 0.8457974791526794, - "466": 0.7822505831718445, - "467": 0.762436032295227, - "468": 0.7585875988006592, - "469": 0.7191105484962463, - "470": 0.7579717636108398, - "471": 0.8460325598716736, - "472": 0.7916812896728516, - "473": 0.7571032047271729, - "474": 0.8186747431755066, - "475": 0.833448052406311, - "476": 0.8681373000144958, - "477": 0.8861498236656189, - "478": 0.8977588415145874, - "479": 0.8764435052871704, - "480": 0.8431791663169861, - "481": 0.8432224988937378, - "482": 0.7808829545974731, - "483": 0.7327014207839966, - "484": 0.7027769088745117, - "485": 0.7189271450042725 - }, - "loss": { - "432": 2.726034164428711, - "433": 2.7230072021484375, - "434": 2.716484308242798, - "435": 2.7352614402770996, - "436": 2.7156851291656494, - "437": 2.7152185440063477, - "438": 2.6940643787384033, - "439": 2.694014072418213, - "440": 2.7165210247039795, - "441": 2.7441444396972656, - "442": 2.7323758602142334, - "443": 2.7198245525360107, - "444": 2.74222993850708, - "445": 2.678779363632202, - "446": 2.6904664039611816, - "447": 2.6978626251220703, - "448": 2.719388008117676, - "449": 2.7081289291381836, - "450": 2.6950571537017822, - "451": 2.71567440032959, - "452": 2.706803321838379, - "453": 2.720440626144409, - "454": 2.686661720275879, - "455": 2.707317590713501, - "456": 2.682138442993164, - "457": 2.7008464336395264, - "458": 2.6916580200195312, - "459": 2.696120500564575, - "460": 2.6658854484558105, - "461": 2.6969757080078125, - "462": 2.6969947814941406, - "463": 2.6731390953063965, - "464": 2.6696786880493164, - "465": 2.7056665420532227, - "466": 2.6599316596984863, - "467": 2.682149887084961, - "468": 2.683171272277832, - "469": 2.678713083267212, - "470": 2.658048629760742, - "471": 2.7085988521575928, - "472": 2.696321964263916, - "473": 2.6821703910827637, - "474": 2.6862521171569824, - "475": 2.6759538650512695, - "476": 2.654883861541748, - "477": 2.690098524093628, - "478": 2.662038564682007, - "479": 2.6642403602600098, - "480": 2.680803060531616, - "481": 2.669417142868042, - "482": 2.657824754714966, - "483": 2.6759777069091797, - "484": 2.6603567600250244, - "485": 2.6720380783081055 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "train_epoch_time": 4.788131475448608, - "train_loss": 2.6499642800395424, - "train_score": 0.2541775018447099, - "val_loss": 2.6721480404605273, - "val_score": 0.24951115837395532 - }, - { - "epoch": 9, - "grad_norm": 1.1421939134597778, - "learning_rate": 0.215, - "model_norm": 90.0042724609375, - "step_logs": { - "grad_norm": { - "486": 0.774936854839325, - "487": 0.789344310760498, - "488": 0.7899917364120483, - "489": 0.839102029800415, - "490": 0.8981372714042664, - "491": 0.8626739978790283, - "492": 0.8392364978790283, - "493": 0.8226275444030762, - "494": 0.7573186159133911, - "495": 0.7592282295227051, - "496": 0.8314062356948853, - "497": 0.8425015807151794, - "498": 0.8403638601303101, - "499": 0.8894930481910706, - "500": 0.8961754441261292, - "501": 0.8464363813400269, - "502": 0.8113922476768494, - "503": 0.8303462266921997, - "504": 0.8036644458770752, - "505": 0.8080917596817017, - "506": 0.838226318359375, - "507": 0.8522719144821167, - "508": 0.8235961198806763, - "509": 0.8319439888000488, - "510": 0.8286922574043274, - "511": 0.8342664241790771, - "512": 0.8681519031524658, - "513": 0.8117028474807739, - "514": 0.7368595600128174, - "515": 0.7533247470855713, - "516": 0.7707027792930603, - "517": 0.7643579244613647, - "518": 0.7609565854072571, - "519": 0.7944704294204712, - "520": 0.83812016248703, - "521": 0.8342036604881287, - "522": 0.8647300601005554, - "523": 0.8340088129043579, - "524": 0.8161774277687073, - "525": 0.8764876127243042, - "526": 0.8609760999679565, - "527": 0.7913983464241028, - "528": 0.7784112691879272, - "529": 0.787605345249176, - "530": 0.7245925664901733, - "531": 0.7029888033866882, - "532": 0.7153397798538208, - "533": 0.7838276028633118, - "534": 0.8455857634544373, - "535": 0.9038657546043396, - "536": 0.9485446810722351, - "537": 0.9726511836051941, - "538": 1.2298110723495483, - "539": 1.1421939134597778 - }, - "loss": { - "486": 2.6547884941101074, - "487": 2.6654915809631348, - "488": 2.651487350463867, - "489": 2.669064998626709, - "490": 2.6671881675720215, - "491": 2.6770849227905273, - "492": 2.6650609970092773, - "493": 2.6781563758850098, - "494": 2.6454010009765625, - "495": 2.651963472366333, - "496": 2.643042802810669, - "497": 2.6682839393615723, - "498": 2.636284351348877, - "499": 2.6732192039489746, - "500": 2.6559107303619385, - "501": 2.661837577819824, - "502": 2.6275672912597656, - "503": 2.6706929206848145, - "504": 2.636566638946533, - "505": 2.6518187522888184, - "506": 2.6540379524230957, - "507": 2.644232749938965, - "508": 2.646148920059204, - "509": 2.6418371200561523, - "510": 2.632054090499878, - "511": 2.6597626209259033, - "512": 2.644534111022949, - "513": 2.657193660736084, - "514": 2.649960994720459, - "515": 2.6479477882385254, - "516": 2.634392261505127, - "517": 2.6228082180023193, - "518": 2.641427516937256, - "519": 2.640104293823242, - "520": 2.6271142959594727, - "521": 2.636256456375122, - "522": 2.6182384490966797, - "523": 2.6398000717163086, - "524": 2.6404244899749756, - "525": 2.6555655002593994, - "526": 2.6381704807281494, - "527": 2.6360859870910645, - "528": 2.634208917617798, - "529": 2.6462559700012207, - "530": 2.6401264667510986, - "531": 2.6096601486206055, - "532": 2.604496479034424, - "533": 2.6160969734191895, - "534": 2.6260948181152344, - "535": 2.639922618865967, - "536": 2.6462202072143555, - "537": 2.658841609954834, - "538": 2.6615941524505615, - "539": 2.685661792755127 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "train_epoch_time": 4.78758978843689, - "train_loss": 2.642064675915361, - "train_score": 0.2591261656417737, - "val_loss": 2.673168515775836, - "val_score": 0.25316177539518864 - }, - { - "epoch": 10, - "grad_norm": 0.8105792999267578, - "learning_rate": 0.215, - "model_norm": 90.02066802978516, - "step_logs": { - "grad_norm": { - "540": 0.9202944040298462, - "541": 0.6527591943740845, - "542": 0.47710418701171875, - "543": 0.4099416136741638, - "544": 0.3978358209133148, - "545": 0.4463668465614319, - "546": 0.5331204533576965, - "547": 0.6135859489440918, - "548": 0.7367595434188843, - "549": 0.8629884719848633, - "550": 0.942812979221344, - "551": 0.8674833178520203, - "552": 0.8434673547744751, - "553": 0.8539221286773682, - "554": 0.8579002618789673, - "555": 0.8397010564804077, - "556": 0.8100090026855469, - "557": 0.7524319887161255, - "558": 0.6755796074867249, - "559": 0.6974266171455383, - "560": 0.7552376985549927, - "561": 0.8239412307739258, - "562": 0.8400392532348633, - "563": 0.7862880229949951, - "564": 0.7592045664787292, - "565": 0.7833024859428406, - "566": 0.8161395192146301, - "567": 0.8042528629302979, - "568": 0.8144784569740295, - "569": 0.854443371295929, - "570": 0.8920965790748596, - "571": 0.86241614818573, - "572": 0.8849621415138245, - "573": 0.8547306656837463, - "574": 0.7955521941184998, - "575": 0.8058813810348511, - "576": 0.7878369688987732, - "577": 0.7980623841285706, - "578": 0.806784451007843, - "579": 0.8099424242973328, - "580": 0.800332248210907, - "581": 0.8077536225318909, - "582": 0.82627934217453, - "583": 0.8378875255584717, - "584": 0.8036258220672607, - "585": 0.7895596027374268, - "586": 0.8051947951316833, - "587": 0.7983424067497253, - "588": 0.7736822962760925, - "589": 0.8170568346977234, - "590": 0.798283576965332, - "591": 0.7759832739830017, - "592": 0.8374855518341064, - "593": 0.8105792999267578 - }, - "loss": { - "540": 2.645414352416992, - "541": 2.601228713989258, - "542": 2.607617139816284, - "543": 2.6148698329925537, - "544": 2.577263116836548, - "545": 2.618373155593872, - "546": 2.604825019836426, - "547": 2.5996720790863037, - "548": 2.6001129150390625, - "549": 2.6322121620178223, - "550": 2.6262269020080566, - "551": 2.649817943572998, - "552": 2.603966474533081, - "553": 2.626373291015625, - "554": 2.602977991104126, - "555": 2.6050095558166504, - "556": 2.6211352348327637, - "557": 2.617664098739624, - "558": 2.582747220993042, - "559": 2.6031644344329834, - "560": 2.599234104156494, - "561": 2.621400833129883, - "562": 2.604050397872925, - "563": 2.6263318061828613, - "564": 2.6078262329101562, - "565": 2.6009013652801514, - "566": 2.611826181411743, - "567": 2.6159708499908447, - "568": 2.6037800312042236, - "569": 2.6126961708068848, - "570": 2.606269598007202, - "571": 2.6246185302734375, - "572": 2.6028943061828613, - "573": 2.607581377029419, - "574": 2.586949110031128, - "575": 2.6003947257995605, - "576": 2.6079659461975098, - "577": 2.6256375312805176, - "578": 2.587466239929199, - "579": 2.6293957233428955, - "580": 2.59682035446167, - "581": 2.6400249004364014, - "582": 2.595548152923584, - "583": 2.598919630050659, - "584": 2.6017112731933594, - "585": 2.6250505447387695, - "586": 2.603376865386963, - "587": 2.5864858627319336, - "588": 2.558115243911743, - "589": 2.610131025314331, - "590": 2.5858097076416016, - "591": 2.5904438495635986, - "592": 2.5815930366516113, - "593": 2.5887889862060547 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "train_epoch_time": 4.787614583969116, - "train_loss": 2.575866939347649, - "train_score": 0.277403156256231, - "val_loss": 2.5959476907272427, - "val_score": 0.27063002368065303 - }, - { - "epoch": 11, - "grad_norm": 0.875197172164917, - "learning_rate": 0.215, - "model_norm": 90.03768920898438, - "step_logs": { - "grad_norm": { - "594": 0.7318904399871826, - "595": 0.7436156272888184, - "596": 0.8266604542732239, - "597": 0.8615882992744446, - "598": 0.9162756204605103, - "599": 0.9221148490905762, - "600": 0.8109691739082336, - "601": 0.7648108601570129, - "602": 0.8120312690734863, - "603": 0.8307549357414246, - "604": 0.8280071020126343, - "605": 0.8337923288345337, - "606": 0.8870125412940979, - "607": 0.9061279296875, - "608": 0.8689539432525635, - "609": 0.8609936833381653, - "610": 0.8323072195053101, - "611": 0.8224025368690491, - "612": 0.7654007077217102, - "613": 0.7466140985488892, - "614": 0.7352055907249451, - "615": 0.6889374256134033, - "616": 0.6608432531356812, - "617": 0.681706428527832, - "618": 0.7127463221549988, - "619": 0.7802812457084656, - "620": 0.7767740488052368, - "621": 0.7628665566444397, - "622": 0.8329023122787476, - "623": 0.8388475179672241, - "624": 0.8315045237541199, - "625": 0.8319355249404907, - "626": 0.8590131402015686, - "627": 0.9036568403244019, - "628": 0.9744861125946045, - "629": 0.9878485202789307, - "630": 0.9991350769996643, - "631": 0.9696122407913208, - "632": 0.8828770518302917, - "633": 0.8702228665351868, - "634": 0.8099728226661682, - "635": 0.7859927415847778, - "636": 0.8151223063468933, - "637": 0.7930382490158081, - "638": 0.7298247218132019, - "639": 0.7442294359207153, - "640": 0.780197024345398, - "641": 0.8113732933998108, - "642": 0.8424757122993469, - "643": 0.8369328379631042, - "644": 0.9091742634773254, - "645": 0.8744722604751587, - "646": 0.8243760466575623, - "647": 0.875197172164917 - }, - "loss": { - "594": 2.5986640453338623, - "595": 2.5922470092773438, - "596": 2.559065341949463, - "597": 2.6092607975006104, - "598": 2.5705878734588623, - "599": 2.611008644104004, - "600": 2.5883731842041016, - "601": 2.584822177886963, - "602": 2.5769855976104736, - "603": 2.6051034927368164, - "604": 2.558088541030884, - "605": 2.5871388912200928, - "606": 2.5882492065429688, - "607": 2.5978493690490723, - "608": 2.5725479125976562, - "609": 2.5780062675476074, - "610": 2.572591543197632, - "611": 2.5934290885925293, - "612": 2.554013729095459, - "613": 2.574382781982422, - "614": 2.566502332687378, - "615": 2.560737133026123, - "616": 2.5686984062194824, - "617": 2.558824300765991, - "618": 2.5420079231262207, - "619": 2.588202953338623, - "620": 2.547837495803833, - "621": 2.565319776535034, - "622": 2.576639175415039, - "623": 2.59043288230896, - "624": 2.5701470375061035, - "625": 2.563173294067383, - "626": 2.548001289367676, - "627": 2.594208240509033, - "628": 2.5640463829040527, - "629": 2.588613510131836, - "630": 2.5525963306427, - "631": 2.5972185134887695, - "632": 2.557283401489258, - "633": 2.572479248046875, - "634": 2.5519425868988037, - "635": 2.575077533721924, - "636": 2.5363049507141113, - "637": 2.56215238571167, - "638": 2.533780574798584, - "639": 2.554736852645874, - "640": 2.5582571029663086, - "641": 2.580899715423584, - "642": 2.553868293762207, - "643": 2.5631532669067383, - "644": 2.552365779876709, - "645": 2.572369337081909, - "646": 2.542363166809082, - "647": 2.5645699501037598 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "train_epoch_time": 4.789721488952637, - "train_loss": 2.5492118271727815, - "train_score": 0.27290956780763403, - "val_loss": 2.5740746077659074, - "val_score": 0.2664860799378287 - }, - { - "epoch": 12, - "grad_norm": 0.333244651556015, - "learning_rate": 0.215, - "model_norm": 90.05404663085938, - "step_logs": { - "grad_norm": { - "648": 0.8464791178703308, - "649": 0.792945384979248, - "650": 0.7463672161102295, - "651": 0.7440354824066162, - "652": 0.6671098470687866, - "653": 0.6137361526489258, - "654": 0.6120710372924805, - "655": 0.6230190992355347, - "656": 0.6977333426475525, - "657": 0.7608842849731445, - "658": 0.7870320081710815, - "659": 0.7720927000045776, - "660": 0.7534703612327576, - "661": 0.7125488519668579, - "662": 0.6258469223976135, - "663": 0.5617893934249878, - "664": 0.5211414694786072, - "665": 0.5212739109992981, - "666": 0.5903427600860596, - "667": 0.614560067653656, - "668": 0.513554573059082, - "669": 0.48795396089553833, - "670": 0.4889700412750244, - "671": 0.4966708719730377, - "672": 0.5107088088989258, - "673": 0.5752347111701965, - "674": 0.6768213510513306, - "675": 0.6287813186645508, - "676": 0.5406306982040405, - "677": 0.5165386199951172, - "678": 0.5147174000740051, - "679": 0.5726690292358398, - "680": 0.5681961178779602, - "681": 0.5488607287406921, - "682": 0.5158001184463501, - "683": 0.49433648586273193, - "684": 0.5104917287826538, - "685": 0.5056292414665222, - "686": 0.452014684677124, - "687": 0.4608464539051056, - "688": 0.3932307958602905, - "689": 0.34603723883628845, - "690": 0.350154846906662, - "691": 0.32048991322517395, - "692": 0.32272493839263916, - "693": 0.3508996069431305, - "694": 0.34015968441963196, - "695": 0.3036709129810333, - "696": 0.21490435302257538, - "697": 0.26322346925735474, - "698": 0.28995272517204285, - "699": 0.30863478779792786, - "700": 0.3334333300590515, - "701": 0.333244651556015 - }, - "loss": { - "648": 2.5739521980285645, - "649": 2.5625720024108887, - "650": 2.528660774230957, - "651": 2.5539798736572266, - "652": 2.539076805114746, - "653": 2.5287158489227295, - "654": 2.5322656631469727, - "655": 2.5453648567199707, - "656": 2.515843391418457, - "657": 2.5385658740997314, - "658": 2.540787935256958, - "659": 2.5231783390045166, - "660": 2.5068016052246094, - "661": 2.531193733215332, - "662": 2.537111520767212, - "663": 2.515406608581543, - "664": 2.5116560459136963, - "665": 2.5265791416168213, - "666": 2.5038695335388184, - "667": 2.5283470153808594, - "668": 2.4902427196502686, - "669": 2.511512517929077, - "670": 2.5232579708099365, - "671": 2.5061254501342773, - "672": 2.4852712154388428, - "673": 2.5059726238250732, - "674": 2.4989097118377686, - "675": 2.5028374195098877, - "676": 2.4783995151519775, - "677": 2.5110583305358887, - "678": 2.505390167236328, - "679": 2.479796886444092, - "680": 2.4870598316192627, - "681": 2.50077748298645, - "682": 2.518289804458618, - "683": 2.4920525550842285, - "684": 2.497824192047119, - "685": 2.5083065032958984, - "686": 2.4897947311401367, - "687": 2.505855083465576, - "688": 2.4958443641662598, - "689": 2.481900453567505, - "690": 2.4656410217285156, - "691": 2.494762897491455, - "692": 2.4867098331451416, - "693": 2.487244129180908, - "694": 2.487565040588379, - "695": 2.4959309101104736, - "696": 2.486945629119873, - "697": 2.4767005443573, - "698": 2.4904942512512207, - "699": 2.4781301021575928, - "700": 2.491523265838623, - "701": 2.475743532180786 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "train_epoch_time": 4.789546966552734, - "train_loss": 2.4798699770288453, - "train_score": 0.2917907102463748, - "val_loss": 2.5084203365339066, - "val_score": 0.2836717860912209 - }, - { - "epoch": 13, - "grad_norm": 0.22255468368530273, - "learning_rate": 0.14333333333333334, - "model_norm": 90.06339263916016, - "step_logs": { - "grad_norm": { - "702": 0.3254527747631073, - "703": 0.32196152210235596, - "704": 0.27880069613456726, - "705": 0.2983052730560303, - "706": 0.2600209712982178, - "707": 0.24989959597587585, - "708": 0.2855205237865448, - "709": 0.2425023913383484, - "710": 0.19059450924396515, - "711": 0.2019641250371933, - "712": 0.23242764174938202, - "713": 0.2383408099412918, - "714": 0.3065362870693207, - "715": 0.28599047660827637, - "716": 0.24889793992042542, - "717": 0.23252075910568237, - "718": 0.2174641191959381, - "719": 0.23333929479122162, - "720": 0.2253527045249939, - "721": 0.2340705394744873, - "722": 0.21060937643051147, - "723": 0.19206549227237701, - "724": 0.2401646226644516, - "725": 0.22520342469215393, - "726": 0.2765664756298065, - "727": 0.2403375655412674, - "728": 0.23163574934005737, - "729": 0.2577357292175293, - "730": 0.28781864047050476, - "731": 0.28929200768470764, - "732": 0.2649271488189697, - "733": 0.19876720011234283, - "734": 0.23014399409294128, - "735": 0.22949522733688354, - "736": 0.25297853350639343, - "737": 0.2293279469013214, - "738": 0.244782954454422, - "739": 0.26518508791923523, - "740": 0.2911612391471863, - "741": 0.2876875102519989, - "742": 0.2208746075630188, - "743": 0.2016288936138153, - "744": 0.18806782364845276, - "745": 0.18521235883235931, - "746": 0.22986558079719543, - "747": 0.24523615837097168, - "748": 0.22743189334869385, - "749": 0.24057932198047638, - "750": 0.2770741879940033, - "751": 0.22961671650409698, - "752": 0.2245391607284546, - "753": 0.2719363868236542, - "754": 0.2566705644130707, - "755": 0.22255468368530273 - }, - "loss": { - "702": 2.470973014831543, - "703": 2.4694724082946777, - "704": 2.4753260612487793, - "705": 2.4650182723999023, - "706": 2.4860806465148926, - "707": 2.4775500297546387, - "708": 2.471518039703369, - "709": 2.446056365966797, - "710": 2.4666614532470703, - "711": 2.4795753955841064, - "712": 2.463106632232666, - "713": 2.4601449966430664, - "714": 2.4911739826202393, - "715": 2.475454330444336, - "716": 2.4524683952331543, - "717": 2.479682445526123, - "718": 2.475834369659424, - "719": 2.4757492542266846, - "720": 2.4753825664520264, - "721": 2.457169771194458, - "722": 2.4689249992370605, - "723": 2.4699013233184814, - "724": 2.483585834503174, - "725": 2.4806008338928223, - "726": 2.4481563568115234, - "727": 2.4571986198425293, - "728": 2.4966092109680176, - "729": 2.4555840492248535, - "730": 2.464932918548584, - "731": 2.4793448448181152, - "732": 2.4816951751708984, - "733": 2.462080717086792, - "734": 2.4785118103027344, - "735": 2.477837085723877, - "736": 2.442711114883423, - "737": 2.467097282409668, - "738": 2.464322566986084, - "739": 2.4767346382141113, - "740": 2.455671787261963, - "741": 2.4706153869628906, - "742": 2.4600114822387695, - "743": 2.462299346923828, - "744": 2.461491823196411, - "745": 2.4541378021240234, - "746": 2.469048023223877, - "747": 2.4606869220733643, - "748": 2.4661974906921387, - "749": 2.463500499725342, - "750": 2.463406562805176, - "751": 2.45944881439209, - "752": 2.4614057540893555, - "753": 2.4608306884765625, - "754": 2.4598333835601807, - "755": 2.4613046646118164 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "train_epoch_time": 4.788499593734741, - "train_loss": 2.458518195118076, - "train_score": 0.29523515953566115, - "val_loss": 2.4877452877714767, - "val_score": 0.28768567082255087 - }, - { - "epoch": 14, - "grad_norm": 0.22655870020389557, - "learning_rate": 0.07166666666666667, - "model_norm": 90.0665054321289, - "step_logs": { - "grad_norm": { - "756": 0.20486435294151306, - "757": 0.20474794507026672, - "758": 0.2312590479850769, - "759": 0.21431821584701538, - "760": 0.27988582849502563, - "761": 0.23682889342308044, - "762": 0.2059798687696457, - "763": 0.21797338128089905, - "764": 0.24932338297367096, - "765": 0.21827971935272217, - "766": 0.22083665430545807, - "767": 0.2079319953918457, - "768": 0.21095088124275208, - "769": 0.25421908497810364, - "770": 0.22950629889965057, - "771": 0.25423556566238403, - "772": 0.2013881653547287, - "773": 0.21997787058353424, - "774": 0.23208023607730865, - "775": 0.21095694601535797, - "776": 0.21547789871692657, - "777": 0.2107398808002472, - "778": 0.22477035224437714, - "779": 0.2206610143184662, - "780": 0.2200593799352646, - "781": 0.2470172494649887, - "782": 0.19685353338718414, - "783": 0.22865822911262512, - "784": 0.21985742449760437, - "785": 0.21865254640579224, - "786": 0.22141164541244507, - "787": 0.24514709413051605, - "788": 0.21211248636245728, - "789": 0.21584874391555786, - "790": 0.23548944294452667, - "791": 0.22890590131282806, - "792": 0.22032247483730316, - "793": 0.19007331132888794, - "794": 0.2175266444683075, - "795": 0.20073430240154266, - "796": 0.22327978909015656, - "797": 0.20514175295829773, - "798": 0.2098139226436615, - "799": 0.23038193583488464, - "800": 0.1975613236427307, - "801": 0.19325576722621918, - "802": 0.2310025691986084, - "803": 0.17658105492591858, - "804": 0.23319245874881744, - "805": 0.231744185090065, - "806": 0.21699704229831696, - "807": 0.2077859342098236, - "808": 0.20882068574428558, - "809": 0.22655870020389557 - }, - "loss": { - "756": 2.4470129013061523, - "757": 2.4508137702941895, - "758": 2.481356143951416, - "759": 2.4673657417297363, - "760": 2.426910638809204, - "761": 2.4617691040039062, - "762": 2.462177038192749, - "763": 2.451254367828369, - "764": 2.4547553062438965, - "765": 2.4492552280426025, - "766": 2.455984592437744, - "767": 2.457019090652466, - "768": 2.4542086124420166, - "769": 2.451037645339966, - "770": 2.4701530933380127, - "771": 2.475334644317627, - "772": 2.452157974243164, - "773": 2.4461469650268555, - "774": 2.465477466583252, - "775": 2.4510555267333984, - "776": 2.4737071990966797, - "777": 2.466977119445801, - "778": 2.432931900024414, - "779": 2.4575607776641846, - "780": 2.4424209594726562, - "781": 2.4508864879608154, - "782": 2.4603219032287598, - "783": 2.461171865463257, - "784": 2.464177131652832, - "785": 2.4428091049194336, - "786": 2.441136360168457, - "787": 2.4483089447021484, - "788": 2.4741387367248535, - "789": 2.44865083694458, - "790": 2.4641923904418945, - "791": 2.447673797607422, - "792": 2.467134475708008, - "793": 2.446427822113037, - "794": 2.445901870727539, - "795": 2.4542627334594727, - "796": 2.4518728256225586, - "797": 2.447606086730957, - "798": 2.449295997619629, - "799": 2.440582752227783, - "800": 2.471240997314453, - "801": 2.4618782997131348, - "802": 2.440150737762451, - "803": 2.450505018234253, - "804": 2.4535796642303467, - "805": 2.453066825866699, - "806": 2.4409408569335938, - "807": 2.4557628631591797, - "808": 2.443204879760742, - "809": 2.4749069213867188 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "train_epoch_time": 4.788120269775391, - "train_loss": 2.452035622822503, - "train_score": 0.29824807216310434, - "val_loss": 2.4824917954226997, - "val_score": 0.2904707235118415 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:29:51.343361", - "final_model_norm": 90.0665054321289, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:28:10.527306", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.215, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 6.077920913696289, - "learning_rate": 2.15e-11, - "model_norm": 87.36822509765625, - "step_logs": { - "grad_norm": { - "0": 22.7664794921875, - "1": 23.4499454498291, - "2": 6.6349334716796875, - "3": 7.6453537940979, - "4": 21.053424835205078, - "5": 5.934286594390869, - "6": 5.640627861022949, - "7": 4.451308250427246, - "8": 4.475229263305664, - "9": 4.164397716522217, - "10": 3.7405574321746826, - "11": 4.641786575317383, - "12": 49.5823860168457, - "13": 9.11623764038086, - "14": 9.79990291595459, - "15": 4.567177772521973, - "16": 4.766762733459473, - "17": 9.39215087890625, - "18": 4.285684108734131, - "19": 4.2459893226623535, - "20": 2.2008137702941895, - "21": 2.864260673522949, - "22": 3.6269586086273193, - "23": 7.04875373840332, - "24": 4.5517730712890625, - "25": 5.148194313049316, - "26": 6.476187705993652, - "27": 6.546270370483398, - "28": 4.287119388580322, - "29": 7.839882850646973, - "30": 4.186831951141357, - "31": 6.000692844390869, - "32": 4.175516605377197, - "33": 15.996603965759277, - "34": 9.080464363098145, - "35": 7.6884284019470215, - "36": 38.54820251464844, - "37": 14.596907615661621, - "38": 6.987175464630127, - "39": 19.94704818725586, - "40": 7.987712383270264, - "41": 5.601441383361816, - "42": 8.12180233001709, - "43": 17.737707138061523, - "44": 9.772014617919922, - "45": 5.679656982421875, - "46": 4.703907012939453, - "47": 7.149433135986328, - "48": 12.810194969177246, - "49": 5.0451154708862305, - "50": 4.432951927185059, - "51": 4.247447490692139, - "52": 3.1106362342834473, - "53": 6.077920913696289 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.8218994140625, - "3": 3.692150592803955, - "4": 4.159581184387207, - "5": 4.390280723571777, - "6": 3.900965690612793, - "7": 3.615814208984375, - "8": 3.503880500793457, - "9": 3.5965518951416016, - "10": 3.383478879928589, - "11": 3.4530043601989746, - "12": 3.501619815826416, - "13": 3.548842430114746, - "14": 6.311364650726318, - "15": 4.368919849395752, - "16": 3.7320752143859863, - "17": 4.103618144989014, - "18": 5.041207313537598, - "19": 4.1853837966918945, - "20": 3.632913112640381, - "21": 3.43510103225708, - "22": 3.862799882888794, - "23": 3.7428183555603027, - "24": 5.015302658081055, - "25": 4.486359119415283, - "26": 4.012516975402832, - "27": 5.328915596008301, - "28": 4.898715019226074, - "29": 4.456262588500977, - "30": 5.987215042114258, - "31": 4.812840461730957, - "32": 4.575559616088867, - "33": 6.3855366706848145, - "34": 9.660634994506836, - "35": 7.86287260055542, - "36": 15.256529808044434, - "37": 8.436349868774414, - "38": 6.197444438934326, - "39": 21.0192813873291, - "40": 11.476614952087402, - "41": 9.14688777923584, - "42": 6.709882736206055, - "43": 9.87257194519043, - "44": 13.530729293823242, - "45": 11.448413848876953, - "46": 8.412544250488281, - "47": 6.6663665771484375, - "48": 5.991023063659668, - "49": 11.85226058959961, - "50": 9.765589714050293, - "51": 8.009904861450195, - "52": 5.777992248535156, - "53": 4.64279842376709 - }, - "lr": { - "0": 2.15e-11, - "1": 0.00430000002107, - "2": 0.00860000002064, - "3": 0.01290000002021, - "4": 0.017200000019779997, - "5": 0.02150000001935, - "6": 0.025800000018919998, - "7": 0.03010000001849, - "8": 0.03440000001806, - "9": 0.03870000001763, - "10": 0.0430000000172, - "11": 0.04730000001677, - "12": 0.05160000001634, - "13": 0.055900000015909994, - "14": 0.060200000015479996, - "15": 0.06450000001505, - "16": 0.06880000001462, - "17": 0.07310000001419, - "18": 0.07740000001376, - "19": 0.08170000001333, - "20": 0.08600000001290001, - "21": 0.09030000001246999, - "22": 0.09460000001204, - "23": 0.09890000001161, - "24": 0.10320000001118, - "25": 0.10750000001074997, - "26": 0.11180000001031999, - "27": 0.11610000000989, - "28": 0.12040000000946, - "29": 0.12470000000902998, - "30": 0.12900000000859999, - "31": 0.13330000000817, - "32": 0.13760000000774, - "33": 0.14190000000730998, - "34": 0.14620000000687997, - "35": 0.15050000000645, - "36": 0.15480000000602, - "37": 0.15910000000559, - "38": 0.16340000000516, - "39": 0.16770000000472998, - "40": 0.17200000000430002, - "41": 0.17630000000387, - "42": 0.18060000000343998, - "43": 0.18490000000301, - "44": 0.18920000000258, - "45": 0.19350000000214998, - "46": 0.19780000000172002, - "47": 0.20210000000129, - "48": 0.20640000000086, - "49": 0.21070000000043, - "50": 0.215, - "51": 0.215, - "52": 0.215, - "53": 0.215 - } - }, - "train_epoch_time": 4.789698362350464, - "train_loss": 6.529137916783863, - "train_score": 0.15261163918068968, - "val_loss": 6.497304850139246, - "val_score": 0.15114631179621793 - }, - { - "epoch": 1, - "grad_norm": 1.4856921434402466, - "learning_rate": 0.215, - "model_norm": 87.27825927734375, - "step_logs": { - "grad_norm": { - "54": 11.869610786437988, - "55": 4.589770793914795, - "56": 3.871187448501587, - "57": 3.0290331840515137, - "58": 2.436267852783203, - "59": 2.2911205291748047, - "60": 1.804243803024292, - "61": 2.240053415298462, - "62": 7.117811679840088, - "63": 2.5063371658325195, - "64": 2.2628278732299805, - "65": 1.9130676984786987, - "66": 1.442577838897705, - "67": 4.810521125793457, - "68": 1.9549905061721802, - "69": 1.8040640354156494, - "70": 1.366626262664795, - "71": 4.335608005523682, - "72": 1.8876115083694458, - "73": 1.7165403366088867, - "74": 0.7243217825889587, - "75": 2.042759656906128, - "76": 1.7447822093963623, - "77": 0.6170192360877991, - "78": 0.7040872573852539, - "79": 1.5890698432922363, - "80": 1.6113613843917847, - "81": 1.5932040214538574, - "82": 1.6757556200027466, - "83": 1.8084766864776611, - "84": 1.5848227739334106, - "85": 0.8979315161705017, - "86": 1.2454639673233032, - "87": 2.7386019229888916, - "88": 1.6894149780273438, - "89": 1.1755759716033936, - "90": 2.214646339416504, - "91": 1.549574613571167, - "92": 0.48825210332870483, - "93": 0.8731581568717957, - "94": 1.1210391521453857, - "95": 2.0358450412750244, - "96": 1.5302635431289673, - "97": 0.4016644060611725, - "98": 0.30150073766708374, - "99": 0.37550848722457886, - "100": 0.6430637240409851, - "101": 0.8840423226356506, - "102": 1.7102267742156982, - "103": 1.507495403289795, - "104": 0.7491609454154968, - "105": 0.9745044112205505, - "106": 1.8706775903701782, - "107": 1.4856921434402466 - }, - "loss": { - "54": 6.523248672485352, - "55": 9.864269256591797, - "56": 8.262983322143555, - "57": 6.586701393127441, - "58": 5.057945251464844, - "59": 4.078909873962402, - "60": 3.469339609146118, - "61": 3.7730026245117188, - "62": 4.014268398284912, - "63": 5.833746910095215, - "64": 5.0863566398620605, - "65": 4.142653465270996, - "66": 3.459670066833496, - "67": 3.709359645843506, - "68": 4.762309551239014, - "69": 4.044641494750977, - "70": 3.460516929626465, - "71": 3.6896283626556396, - "72": 4.4954023361206055, - "73": 3.896549701690674, - "74": 3.3746585845947266, - "75": 3.424943447113037, - "76": 3.7681851387023926, - "77": 3.3631649017333984, - "78": 3.356832504272461, - "79": 3.42862868309021, - "80": 3.6133546829223633, - "81": 3.4071061611175537, - "82": 3.5914642810821533, - "83": 3.437199115753174, - "84": 3.650853395462036, - "85": 3.3495874404907227, - "86": 3.4265880584716797, - "87": 3.49777889251709, - "88": 3.8932418823242188, - "89": 3.388164758682251, - "90": 3.4002089500427246, - "91": 3.7453536987304688, - "92": 3.344038724899292, - "93": 3.35149884223938, - "94": 3.4281797409057617, - "95": 3.447147846221924, - "96": 3.7067110538482666, - "97": 3.359945058822632, - "98": 3.2945384979248047, - "99": 3.339402198791504, - "100": 3.316775321960449, - "101": 3.402829170227051, - "102": 3.37821626663208, - "103": 3.588418960571289, - "104": 3.3727869987487793, - "105": 3.369229793548584, - "106": 3.4336323738098145, - "107": 3.6263248920440674 - }, - "lr": { - "54": 0.215, - "55": 0.215, - "56": 0.215, - "57": 0.215, - "58": 0.215, - "59": 0.215, - "60": 0.215, - "61": 0.215, - "62": 0.215, - "63": 0.215, - "64": 0.215, - "65": 0.215, - "66": 0.215, - "67": 0.215, - "68": 0.215, - "69": 0.215, - "70": 0.215, - "71": 0.215, - "72": 0.215, - "73": 0.215, - "74": 0.215, - "75": 0.215, - "76": 0.215, - "77": 0.215, - "78": 0.215, - "79": 0.215, - "80": 0.215, - "81": 0.215, - "82": 0.215, - "83": 0.215, - "84": 0.215, - "85": 0.215, - "86": 0.215, - "87": 0.215, - "88": 0.215, - "89": 0.215, - "90": 0.215, - "91": 0.215, - "92": 0.215, - "93": 0.215, - "94": 0.215, - "95": 0.215, - "96": 0.215, - "97": 0.215, - "98": 0.215, - "99": 0.215, - "100": 0.215, - "101": 0.215, - "102": 0.215, - "103": 0.215, - "104": 0.215, - "105": 0.215, - "106": 0.215, - "107": 0.215 - } - }, - "train_epoch_time": 4.786825180053711, - "train_loss": 3.3327819862529915, - "train_score": 0.15260939743033097, - "val_loss": 3.3497142756163183, - "val_score": 0.151137342221017 - }, - { - "epoch": 2, - "grad_norm": 0.8111432790756226, - "learning_rate": 0.215, - "model_norm": 87.2788314819336, - "step_logs": { - "grad_norm": { - "108": 0.35535019636154175, - "109": 0.41004061698913574, - "110": 0.7887808084487915, - "111": 1.0005075931549072, - "112": 1.6472957134246826, - "113": 1.4260789155960083, - "114": 0.6731420755386353, - "115": 0.8426632285118103, - "116": 1.4507036209106445, - "117": 1.3046455383300781, - "118": 0.8133808374404907, - "119": 0.9756836891174316, - "120": 1.4995774030685425, - "121": 1.268728494644165, - "122": 0.5787860751152039, - "123": 0.7328672409057617, - "124": 1.1708590984344482, - "125": 1.1593525409698486, - "126": 1.08345365524292, - "127": 1.107581615447998, - "128": 1.1313707828521729, - "129": 1.1563996076583862, - "130": 1.1990396976470947, - "131": 1.165459394454956, - "132": 1.0536150932312012, - "133": 1.0777044296264648, - "134": 1.1467459201812744, - "135": 1.1163371801376343, - "136": 1.0541691780090332, - "137": 1.0202151536941528, - "138": 0.8965868353843689, - "139": 0.9138199090957642, - "140": 0.9571571946144104, - "141": 0.9860230088233948, - "142": 1.0593401193618774, - "143": 0.9975009560585022, - "144": 0.8001709580421448, - "145": 0.8473645448684692, - "146": 0.9612659811973572, - "147": 0.9572203159332275, - "148": 0.948639452457428, - "149": 0.9353222250938416, - "150": 0.8946100473403931, - "151": 0.866615891456604, - "152": 0.8099010586738586, - "153": 0.8457209467887878, - "154": 0.9313750267028809, - "155": 0.9235068559646606, - "156": 0.8846747279167175, - "157": 0.8261868953704834, - "158": 0.6901465058326721, - "159": 0.7314639091491699, - "160": 0.8012805581092834, - "161": 0.8111432790756226 - }, - "loss": { - "108": 3.322842836380005, - "109": 3.31815242767334, - "110": 3.356515407562256, - "111": 3.3720946311950684, - "112": 3.3815221786499023, - "113": 3.5712978839874268, - "114": 3.3611392974853516, - "115": 3.365431785583496, - "116": 3.387702703475952, - "117": 3.51013445854187, - "118": 3.331129550933838, - "119": 3.3873703479766846, - "120": 3.3873519897460938, - "121": 3.5078282356262207, - "122": 3.356292247772217, - "123": 3.340384006500244, - "124": 3.3751261234283447, - "125": 3.46814227104187, - "126": 3.3612375259399414, - "127": 3.417555570602417, - "128": 3.380253314971924, - "129": 3.452554225921631, - "130": 3.3590869903564453, - "131": 3.4244987964630127, - "132": 3.348438262939453, - "133": 3.4079668521881104, - "134": 3.361523151397705, - "135": 3.4007935523986816, - "136": 3.3805172443389893, - "137": 3.425121784210205, - "138": 3.351166248321533, - "139": 3.390328884124756, - "140": 3.319578170776367, - "141": 3.360882043838501, - "142": 3.3449981212615967, - "143": 3.430695056915283, - "144": 3.33786940574646, - "145": 3.371558666229248, - "146": 3.346402406692505, - "147": 3.3841986656188965, - "148": 3.3241076469421387, - "149": 3.3841922283172607, - "150": 3.3525712490081787, - "151": 3.3927950859069824, - "152": 3.3303329944610596, - "153": 3.368870973587036, - "154": 3.353760242462158, - "155": 3.4009079933166504, - "156": 3.3530325889587402, - "157": 3.388096570968628, - "158": 3.3189611434936523, - "159": 3.330489158630371, - "160": 3.362576961517334, - "161": 3.3486876487731934 - }, - "lr": { - "108": 0.215, - "109": 0.215, - "110": 0.215, - "111": 0.215, - "112": 0.215, - "113": 0.215, - "114": 0.215, - "115": 0.215, - "116": 0.215, - "117": 0.215, - "118": 0.215, - "119": 0.215, - "120": 0.215, - "121": 0.215, - "122": 0.215, - "123": 0.215, - "124": 0.215, - "125": 0.215, - "126": 0.215, - "127": 0.215, - "128": 0.215, - "129": 0.215, - "130": 0.215, - "131": 0.215, - "132": 0.215, - "133": 0.215, - "134": 0.215, - "135": 0.215, - "136": 0.215, - "137": 0.215, - "138": 0.215, - "139": 0.215, - "140": 0.215, - "141": 0.215, - "142": 0.215, - "143": 0.215, - "144": 0.215, - "145": 0.215, - "146": 0.215, - "147": 0.215, - "148": 0.215, - "149": 0.215, - "150": 0.215, - "151": 0.215, - "152": 0.215, - "153": 0.215, - "154": 0.215, - "155": 0.215, - "156": 0.215, - "157": 0.215, - "158": 0.215, - "159": 0.215, - "160": 0.215, - "161": 0.215 - } - }, - "train_epoch_time": 4.787140846252441, - "train_loss": 3.3408212109651934, - "train_score": 0.1526071556799723, - "val_loss": 3.3590463866596245, - "val_score": 0.15113285743341653 - }, - { - "epoch": 3, - "grad_norm": 0.6169420480728149, - "learning_rate": 0.215, - "model_norm": 87.28771209716797, - "step_logs": { - "grad_norm": { - "162": 0.8387348651885986, - "163": 0.8527382016181946, - "164": 0.858231246471405, - "165": 0.8544841408729553, - "166": 0.8558735847473145, - "167": 0.8337991833686829, - "168": 0.7569968700408936, - "169": 0.7420404553413391, - "170": 0.7194749712944031, - "171": 0.709821343421936, - "172": 0.6832669973373413, - "173": 0.7073488235473633, - "174": 0.7284060120582581, - "175": 0.7330428957939148, - "176": 0.7531108260154724, - "177": 0.7508716583251953, - "178": 0.729932963848114, - "179": 0.7399073839187622, - "180": 0.7604870200157166, - "181": 0.7344769835472107, - "182": 0.6657344698905945, - "183": 0.6793492436408997, - "184": 1.359244465827942, - "185": 0.7296868562698364, - "186": 0.7522320747375488, - "187": 0.7540530562400818, - "188": 0.7392151951789856, - "189": 0.7214963436126709, - "190": 0.7019885778427124, - "191": 0.6850538849830627, - "192": 0.6184571385383606, - "193": 0.6071142554283142, - "194": 0.644235372543335, - "195": 0.5772275924682617, - "196": 0.5074628591537476, - "197": 0.50543612241745, - "198": 0.55255526304245, - "199": 0.559185802936554, - "200": 0.5517669320106506, - "201": 0.5493614077568054, - "202": 0.560684323310852, - "203": 0.58799147605896, - "204": 0.5828995108604431, - "205": 0.5954332947731018, - "206": 0.6386681795120239, - "207": 0.6341294050216675, - "208": 2.3786919116973877, - "209": 0.5914960503578186, - "210": 0.6234338283538818, - "211": 0.6651337146759033, - "212": 0.7441654205322266, - "213": 0.6836565732955933, - "214": 0.5970624089241028, - "215": 0.6169420480728149 - }, - "loss": { - "162": 3.3511502742767334, - "163": 3.3699984550476074, - "164": 3.310316562652588, - "165": 3.377857208251953, - "166": 3.3292713165283203, - "167": 3.403754711151123, - "168": 3.349243640899658, - "169": 3.3550338745117188, - "170": 3.331726312637329, - "171": 3.3729991912841797, - "172": 3.3522112369537354, - "173": 3.3064165115356445, - "174": 3.3293087482452393, - "175": 3.3332948684692383, - "176": 3.3127431869506836, - "177": 3.356052875518799, - "178": 3.3308143615722656, - "179": 3.335836887359619, - "180": 3.331758499145508, - "181": 3.370661973953247, - "182": 3.3110573291778564, - "183": 3.3385353088378906, - "184": 3.3235414028167725, - "185": 3.3248844146728516, - "186": 3.3646793365478516, - "187": 3.3493258953094482, - "188": 3.339038848876953, - "189": 3.359717607498169, - "190": 3.3311684131622314, - "191": 3.3507838249206543, - "192": 3.3271584510803223, - "193": 3.356938362121582, - "194": 3.3362069129943848, - "195": 3.3590471744537354, - "196": 3.3082900047302246, - "197": 3.328854560852051, - "198": 3.3504397869110107, - "199": 3.314671516418457, - "200": 3.3251729011535645, - "201": 3.328035831451416, - "202": 3.3087635040283203, - "203": 3.354644298553467, - "204": 3.3108410835266113, - "205": 3.290799379348755, - "206": 3.3224592208862305, - "207": 3.345951557159424, - "208": 3.299952507019043, - "209": 3.3060054779052734, - "210": 3.3326518535614014, - "211": 3.305673122406006, - "212": 3.3199119567871094, - "213": 3.316722869873047, - "214": 3.3202450275421143, - "215": 3.31882643699646 - }, - "lr": { - "162": 0.215, - "163": 0.215, - "164": 0.215, - "165": 0.215, - "166": 0.215, - "167": 0.215, - "168": 0.215, - "169": 0.215, - "170": 0.215, - "171": 0.215, - "172": 0.215, - "173": 0.215, - "174": 0.215, - "175": 0.215, - "176": 0.215, - "177": 0.215, - "178": 0.215, - "179": 0.215, - "180": 0.215, - "181": 0.215, - "182": 0.215, - "183": 0.215, - "184": 0.215, - "185": 0.215, - "186": 0.215, - "187": 0.215, - "188": 0.215, - "189": 0.215, - "190": 0.215, - "191": 0.215, - "192": 0.215, - "193": 0.215, - "194": 0.215, - "195": 0.215, - "196": 0.215, - "197": 0.215, - "198": 0.215, - "199": 0.215, - "200": 0.215, - "201": 0.215, - "202": 0.215, - "203": 0.215, - "204": 0.215, - "205": 0.215, - "206": 0.215, - "207": 0.215, - "208": 0.215, - "209": 0.215, - "210": 0.215, - "211": 0.215, - "212": 0.215, - "213": 0.215, - "214": 0.215, - "215": 0.215 - } - }, - "train_epoch_time": 4.787456750869751, - "train_loss": 3.313899508234076, - "train_score": 0.15282348453399955, - "val_loss": 3.3315236439797964, - "val_score": 0.15133018813916108 - }, - { - "epoch": 4, - "grad_norm": 0.670421838760376, - "learning_rate": 0.215, - "model_norm": 87.3119125366211, - "step_logs": { - "grad_norm": { - "216": 0.6451330780982971, - "217": 0.6348541378974915, - "218": 0.6159845590591431, - "219": 0.6299844980239868, - "220": 0.6784602403640747, - "221": 0.6624956130981445, - "222": 0.5949603915214539, - "223": 0.574101984500885, - "224": 0.5231589674949646, - "225": 0.541008472442627, - "226": 0.5974878668785095, - "227": 0.586403489112854, - "228": 0.5651665329933167, - "229": 0.5600197911262512, - "230": 0.6042607426643372, - "231": 0.7281671762466431, - "232": 0.5431760549545288, - "233": 0.539922297000885, - "234": 0.5826413631439209, - "235": 0.7212216258049011, - "236": 0.6261200904846191, - "237": 0.6035612225532532, - "238": 0.6246703267097473, - "239": 0.6139011979103088, - "240": 0.6408724188804626, - "241": 0.6072050929069519, - "242": 0.537537157535553, - "243": 0.5610190033912659, - "244": 0.5322518348693848, - "245": 0.5885132551193237, - "246": 0.4615285098552704, - "247": 0.43787333369255066, - "248": 0.40506094694137573, - "249": 0.3916375935077667, - "250": 0.41418108344078064, - "251": 0.405314564704895, - "252": 0.4265989065170288, - "253": 0.44390663504600525, - "254": 0.43615999817848206, - "255": 0.46891316771507263, - "256": 0.42196711897850037, - "257": 0.5520937442779541, - "258": 0.4568729102611542, - "259": 0.4163910448551178, - "260": 0.4757370948791504, - "261": 0.532852828502655, - "262": 0.5347477793693542, - "263": 0.5436654686927795, - "264": 0.5505630373954773, - "265": 0.6072302460670471, - "266": 0.6586366295814514, - "267": 0.7452479004859924, - "268": 0.7439396977424622, - "269": 0.670421838760376 - }, - "loss": { - "216": 3.317852020263672, - "217": 3.3241753578186035, - "218": 3.280233860015869, - "219": 3.3255691528320312, - "220": 3.330249547958374, - "221": 3.3232483863830566, - "222": 3.3002681732177734, - "223": 3.3094522953033447, - "224": 3.2819571495056152, - "225": 3.2664742469787598, - "226": 3.294806480407715, - "227": 3.289361000061035, - "228": 3.2745213508605957, - "229": 3.252540349960327, - "230": 3.2487754821777344, - "231": 3.2643327713012695, - "232": 3.265249490737915, - "233": 3.2404260635375977, - "234": 3.244673490524292, - "235": 3.247864246368408, - "236": 3.2547900676727295, - "237": 3.2230396270751953, - "238": 3.237241744995117, - "239": 3.2245707511901855, - "240": 3.2629575729370117, - "241": 3.2289626598358154, - "242": 3.2242543697357178, - "243": 3.218991279602051, - "244": 3.202751874923706, - "245": 3.196383476257324, - "246": 3.228240489959717, - "247": 3.188025712966919, - "248": 3.221277952194214, - "249": 3.206367254257202, - "250": 3.1918442249298096, - "251": 3.1852059364318848, - "252": 3.2247707843780518, - "253": 3.1700127124786377, - "254": 3.1840429306030273, - "255": 3.1898441314697266, - "256": 3.1908950805664062, - "257": 3.1921744346618652, - "258": 3.18982195854187, - "259": 3.17565655708313, - "260": 3.1605029106140137, - "261": 3.164581775665283, - "262": 3.1729331016540527, - "263": 3.1780571937561035, - "264": 3.1998534202575684, - "265": 3.162940740585327, - "266": 3.182992696762085, - "267": 3.195838212966919, - "268": 3.1946492195129395, - "269": 3.1778159141540527 - }, - "lr": { - "216": 0.215, - "217": 0.215, - "218": 0.215, - "219": 0.215, - "220": 0.215, - "221": 0.215, - "222": 0.215, - "223": 0.215, - "224": 0.215, - "225": 0.215, - "226": 0.215, - "227": 0.215, - "228": 0.215, - "229": 0.215, - "230": 0.215, - "231": 0.215, - "232": 0.215, - "233": 0.215, - "234": 0.215, - "235": 0.215, - "236": 0.215, - "237": 0.215, - "238": 0.215, - "239": 0.215, - "240": 0.215, - "241": 0.215, - "242": 0.215, - "243": 0.215, - "244": 0.215, - "245": 0.215, - "246": 0.215, - "247": 0.215, - "248": 0.215, - "249": 0.215, - "250": 0.215, - "251": 0.215, - "252": 0.215, - "253": 0.215, - "254": 0.215, - "255": 0.215, - "256": 0.215, - "257": 0.215, - "258": 0.215, - "259": 0.215, - "260": 0.215, - "261": 0.215, - "262": 0.215, - "263": 0.215, - "264": 0.215, - "265": 0.215, - "266": 0.215, - "267": 0.215, - "268": 0.215, - "269": 0.215 - } - }, - "train_epoch_time": 4.786958932876587, - "train_loss": 3.161458277668125, - "train_score": 0.16830501256662941, - "val_loss": 3.1793044029503, - "val_score": 0.16916170339907352 - }, - { - "epoch": 5, - "grad_norm": 0.37339940667152405, - "learning_rate": 0.215, - "model_norm": 87.32972717285156, - "step_logs": { - "grad_norm": { - "270": 0.6625267863273621, - "271": 0.6968127489089966, - "272": 0.6935476660728455, - "273": 0.71907639503479, - "274": 0.6943307518959045, - "275": 0.7740764617919922, - "276": 0.7148994207382202, - "277": 0.6267349720001221, - "278": 0.5778032541275024, - "279": 0.6179059147834778, - "280": 0.6499062180519104, - "281": 0.6469667553901672, - "282": 0.6240212917327881, - "283": 0.6231783032417297, - "284": 0.6064325571060181, - "285": 0.569333016872406, - "286": 0.5686502456665039, - "287": 0.597974419593811, - "288": 0.675082266330719, - "289": 0.6956878900527954, - "290": 0.723746657371521, - "291": 2.3303587436676025, - "292": 0.7077440619468689, - "293": 0.6254588961601257, - "294": 0.6035510301589966, - "295": 0.606689989566803, - "296": 0.5411074757575989, - "297": 0.5205525159835815, - "298": 0.5259056687355042, - "299": 0.5359814763069153, - "300": 0.5090657472610474, - "301": 0.4810810089111328, - "302": 0.47699999809265137, - "303": 0.4793867766857147, - "304": 0.46788397431373596, - "305": 0.49155518412590027, - "306": 0.4893914759159088, - "307": 0.4411744773387909, - "308": 0.3958076536655426, - "309": 0.38335585594177246, - "310": 0.3874962329864502, - "311": 0.39907410740852356, - "312": 0.36685293912887573, - "313": 0.36812588572502136, - "314": 0.37550675868988037, - "315": 0.3682321012020111, - "316": 0.29608356952667236, - "317": 0.3181905150413513, - "318": 0.3577767014503479, - "319": 0.34146150946617126, - "320": 0.31448543071746826, - "321": 0.3274773955345154, - "322": 0.32901275157928467, - "323": 0.37339940667152405 - }, - "loss": { - "270": 3.1653189659118652, - "271": 3.1713690757751465, - "272": 3.168196678161621, - "273": 3.178093194961548, - "274": 3.163637638092041, - "275": 3.1799283027648926, - "276": 3.146660804748535, - "277": 3.1673648357391357, - "278": 3.1741108894348145, - "279": 3.1767468452453613, - "280": 3.129758358001709, - "281": 3.155074119567871, - "282": 3.169614315032959, - "283": 3.1428141593933105, - "284": 3.1488687992095947, - "285": 3.1413421630859375, - "286": 3.1328420639038086, - "287": 3.135307788848877, - "288": 3.145845413208008, - "289": 3.144890785217285, - "290": 3.13344407081604, - "291": 3.157616138458252, - "292": 3.139730453491211, - "293": 3.1229124069213867, - "294": 3.156095266342163, - "295": 3.1524593830108643, - "296": 3.1093201637268066, - "297": 3.1507599353790283, - "298": 3.1365773677825928, - "299": 3.1154351234436035, - "300": 3.119210958480835, - "301": 3.1349916458129883, - "302": 3.118986129760742, - "303": 3.114738702774048, - "304": 3.1073477268218994, - "305": 3.1248555183410645, - "306": 3.148867130279541, - "307": 3.1083621978759766, - "308": 3.0962090492248535, - "309": 3.102027416229248, - "310": 3.0883021354675293, - "311": 3.1197428703308105, - "312": 3.0965030193328857, - "313": 3.0827300548553467, - "314": 3.1131014823913574, - "315": 3.1007065773010254, - "316": 3.0781543254852295, - "317": 3.0970852375030518, - "318": 3.0916857719421387, - "319": 3.0961599349975586, - "320": 3.0929479598999023, - "321": 3.0957860946655273, - "322": 3.0800256729125977, - "323": 3.0998568534851074 - }, - "lr": { - "270": 0.215, - "271": 0.215, - "272": 0.215, - "273": 0.215, - "274": 0.215, - "275": 0.215, - "276": 0.215, - "277": 0.215, - "278": 0.215, - "279": 0.215, - "280": 0.215, - "281": 0.215, - "282": 0.215, - "283": 0.215, - "284": 0.215, - "285": 0.215, - "286": 0.215, - "287": 0.215, - "288": 0.215, - "289": 0.215, - "290": 0.215, - "291": 0.215, - "292": 0.215, - "293": 0.215, - "294": 0.215, - "295": 0.215, - "296": 0.215, - "297": 0.215, - "298": 0.215, - "299": 0.215, - "300": 0.215, - "301": 0.215, - "302": 0.215, - "303": 0.215, - "304": 0.215, - "305": 0.215, - "306": 0.215, - "307": 0.215, - "308": 0.215, - "309": 0.215, - "310": 0.215, - "311": 0.215, - "312": 0.215, - "313": 0.215, - "314": 0.215, - "315": 0.215, - "316": 0.215, - "317": 0.215, - "318": 0.215, - "319": 0.215, - "320": 0.215, - "321": 0.215, - "322": 0.215, - "323": 0.215 - } - }, - "train_epoch_time": 4.78779411315918, - "train_loss": 3.0909181351299093, - "train_score": 0.16923197631856463, - "val_loss": 3.1075858760509645, - "val_score": 0.1697133325305516 - }, - { - "epoch": 6, - "grad_norm": 0.6102023124694824, - "learning_rate": 0.215, - "model_norm": 87.34899139404297, - "step_logs": { - "grad_norm": { - "324": 0.4444461464881897, - "325": 0.4435453414916992, - "326": 0.41333329677581787, - "327": 0.4126593768596649, - "328": 0.41375643014907837, - "329": 0.4180811941623688, - "330": 0.40402382612228394, - "331": 0.3396933078765869, - "332": 0.3120470345020294, - "333": 0.289199560880661, - "334": 0.30204156041145325, - "335": 0.37522488832473755, - "336": 0.36297503113746643, - "337": 0.34802213311195374, - "338": 0.3163204789161682, - "339": 0.39584481716156006, - "340": 0.4930034279823303, - "341": 0.5016665458679199, - "342": 0.5021680593490601, - "343": 0.5064347982406616, - "344": 0.5202027559280396, - "345": 0.5447956323623657, - "346": 0.5895446538925171, - "347": 0.5841565728187561, - "348": 0.5752105116844177, - "349": 0.5648969411849976, - "350": 0.5351980924606323, - "351": 0.4993586242198944, - "352": 0.44063395261764526, - "353": 0.42633867263793945, - "354": 0.421481192111969, - "355": 0.44679000973701477, - "356": 0.48287343978881836, - "357": 0.5100218653678894, - "358": 0.5513139963150024, - "359": 0.5653921365737915, - "360": 0.5791242122650146, - "361": 0.5630341172218323, - "362": 0.559499979019165, - "363": 0.5815654993057251, - "364": 0.6149219274520874, - "365": 0.5965008735656738, - "366": 0.5366994738578796, - "367": 0.5196714997291565, - "368": 0.543583333492279, - "369": 0.5505325198173523, - "370": 0.5447538495063782, - "371": 0.5405067801475525, - "372": 0.5433398485183716, - "373": 0.5742666721343994, - "374": 0.591201663017273, - "375": 0.566830039024353, - "376": 0.5739427804946899, - "377": 0.6102023124694824 - }, - "loss": { - "324": 3.1055169105529785, - "325": 3.1025147438049316, - "326": 3.094877004623413, - "327": 3.0828020572662354, - "328": 3.089111804962158, - "329": 3.076788902282715, - "330": 3.093977451324463, - "331": 3.090378761291504, - "332": 3.0599541664123535, - "333": 3.074714183807373, - "334": 3.0523428916931152, - "335": 3.0880205631256104, - "336": 3.055826425552368, - "337": 3.0632164478302, - "338": 3.0672097206115723, - "339": 3.062894105911255, - "340": 3.0573248863220215, - "341": 3.0769617557525635, - "342": 3.0491747856140137, - "343": 3.07951021194458, - "344": 3.061558485031128, - "345": 3.0667731761932373, - "346": 3.0706470012664795, - "347": 3.071831226348877, - "348": 3.061471462249756, - "349": 3.0732688903808594, - "350": 3.054048538208008, - "351": 3.064836025238037, - "352": 3.049079656600952, - "353": 3.054973602294922, - "354": 3.0346391201019287, - "355": 3.032949686050415, - "356": 3.04341721534729, - "357": 3.034526824951172, - "358": 3.0418925285339355, - "359": 3.0504884719848633, - "360": 3.0313611030578613, - "361": 3.0420703887939453, - "362": 3.0334463119506836, - "363": 3.057742118835449, - "364": 3.040750503540039, - "365": 3.0545032024383545, - "366": 3.0207877159118652, - "367": 3.045848846435547, - "368": 3.0533385276794434, - "369": 3.048614501953125, - "370": 3.0379717350006104, - "371": 3.0152246952056885, - "372": 3.030602216720581, - "373": 3.0384011268615723, - "374": 3.0256872177124023, - "375": 3.020007610321045, - "376": 3.0139355659484863, - "377": 3.010154962539673 - }, - "lr": { - "324": 0.215, - "325": 0.215, - "326": 0.215, - "327": 0.215, - "328": 0.215, - "329": 0.215, - "330": 0.215, - "331": 0.215, - "332": 0.215, - "333": 0.215, - "334": 0.215, - "335": 0.215, - "336": 0.215, - "337": 0.215, - "338": 0.215, - "339": 0.215, - "340": 0.215, - "341": 0.215, - "342": 0.215, - "343": 0.215, - "344": 0.215, - "345": 0.215, - "346": 0.215, - "347": 0.215, - "348": 0.215, - "349": 0.215, - "350": 0.215, - "351": 0.215, - "352": 0.215, - "353": 0.215, - "354": 0.215, - "355": 0.215, - "356": 0.215, - "357": 0.215, - "358": 0.215, - "359": 0.215, - "360": 0.215, - "361": 0.215, - "362": 0.215, - "363": 0.215, - "364": 0.215, - "365": 0.215, - "366": 0.215, - "367": 0.215, - "368": 0.215, - "369": 0.215, - "370": 0.215, - "371": 0.215, - "372": 0.215, - "373": 0.215, - "374": 0.215, - "375": 0.215, - "376": 0.215, - "377": 0.215 - } - }, - "train_epoch_time": 4.787391901016235, - "train_loss": 3.0113053153542912, - "train_score": 0.20032841648313202, - "val_loss": 3.025356713173447, - "val_score": 0.19772083085590608 - }, - { - "epoch": 7, - "grad_norm": 0.6647846102714539, - "learning_rate": 0.215, - "model_norm": 87.36872100830078, - "step_logs": { - "grad_norm": { - "378": 0.6336457133293152, - "379": 0.6626040935516357, - "380": 0.7448855638504028, - "381": 0.7292468547821045, - "382": 0.6713274717330933, - "383": 0.6519176363945007, - "384": 0.6270025372505188, - "385": 0.6080090403556824, - "386": 0.5886038541793823, - "387": 0.6071902513504028, - "388": 0.5993844866752625, - "389": 0.586733341217041, - "390": 0.6157594919204712, - "391": 0.6306374669075012, - "392": 0.647163450717926, - "393": 0.613325297832489, - "394": 0.5771270990371704, - "395": 0.5612251162528992, - "396": 0.5745760798454285, - "397": 0.5865729451179504, - "398": 0.6132022738456726, - "399": 0.6045475602149963, - "400": 0.6059160232543945, - "401": 0.5982393622398376, - "402": 0.6250435709953308, - "403": 0.6244946122169495, - "404": 0.5974777340888977, - "405": 0.6061265468597412, - "406": 0.6592593789100647, - "407": 0.6593686938285828, - "408": 0.6627476811408997, - "409": 0.7173672318458557, - "410": 0.7597169280052185, - "411": 0.7351342439651489, - "412": 0.7842375040054321, - "413": 0.8148161172866821, - "414": 0.7393429279327393, - "415": 0.6865562200546265, - "416": 0.7091619372367859, - "417": 0.7017120122909546, - "418": 0.7307785749435425, - "419": 0.6986539959907532, - "420": 0.7098849415779114, - "421": 0.7611108422279358, - "422": 0.7074168920516968, - "423": 0.6444043517112732, - "424": 0.6126922965049744, - "425": 0.6457280516624451, - "426": 0.6926257014274597, - "427": 0.7298979163169861, - "428": 0.7551694512367249, - "429": 0.7180377840995789, - "430": 0.6376101970672607, - "431": 0.6647846102714539 - }, - "loss": { - "378": 3.012711524963379, - "379": 3.0218725204467773, - "380": 3.0113372802734375, - "381": 3.0132715702056885, - "382": 2.9926042556762695, - "383": 3.015521764755249, - "384": 2.9965782165527344, - "385": 2.985593318939209, - "386": 2.9920315742492676, - "387": 2.9951610565185547, - "388": 2.9654879570007324, - "389": 2.964205265045166, - "390": 2.97655987739563, - "391": 2.9747135639190674, - "392": 2.9611973762512207, - "393": 2.963017463684082, - "394": 2.9406800270080566, - "395": 2.9470438957214355, - "396": 2.9419054985046387, - "397": 2.952143669128418, - "398": 2.9493112564086914, - "399": 2.942966938018799, - "400": 2.9240810871124268, - "401": 2.9421606063842773, - "402": 2.938958168029785, - "403": 2.942342758178711, - "404": 2.924873113632202, - "405": 2.9249110221862793, - "406": 2.9123377799987793, - "407": 2.928295850753784, - "408": 2.9285576343536377, - "409": 2.948155403137207, - "410": 2.9168591499328613, - "411": 2.93835186958313, - "412": 2.934396743774414, - "413": 2.952394485473633, - "414": 2.9147510528564453, - "415": 2.9214444160461426, - "416": 2.915121555328369, - "417": 2.9249324798583984, - "418": 2.9088242053985596, - "419": 2.9207215309143066, - "420": 2.908618450164795, - "421": 2.9163992404937744, - "422": 2.902296781539917, - "423": 2.9183013439178467, - "424": 2.8893632888793945, - "425": 2.884275436401367, - "426": 2.89687442779541, - "427": 2.9012069702148438, - "428": 2.883091449737549, - "429": 2.9020495414733887, - "430": 2.883294105529785, - "431": 2.9085745811462402 - }, - "lr": { - "378": 0.215, - "379": 0.215, - "380": 0.215, - "381": 0.215, - "382": 0.215, - "383": 0.215, - "384": 0.215, - "385": 0.215, - "386": 0.215, - "387": 0.215, - "388": 0.215, - "389": 0.215, - "390": 0.215, - "391": 0.215, - "392": 0.215, - "393": 0.215, - "394": 0.215, - "395": 0.215, - "396": 0.215, - "397": 0.215, - "398": 0.215, - "399": 0.215, - "400": 0.215, - "401": 0.215, - "402": 0.215, - "403": 0.215, - "404": 0.215, - "405": 0.215, - "406": 0.215, - "407": 0.215, - "408": 0.215, - "409": 0.215, - "410": 0.215, - "411": 0.215, - "412": 0.215, - "413": 0.215, - "414": 0.215, - "415": 0.215, - "416": 0.215, - "417": 0.215, - "418": 0.215, - "419": 0.215, - "420": 0.215, - "421": 0.215, - "422": 0.215, - "423": 0.215, - "424": 0.215, - "425": 0.215, - "426": 0.215, - "427": 0.215, - "428": 0.215, - "429": 0.215, - "430": 0.215, - "431": 0.215 - } - }, - "train_epoch_time": 4.788618803024292, - "train_loss": 2.8893617737412964, - "train_score": 0.19945077111936538, - "val_loss": 2.895953709441403, - "val_score": 0.19649648409760231 - }, - { - "epoch": 8, - "grad_norm": 0.8645181655883789, - "learning_rate": 0.215, - "model_norm": 87.3841552734375, - "step_logs": { - "grad_norm": { - "432": 0.7320230603218079, - "433": 0.7574577331542969, - "434": 0.757548987865448, - "435": 0.7315129637718201, - "436": 0.6912124156951904, - "437": 0.6866511702537537, - "438": 0.6837176084518433, - "439": 0.658955454826355, - "440": 0.6572275757789612, - "441": 0.6956648230552673, - "442": 0.761847198009491, - "443": 0.8084657192230225, - "444": 0.7601466774940491, - "445": 0.7445797324180603, - "446": 0.7134843468666077, - "447": 0.6999015212059021, - "448": 0.6812036037445068, - "449": 0.7135583162307739, - "450": 0.8219456076622009, - "451": 0.8512318134307861, - "452": 0.8112087845802307, - "453": 0.7589200139045715, - "454": 0.6968483328819275, - "455": 0.71698397397995, - "456": 0.7640736699104309, - "457": 0.7552897334098816, - "458": 0.7140202522277832, - "459": 0.7089266180992126, - "460": 0.7449092268943787, - "461": 0.8189528584480286, - "462": 0.7386939525604248, - "463": 0.6635098457336426, - "464": 0.7072518467903137, - "465": 0.7312489151954651, - "466": 0.7184119820594788, - "467": 0.7435293197631836, - "468": 0.7844306230545044, - "469": 0.7601557970046997, - "470": 0.6535776853561401, - "471": 0.6401973962783813, - "472": 0.7259147763252258, - "473": 0.7636087536811829, - "474": 0.8167770504951477, - "475": 0.8350787162780762, - "476": 0.9137019515037537, - "477": 0.8558284044265747, - "478": 0.7120906114578247, - "479": 0.6557267308235168, - "480": 0.6116313338279724, - "481": 0.6060966849327087, - "482": 0.6451256275177002, - "483": 0.7006394863128662, - "484": 0.8169694542884827, - "485": 0.8645181655883789 - }, - "loss": { - "432": 2.895479679107666, - "433": 2.8825843334198, - "434": 2.895631790161133, - "435": 2.913215160369873, - "436": 2.8721237182617188, - "437": 2.9042539596557617, - "438": 2.8725738525390625, - "439": 2.895151138305664, - "440": 2.8678359985351562, - "441": 2.8971800804138184, - "442": 2.88449764251709, - "443": 2.8970119953155518, - "444": 2.9025702476501465, - "445": 2.892518997192383, - "446": 2.8612895011901855, - "447": 2.884847402572632, - "448": 2.870945453643799, - "449": 2.8805625438690186, - "450": 2.8783106803894043, - "451": 2.8899383544921875, - "452": 2.8653080463409424, - "453": 2.8676810264587402, - "454": 2.857863187789917, - "455": 2.874101400375366, - "456": 2.863027811050415, - "457": 2.876707077026367, - "458": 2.862279176712036, - "459": 2.854330062866211, - "460": 2.8601717948913574, - "461": 2.892487049102783, - "462": 2.8549087047576904, - "463": 2.857929229736328, - "464": 2.8356876373291016, - "465": 2.8624653816223145, - "466": 2.846660852432251, - "467": 2.8655991554260254, - "468": 2.877556800842285, - "469": 2.8712148666381836, - "470": 2.844513416290283, - "471": 2.837743043899536, - "472": 2.8294878005981445, - "473": 2.8706116676330566, - "474": 2.847562789916992, - "475": 2.862053394317627, - "476": 2.851762533187866, - "477": 2.846886157989502, - "478": 2.8304171562194824, - "479": 2.844226360321045, - "480": 2.8177504539489746, - "481": 2.815992593765259, - "482": 2.8315796852111816, - "483": 2.8190722465515137, - "484": 2.810741901397705, - "485": 2.8421401977539062 - }, - "lr": { - "432": 0.215, - "433": 0.215, - "434": 0.215, - "435": 0.215, - "436": 0.215, - "437": 0.215, - "438": 0.215, - "439": 0.215, - "440": 0.215, - "441": 0.215, - "442": 0.215, - "443": 0.215, - "444": 0.215, - "445": 0.215, - "446": 0.215, - "447": 0.215, - "448": 0.215, - "449": 0.215, - "450": 0.215, - "451": 0.215, - "452": 0.215, - "453": 0.215, - "454": 0.215, - "455": 0.215, - "456": 0.215, - "457": 0.215, - "458": 0.215, - "459": 0.215, - "460": 0.215, - "461": 0.215, - "462": 0.215, - "463": 0.215, - "464": 0.215, - "465": 0.215, - "466": 0.215, - "467": 0.215, - "468": 0.215, - "469": 0.215, - "470": 0.215, - "471": 0.215, - "472": 0.215, - "473": 0.215, - "474": 0.215, - "475": 0.215, - "476": 0.215, - "477": 0.215, - "478": 0.215, - "479": 0.215, - "480": 0.215, - "481": 0.215, - "482": 0.215, - "483": 0.215, - "484": 0.215, - "485": 0.215 - } - }, - "train_epoch_time": 4.788419485092163, - "train_loss": 2.8321788505981096, - "train_score": 0.2014952474935154, - "val_loss": 2.844468199974086, - "val_score": 0.19819173386190023 - }, - { - "epoch": 9, - "grad_norm": 0.7967838644981384, - "learning_rate": 0.215, - "model_norm": 87.40331268310547, - "step_logs": { - "grad_norm": { - "486": 0.863577663898468, - "487": 0.910273015499115, - "488": 1.0217578411102295, - "489": 1.0543659925460815, - "490": 0.9987280368804932, - "491": 0.8875438570976257, - "492": 0.763034999370575, - "493": 0.7586895823478699, - "494": 0.8020071387290955, - "495": 0.7677304148674011, - "496": 0.6512123942375183, - "497": 0.6617212891578674, - "498": 0.7435228228569031, - "499": 0.7903182506561279, - "500": 0.9293562769889832, - "501": 0.923592209815979, - "502": 0.8962411880493164, - "503": 0.9309093952178955, - "504": 1.055930256843567, - "505": 1.0557267665863037, - "506": 0.9419136643409729, - "507": 0.8568878769874573, - "508": 0.7049791216850281, - "509": 0.671477735042572, - "510": 0.6625528335571289, - "511": 0.749578595161438, - "512": 0.8767014145851135, - "513": 0.8458848595619202, - "514": 0.8194774985313416, - "515": 0.8010562062263489, - "516": 0.7702640891075134, - "517": 0.7862281203269958, - "518": 0.7974769473075867, - "519": 0.8601404428482056, - "520": 0.891409158706665, - "521": 0.9281553030014038, - "522": 0.954971194267273, - "523": 0.9194952249526978, - "524": 0.9138132333755493, - "525": 0.9203317165374756, - "526": 0.8772714138031006, - "527": 0.7488911151885986, - "528": 0.6080586314201355, - "529": 0.5554305911064148, - "530": 0.4985148012638092, - "531": 0.5306365489959717, - "532": 0.5902933478355408, - "533": 0.6542091369628906, - "534": 0.8878393769264221, - "535": 0.9390087127685547, - "536": 0.8490703701972961, - "537": 0.7996656894683838, - "538": 0.7763608694076538, - "539": 0.7967838644981384 - }, - "loss": { - "486": 2.8398008346557617, - "487": 2.846127986907959, - "488": 2.8413970470428467, - "489": 2.855076551437378, - "490": 2.822486400604248, - "491": 2.8407158851623535, - "492": 2.81491756439209, - "493": 2.825246810913086, - "494": 2.803802013397217, - "495": 2.813525438308716, - "496": 2.8082432746887207, - "497": 2.804034471511841, - "498": 2.803358554840088, - "499": 2.811717987060547, - "500": 2.8228657245635986, - "501": 2.836787462234497, - "502": 2.824258327484131, - "503": 2.825194835662842, - "504": 2.8211379051208496, - "505": 2.8480353355407715, - "506": 2.819782257080078, - "507": 2.8179264068603516, - "508": 2.7950034141540527, - "509": 2.804069995880127, - "510": 2.771759033203125, - "511": 2.792710781097412, - "512": 2.8026788234710693, - "513": 2.805750846862793, - "514": 2.7909491062164307, - "515": 2.7906928062438965, - "516": 2.7877557277679443, - "517": 2.7926082611083984, - "518": 2.776503562927246, - "519": 2.7887425422668457, - "520": 2.765568733215332, - "521": 2.8041157722473145, - "522": 2.7904062271118164, - "523": 2.8032431602478027, - "524": 2.8205058574676514, - "525": 2.802842617034912, - "526": 2.7756612300872803, - "527": 2.7845804691314697, - "528": 2.759321689605713, - "529": 2.74727201461792, - "530": 2.7412500381469727, - "531": 2.7346391677856445, - "532": 2.7556943893432617, - "533": 2.7593154907226562, - "534": 2.766733169555664, - "535": 2.7949352264404297, - "536": 2.7737178802490234, - "537": 2.7629737854003906, - "538": 2.752772092819214, - "539": 2.7690720558166504 - }, - "lr": { - "486": 0.215, - "487": 0.215, - "488": 0.215, - "489": 0.215, - "490": 0.215, - "491": 0.215, - "492": 0.215, - "493": 0.215, - "494": 0.215, - "495": 0.215, - "496": 0.215, - "497": 0.215, - "498": 0.215, - "499": 0.215, - "500": 0.215, - "501": 0.215, - "502": 0.215, - "503": 0.215, - "504": 0.215, - "505": 0.215, - "506": 0.215, - "507": 0.215, - "508": 0.215, - "509": 0.215, - "510": 0.215, - "511": 0.215, - "512": 0.215, - "513": 0.215, - "514": 0.215, - "515": 0.215, - "516": 0.215, - "517": 0.215, - "518": 0.215, - "519": 0.215, - "520": 0.215, - "521": 0.215, - "522": 0.215, - "523": 0.215, - "524": 0.215, - "525": 0.215, - "526": 0.215, - "527": 0.215, - "528": 0.215, - "529": 0.215, - "530": 0.215, - "531": 0.215, - "532": 0.215, - "533": 0.215, - "534": 0.215, - "535": 0.215, - "536": 0.215, - "537": 0.215, - "538": 0.215, - "539": 0.215 - } - }, - "train_epoch_time": 4.788261651992798, - "train_loss": 2.75392352334054, - "train_score": 0.21601394363164558, - "val_loss": 2.7689981208739955, - "val_score": 0.21132319180207193 - }, - { - "epoch": 10, - "grad_norm": 0.6906720399856567, - "learning_rate": 0.215, - "model_norm": 87.4227523803711, - "step_logs": { - "grad_norm": { - "540": 0.7674277424812317, - "541": 0.8204943537712097, - "542": 0.8144788146018982, - "543": 0.7926110029220581, - "544": 0.7511706352233887, - "545": 0.8522761464118958, - "546": 0.8249024152755737, - "547": 0.7127631306648254, - "548": 0.6826196908950806, - "549": 0.632779598236084, - "550": 0.6577969789505005, - "551": 0.7654299736022949, - "552": 0.8071280121803284, - "553": 0.843177855014801, - "554": 0.8526422381401062, - "555": 0.8966861963272095, - "556": 0.7629060745239258, - "557": 0.5559282898902893, - "558": 0.5548087954521179, - "559": 0.5962457060813904, - "560": 0.6299737691879272, - "561": 0.6545103192329407, - "562": 0.7233629822731018, - "563": 0.794723629951477, - "564": 0.9696587920188904, - "565": 1.0049703121185303, - "566": 0.8969777226448059, - "567": 0.7934767007827759, - "568": 0.6555570363998413, - "569": 0.6655522584915161, - "570": 0.7455049157142639, - "571": 0.7585878372192383, - "572": 0.711405336856842, - "573": 0.7046390771865845, - "574": 0.7649972438812256, - "575": 0.7420599460601807, - "576": 0.6338107585906982, - "577": 0.5890849232673645, - "578": 0.6227665543556213, - "579": 0.626175045967102, - "580": 0.6935070753097534, - "581": 0.7580548524856567, - "582": 0.869880735874176, - "583": 0.883039653301239, - "584": 0.8479851484298706, - "585": 0.8208180665969849, - "586": 0.822525680065155, - "587": 0.8165813088417053, - "588": 0.8054198622703552, - "589": 0.79904705286026, - "590": 0.7519515156745911, - "591": 0.7321115136146545, - "592": 0.7165383696556091, - "593": 0.6906720399856567 - }, - "loss": { - "540": 2.7537412643432617, - "541": 2.7477316856384277, - "542": 2.7607250213623047, - "543": 2.744685411453247, - "544": 2.7443807125091553, - "545": 2.771444797515869, - "546": 2.768183708190918, - "547": 2.7401552200317383, - "548": 2.7484447956085205, - "549": 2.750321865081787, - "550": 2.7463276386260986, - "551": 2.7468857765197754, - "552": 2.7507147789001465, - "553": 2.7330169677734375, - "554": 2.744373083114624, - "555": 2.7623205184936523, - "556": 2.750678539276123, - "557": 2.715502977371216, - "558": 2.698390007019043, - "559": 2.7112960815429688, - "560": 2.7187483310699463, - "561": 2.7352845668792725, - "562": 2.7246944904327393, - "563": 2.7442188262939453, - "564": 2.714545726776123, - "565": 2.746157169342041, - "566": 2.750765323638916, - "567": 2.7406365871429443, - "568": 2.7229270935058594, - "569": 2.7272002696990967, - "570": 2.7348685264587402, - "571": 2.717965841293335, - "572": 2.722810745239258, - "573": 2.681318759918213, - "574": 2.7119460105895996, - "575": 2.7244019508361816, - "576": 2.69827938079834, - "577": 2.6981313228607178, - "578": 2.7124061584472656, - "579": 2.708868980407715, - "580": 2.71474289894104, - "581": 2.7171144485473633, - "582": 2.705875873565674, - "583": 2.7219858169555664, - "584": 2.7225890159606934, - "585": 2.7190616130828857, - "586": 2.727053165435791, - "587": 2.735574960708618, - "588": 2.714120864868164, - "589": 2.7138586044311523, - "590": 2.7019567489624023, - "591": 2.7239129543304443, - "592": 2.6868321895599365, - "593": 2.704176902770996 - }, - "lr": { - "540": 0.215, - "541": 0.215, - "542": 0.215, - "543": 0.215, - "544": 0.215, - "545": 0.215, - "546": 0.215, - "547": 0.215, - "548": 0.215, - "549": 0.215, - "550": 0.215, - "551": 0.215, - "552": 0.215, - "553": 0.215, - "554": 0.215, - "555": 0.215, - "556": 0.215, - "557": 0.215, - "558": 0.215, - "559": 0.215, - "560": 0.215, - "561": 0.215, - "562": 0.215, - "563": 0.215, - "564": 0.215, - "565": 0.215, - "566": 0.215, - "567": 0.215, - "568": 0.215, - "569": 0.215, - "570": 0.215, - "571": 0.215, - "572": 0.215, - "573": 0.215, - "574": 0.215, - "575": 0.215, - "576": 0.215, - "577": 0.215, - "578": 0.215, - "579": 0.215, - "580": 0.215, - "581": 0.215, - "582": 0.215, - "583": 0.215, - "584": 0.215, - "585": 0.215, - "586": 0.215, - "587": 0.215, - "588": 0.215, - "589": 0.215, - "590": 0.215, - "591": 0.215, - "592": 0.215, - "593": 0.215 - } - }, - "train_epoch_time": 4.788338899612427, - "train_loss": 2.6928715892638504, - "train_score": 0.23654053084648494, - "val_loss": 2.7124274439433957, - "val_score": 0.2324241178043948 - }, - { - "epoch": 11, - "grad_norm": 0.8027198910713196, - "learning_rate": 0.215, - "model_norm": 87.44013977050781, - "step_logs": { - "grad_norm": { - "594": 0.6872090101242065, - "595": 0.7225258350372314, - "596": 0.8049277663230896, - "597": 0.8187502026557922, - "598": 0.7759734392166138, - "599": 0.7756792902946472, - "600": 0.8381021618843079, - "601": 0.8250980973243713, - "602": 0.7214074730873108, - "603": 0.6632557511329651, - "604": 0.6125983595848083, - "605": 0.6200022101402283, - "606": 0.6515315175056458, - "607": 0.6949056386947632, - "608": 0.7261806726455688, - "609": 0.7122187614440918, - "610": 0.6823997497558594, - "611": 0.7193889021873474, - "612": 0.8079313635826111, - "613": 0.9209291338920593, - "614": 1.0537763833999634, - "615": 1.1957826614379883, - "616": 0.9810210466384888, - "617": 0.7510979771614075, - "618": 0.6561508178710938, - "619": 0.6144815683364868, - "620": 0.6295700073242188, - "621": 0.6692794561386108, - "622": 0.6943836212158203, - "623": 0.6960968375205994, - "624": 0.7200412154197693, - "625": 0.7527978420257568, - "626": 0.7561212778091431, - "627": 0.7079261541366577, - "628": 0.6200481653213501, - "629": 0.5895781517028809, - "630": 0.623550295829773, - "631": 0.6208155155181885, - "632": 0.6302371025085449, - "633": 0.7330285310745239, - "634": 0.9005295634269714, - "635": 0.9371844530105591, - "636": 0.839468777179718, - "637": 0.7934845089912415, - "638": 0.7483586668968201, - "639": 0.6826823949813843, - "640": 0.6983129382133484, - "641": 0.7323094606399536, - "642": 0.762589156627655, - "643": 0.8058868050575256, - "644": 0.8357001543045044, - "645": 0.839078962802887, - "646": 0.8175479173660278, - "647": 0.8027198910713196 - }, - "loss": { - "594": 2.6956052780151367, - "595": 2.692734718322754, - "596": 2.703125, - "597": 2.6973822116851807, - "598": 2.7055504322052, - "599": 2.6921238899230957, - "600": 2.693716526031494, - "601": 2.7087883949279785, - "602": 2.6817100048065186, - "603": 2.7040693759918213, - "604": 2.674466609954834, - "605": 2.675713539123535, - "606": 2.698209762573242, - "607": 2.6889586448669434, - "608": 2.6803107261657715, - "609": 2.6847589015960693, - "610": 2.6696243286132812, - "611": 2.693406820297241, - "612": 2.672724485397339, - "613": 2.711531162261963, - "614": 2.7127370834350586, - "615": 2.7407872676849365, - "616": 2.722634792327881, - "617": 2.689072608947754, - "618": 2.661468982696533, - "619": 2.6796717643737793, - "620": 2.6591899394989014, - "621": 2.6647849082946777, - "622": 2.6759605407714844, - "623": 2.6636757850646973, - "624": 2.6850664615631104, - "625": 2.6810436248779297, - "626": 2.675769090652466, - "627": 2.649951696395874, - "628": 2.6543140411376953, - "629": 2.6440024375915527, - "630": 2.6621458530426025, - "631": 2.649712085723877, - "632": 2.6524407863616943, - "633": 2.672440528869629, - "634": 2.6835622787475586, - "635": 2.693869113922119, - "636": 2.6719155311584473, - "637": 2.679391860961914, - "638": 2.6672911643981934, - "639": 2.655522346496582, - "640": 2.65548038482666, - "641": 2.6493778228759766, - "642": 2.656399726867676, - "643": 2.6746835708618164, - "644": 2.662984609603882, - "645": 2.693105697631836, - "646": 2.6630825996398926, - "647": 2.689990282058716 - }, - "lr": { - "594": 0.215, - "595": 0.215, - "596": 0.215, - "597": 0.215, - "598": 0.215, - "599": 0.215, - "600": 0.215, - "601": 0.215, - "602": 0.215, - "603": 0.215, - "604": 0.215, - "605": 0.215, - "606": 0.215, - "607": 0.215, - "608": 0.215, - "609": 0.215, - "610": 0.215, - "611": 0.215, - "612": 0.215, - "613": 0.215, - "614": 0.215, - "615": 0.215, - "616": 0.215, - "617": 0.215, - "618": 0.215, - "619": 0.215, - "620": 0.215, - "621": 0.215, - "622": 0.215, - "623": 0.215, - "624": 0.215, - "625": 0.215, - "626": 0.215, - "627": 0.215, - "628": 0.215, - "629": 0.215, - "630": 0.215, - "631": 0.215, - "632": 0.215, - "633": 0.215, - "634": 0.215, - "635": 0.215, - "636": 0.215, - "637": 0.215, - "638": 0.215, - "639": 0.215, - "640": 0.215, - "641": 0.215, - "642": 0.215, - "643": 0.215, - "644": 0.215, - "645": 0.215, - "646": 0.215, - "647": 0.215 - } - }, - "train_epoch_time": 4.788145542144775, - "train_loss": 2.6561296804392525, - "train_score": 0.24334872664731408, - "val_loss": 2.672862677021224, - "val_score": 0.2390571186483659 - }, - { - "epoch": 12, - "grad_norm": 0.2774239182472229, - "learning_rate": 0.215, - "model_norm": 87.45559692382812, - "step_logs": { - "grad_norm": { - "648": 0.792896568775177, - "649": 0.8353447914123535, - "650": 0.7810977101325989, - "651": 0.6959315538406372, - "652": 0.6664336323738098, - "653": 0.6596155762672424, - "654": 0.6479871869087219, - "655": 0.6403710246086121, - "656": 0.5676006078720093, - "657": 0.5141499042510986, - "658": 0.49931102991104126, - "659": 0.4865494966506958, - "660": 0.4632185399532318, - "661": 0.4731646478176117, - "662": 0.47062548995018005, - "663": 0.4806637763977051, - "664": 0.5314803123474121, - "665": 0.6604889631271362, - "666": 0.7280536890029907, - "667": 0.763297438621521, - "668": 0.7442384958267212, - "669": 0.641446590423584, - "670": 0.5107265710830688, - "671": 0.4029940366744995, - "672": 0.36072438955307007, - "673": 0.30386051535606384, - "674": 0.24650147557258606, - "675": 0.2413417249917984, - "676": 0.24129937589168549, - "677": 0.19265297055244446, - "678": 0.1772371083498001, - "679": 0.19798628985881805, - "680": 0.28178197145462036, - "681": 0.2590637505054474, - "682": 0.1860135942697525, - "683": 0.2082049697637558, - "684": 0.20826393365859985, - "685": 0.1825958788394928, - "686": 0.19578436017036438, - "687": 0.1916915327310562, - "688": 0.1876634657382965, - "689": 0.23662607371807098, - "690": 0.2560403645038605, - "691": 0.2715053856372833, - "692": 0.29553651809692383, - "693": 0.32233282923698425, - "694": 0.33564308285713196, - "695": 0.3652053773403168, - "696": 0.2813124358654022, - "697": 0.2436148226261139, - "698": 0.27020028233528137, - "699": 0.31399574875831604, - "700": 0.2893792986869812, - "701": 0.2774239182472229 - }, - "loss": { - "648": 2.6537842750549316, - "649": 2.6676998138427734, - "650": 2.6535425186157227, - "651": 2.6724400520324707, - "652": 2.645719528198242, - "653": 2.6452810764312744, - "654": 2.6345176696777344, - "655": 2.6461164951324463, - "656": 2.640970230102539, - "657": 2.6327147483825684, - "658": 2.6243534088134766, - "659": 2.6190435886383057, - "660": 2.604731559753418, - "661": 2.6417384147644043, - "662": 2.6303091049194336, - "663": 2.6267900466918945, - "664": 2.619058609008789, - "665": 2.635082721710205, - "666": 2.6518149375915527, - "667": 2.616220474243164, - "668": 2.634359836578369, - "669": 2.648690700531006, - "670": 2.636586904525757, - "671": 2.617892026901245, - "672": 2.610780715942383, - "673": 2.6191556453704834, - "674": 2.6143531799316406, - "675": 2.604020118713379, - "676": 2.628568410873413, - "677": 2.61314058303833, - "678": 2.6149210929870605, - "679": 2.6063547134399414, - "680": 2.604421615600586, - "681": 2.6073555946350098, - "682": 2.617856979370117, - "683": 2.6121232509613037, - "684": 2.6046979427337646, - "685": 2.6048662662506104, - "686": 2.6013407707214355, - "687": 2.6054189205169678, - "688": 2.5942869186401367, - "689": 2.6175761222839355, - "690": 2.5908968448638916, - "691": 2.615455389022827, - "692": 2.5798377990722656, - "693": 2.596287488937378, - "694": 2.6051957607269287, - "695": 2.6023013591766357, - "696": 2.6002156734466553, - "697": 2.595587730407715, - "698": 2.585690498352051, - "699": 2.6131420135498047, - "700": 2.5900957584381104, - "701": 2.5907397270202637 - }, - "lr": { - "648": 0.215, - "649": 0.21367283950617283, - "650": 0.21234567901234566, - "651": 0.21101851851851852, - "652": 0.20969135802469135, - "653": 0.2083641975308642, - "654": 0.20703703703703705, - "655": 0.20570987654320988, - "656": 0.2043827160493827, - "657": 0.20305555555555554, - "658": 0.20172839506172838, - "659": 0.20040123456790124, - "660": 0.19907407407407407, - "661": 0.1977469135802469, - "662": 0.19641975308641976, - "663": 0.1950925925925926, - "664": 0.19376543209876543, - "665": 0.19243827160493826, - "666": 0.1911111111111111, - "667": 0.18978395061728395, - "668": 0.18845679012345679, - "669": 0.18712962962962962, - "670": 0.18580246913580248, - "671": 0.1844753086419753, - "672": 0.18314814814814814, - "673": 0.18182098765432098, - "674": 0.1804938271604938, - "675": 0.17916666666666667, - "676": 0.1778395061728395, - "677": 0.17651234567901233, - "678": 0.1751851851851852, - "679": 0.17385802469135803, - "680": 0.17253086419753086, - "681": 0.1712037037037037, - "682": 0.16987654320987652, - "683": 0.16854938271604938, - "684": 0.16722222222222222, - "685": 0.16589506172839505, - "686": 0.1645679012345679, - "687": 0.16324074074074074, - "688": 0.16191358024691357, - "689": 0.1605864197530864, - "690": 0.15925925925925924, - "691": 0.15793209876543207, - "692": 0.15660493827160493, - "693": 0.15527777777777776, - "694": 0.15395061728395062, - "695": 0.15262345679012346, - "696": 0.1512962962962963, - "697": 0.14996913580246912, - "698": 0.14864197530864195, - "699": 0.1473148148148148, - "700": 0.14598765432098765, - "701": 0.14466049382716048 - } - }, - "train_epoch_time": 4.788262128829956, - "train_loss": 2.594737240984245, - "train_score": 0.2596384056500417, - "val_loss": 2.6130455312170473, - "val_score": 0.25239936085300246 - }, - { - "epoch": 13, - "grad_norm": 0.19743777811527252, - "learning_rate": 0.14333333333333334, - "model_norm": 87.4638671875, - "step_logs": { - "grad_norm": { - "702": 0.288577675819397, - "703": 0.206546813249588, - "704": 0.18744145333766937, - "705": 0.23894193768501282, - "706": 0.2635490894317627, - "707": 0.1706453412771225, - "708": 0.19992195069789886, - "709": 0.19615544378757477, - "710": 0.19003179669380188, - "711": 0.20042403042316437, - "712": 0.22111228108406067, - "713": 0.20645873248577118, - "714": 0.19862057268619537, - "715": 0.21230727434158325, - "716": 0.20010048151016235, - "717": 0.208111971616745, - "718": 0.2040364295244217, - "719": 0.20175381004810333, - "720": 0.20806914567947388, - "721": 0.2026674598455429, - "722": 0.19035674631595612, - "723": 0.18032985925674438, - "724": 0.1910620927810669, - "725": 0.17998524010181427, - "726": 0.17583727836608887, - "727": 0.17799758911132812, - "728": 0.20407570898532867, - "729": 0.18521158397197723, - "730": 0.201987624168396, - "731": 0.2337084859609604, - "732": 0.2262936383485794, - "733": 0.2119879424571991, - "734": 0.21688628196716309, - "735": 0.2242150902748108, - "736": 0.2115195095539093, - "737": 0.2281603366136551, - "738": 0.2723083794116974, - "739": 0.204091876745224, - "740": 0.19272255897521973, - "741": 0.19437508285045624, - "742": 0.20794957876205444, - "743": 0.20709531009197235, - "744": 0.19182224571704865, - "745": 0.20327045023441315, - "746": 0.18197189271450043, - "747": 0.16160893440246582, - "748": 0.1971614509820938, - "749": 0.19469018280506134, - "750": 0.1888684332370758, - "751": 0.1756402999162674, - "752": 0.17552946507930756, - "753": 0.16428831219673157, - "754": 0.1855579912662506, - "755": 0.19743777811527252 - }, - "loss": { - "702": 2.586320161819458, - "703": 2.5773162841796875, - "704": 2.5759706497192383, - "705": 2.5901947021484375, - "706": 2.598660945892334, - "707": 2.5948023796081543, - "708": 2.5947155952453613, - "709": 2.58949613571167, - "710": 2.6068930625915527, - "711": 2.598865509033203, - "712": 2.5606284141540527, - "713": 2.5946717262268066, - "714": 2.5841267108917236, - "715": 2.60235595703125, - "716": 2.5846023559570312, - "717": 2.576630115509033, - "718": 2.5817763805389404, - "719": 2.5823283195495605, - "720": 2.590935230255127, - "721": 2.5704972743988037, - "722": 2.593195915222168, - "723": 2.5623183250427246, - "724": 2.576021909713745, - "725": 2.60878849029541, - "726": 2.5802509784698486, - "727": 2.5956945419311523, - "728": 2.5693695545196533, - "729": 2.5748777389526367, - "730": 2.5698094367980957, - "731": 2.6033058166503906, - "732": 2.5717451572418213, - "733": 2.589519500732422, - "734": 2.5744495391845703, - "735": 2.5993423461914062, - "736": 2.5652925968170166, - "737": 2.573065757751465, - "738": 2.590214252471924, - "739": 2.5813217163085938, - "740": 2.5754079818725586, - "741": 2.5944643020629883, - "742": 2.575998067855835, - "743": 2.5622103214263916, - "744": 2.573355197906494, - "745": 2.567068099975586, - "746": 2.5831942558288574, - "747": 2.585444927215576, - "748": 2.5840260982513428, - "749": 2.5696730613708496, - "750": 2.5842604637145996, - "751": 2.579193115234375, - "752": 2.580641269683838, - "753": 2.5773744583129883, - "754": 2.5695557594299316, - "755": 2.584674119949341 - }, - "lr": { - "702": 0.14333333333333334, - "703": 0.14200617283950617, - "704": 0.140679012345679, - "705": 0.13935185185185184, - "706": 0.13802469135802467, - "707": 0.1366975308641975, - "708": 0.13537037037037036, - "709": 0.1340432098765432, - "710": 0.13271604938271606, - "711": 0.1313888888888889, - "712": 0.13006172839506172, - "713": 0.12873456790123455, - "714": 0.12740740740740739, - "715": 0.12608024691358022, - "716": 0.12475308641975309, - "717": 0.12342592592592593, - "718": 0.12209876543209879, - "719": 0.12077160493827162, - "720": 0.11944444444444445, - "721": 0.11811728395061728, - "722": 0.11679012345679012, - "723": 0.11546296296296295, - "724": 0.11413580246913581, - "725": 0.11280864197530864, - "726": 0.1114814814814815, - "727": 0.11015432098765433, - "728": 0.10882716049382717, - "729": 0.1075, - "730": 0.10617283950617283, - "731": 0.10484567901234566, - "732": 0.10351851851851852, - "733": 0.10219135802469136, - "734": 0.10086419753086419, - "735": 0.09953703703703702, - "736": 0.09820987654320988, - "737": 0.09688271604938271, - "738": 0.09555555555555555, - "739": 0.09422839506172838, - "740": 0.09290123456790124, - "741": 0.09157407407407407, - "742": 0.0902469135802469, - "743": 0.08891975308641974, - "744": 0.0875925925925926, - "745": 0.08626543209876543, - "746": 0.08493827160493826, - "747": 0.0836111111111111, - "748": 0.08228395061728395, - "749": 0.08095679012345679, - "750": 0.07962962962962962, - "751": 0.07830246913580245, - "752": 0.07697530864197531, - "753": 0.07564814814814814, - "754": 0.07432098765432098, - "755": 0.07299382716049381 - } - }, - "train_epoch_time": 4.789228916168213, - "train_loss": 2.5746010681820053, - "train_score": 0.2640905218452771, - "val_loss": 2.591059433743272, - "val_score": 0.2571577215577924 - }, - { - "epoch": 14, - "grad_norm": 0.17980313301086426, - "learning_rate": 0.07166666666666667, - "model_norm": 87.4664306640625, - "step_logs": { - "grad_norm": { - "756": 0.19651539623737335, - "757": 0.22051754593849182, - "758": 0.18168343603610992, - "759": 0.19114398956298828, - "760": 0.20848040282726288, - "761": 0.19059225916862488, - "762": 0.17042526602745056, - "763": 0.19235539436340332, - "764": 0.22388316690921783, - "765": 0.18128003180027008, - "766": 0.21614381670951843, - "767": 0.1869804412126541, - "768": 0.17486269772052765, - "769": 0.20444579422473907, - "770": 0.20636223256587982, - "771": 0.15299025177955627, - "772": 0.1582631766796112, - "773": 0.17599335312843323, - "774": 0.1699964851140976, - "775": 0.17469920217990875, - "776": 0.17793191969394684, - "777": 0.17099779844284058, - "778": 0.18882369995117188, - "779": 0.2096959948539734, - "780": 0.17221367359161377, - "781": 0.19251389801502228, - "782": 0.1936895102262497, - "783": 0.16879186034202576, - "784": 0.17370820045471191, - "785": 0.17811767756938934, - "786": 0.193150594830513, - "787": 0.18296052515506744, - "788": 0.21088339388370514, - "789": 0.1723606288433075, - "790": 0.20569881796836853, - "791": 0.16170746088027954, - "792": 0.1840723603963852, - "793": 0.19184309244155884, - "794": 0.19387155771255493, - "795": 0.16084368526935577, - "796": 0.17537648975849152, - "797": 0.18694652616977692, - "798": 0.1754896193742752, - "799": 0.2051302045583725, - "800": 0.18033966422080994, - "801": 0.18328621983528137, - "802": 0.18514332175254822, - "803": 0.19138571619987488, - "804": 0.17341695725917816, - "805": 0.17652909457683563, - "806": 0.17719390988349915, - "807": 0.17969363927841187, - "808": 0.17272068560123444, - "809": 0.17980313301086426 - }, - "loss": { - "756": 2.5785164833068848, - "757": 2.574859619140625, - "758": 2.5759048461914062, - "759": 2.5679521560668945, - "760": 2.5693013668060303, - "761": 2.585184097290039, - "762": 2.581094264984131, - "763": 2.564300298690796, - "764": 2.5730648040771484, - "765": 2.566831350326538, - "766": 2.581516981124878, - "767": 2.5767414569854736, - "768": 2.5858826637268066, - "769": 2.5594544410705566, - "770": 2.60379695892334, - "771": 2.560962677001953, - "772": 2.5692503452301025, - "773": 2.5719165802001953, - "774": 2.5704259872436523, - "775": 2.555741548538208, - "776": 2.579489231109619, - "777": 2.5771191120147705, - "778": 2.5696277618408203, - "779": 2.577171802520752, - "780": 2.5648751258850098, - "781": 2.5556445121765137, - "782": 2.574836254119873, - "783": 2.550807476043701, - "784": 2.579025983810425, - "785": 2.5787100791931152, - "786": 2.5677437782287598, - "787": 2.57653546333313, - "788": 2.599581718444824, - "789": 2.561877965927124, - "790": 2.552694797515869, - "791": 2.579023838043213, - "792": 2.5785627365112305, - "793": 2.5573389530181885, - "794": 2.5599212646484375, - "795": 2.567988634109497, - "796": 2.5702061653137207, - "797": 2.5582666397094727, - "798": 2.569242238998413, - "799": 2.5669472217559814, - "800": 2.5579676628112793, - "801": 2.5632500648498535, - "802": 2.5691051483154297, - "803": 2.575640916824341, - "804": 2.578709125518799, - "805": 2.578014850616455, - "806": 2.5717475414276123, - "807": 2.572329044342041, - "808": 2.578866958618164, - "809": 2.557891845703125 - }, - "lr": { - "756": 0.07166666666666667, - "757": 0.0703395061728395, - "758": 0.06901234567901234, - "759": 0.06768518518518517, - "760": 0.06635802469135803, - "761": 0.06503086419753086, - "762": 0.06370370370370369, - "763": 0.06237654320987653, - "764": 0.06104938271604939, - "765": 0.059722222222222225, - "766": 0.05839506172839506, - "767": 0.05706790123456789, - "768": 0.05574074074074075, - "769": 0.05441358024691358, - "770": 0.053086419753086415, - "771": 0.05175925925925925, - "772": 0.05043209876543211, - "773": 0.04910493827160494, - "774": 0.04777777777777777, - "775": 0.046450617283950606, - "776": 0.045123456790123466, - "777": 0.0437962962962963, - "778": 0.04246913580246913, - "779": 0.04114197530864196, - "780": 0.039814814814814824, - "781": 0.038487654320987656, - "782": 0.03716049382716049, - "783": 0.03583333333333333, - "784": 0.03450617283950618, - "785": 0.033179012345679014, - "786": 0.031851851851851846, - "787": 0.030524691358024682, - "788": 0.02919753086419754, - "789": 0.027870370370370375, - "790": 0.026543209876543208, - "791": 0.02521604938271604, - "792": 0.0238888888888889, - "793": 0.022561728395061733, - "794": 0.021234567901234565, - "795": 0.0199074074074074, - "796": 0.018580246913580258, - "797": 0.01725308641975309, - "798": 0.015925925925925923, - "799": 0.01459876543209876, - "800": 0.013271604938271616, - "801": 0.01194444444444445, - "802": 0.010617283950617283, - "803": 0.009290123456790117, - "804": 0.007962962962962974, - "805": 0.006635802469135808, - "806": 0.005308641975308641, - "807": 0.003981481481481476, - "808": 0.002654320987654333, - "809": 0.0013271604938271664 - } - }, - "train_epoch_time": 4.78950047492981, - "train_loss": 2.5690175032513043, - "train_score": 0.26566423052865773, - "val_loss": 2.5859530095254786, - "val_score": 0.2592341778404266 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:31:32.282473", - "final_model_norm": 87.4664306640625, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:29:51.480189", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.652818202972412, - "learning_rate": 4.64e-11, - "model_norm": 95.14454650878906, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 6.276208400726318, - "3": 8.374259948730469, - "4": 16.702686309814453, - "5": 4.318990230560303, - "6": 4.053266525268555, - "7": 4.086020469665527, - "8": 5.3812150955200195, - "9": 3.9882380962371826, - "10": 3.1579270362854004, - "11": 4.5157694816589355, - "12": 7.758707523345947, - "13": 6.416595458984375, - "14": 16.67332649230957, - "15": 282.22894287109375, - "16": 22.914852142333984, - "17": 18.954862594604492, - "18": 16.624263763427734, - "19": 9.764891624450684, - "20": 6.315908432006836, - "21": 11.82269287109375, - "22": 5.424689769744873, - "23": 5.268277645111084, - "24": 15.263899803161621, - "25": 14.404182434082031, - "26": 6.300836086273193, - "27": 4.741504669189453, - "28": 4.246946811676025, - "29": 3.7954163551330566, - "30": 10.74240493774414, - "31": 5.669170379638672, - "32": 4.174526691436768, - "33": 4.03250789642334, - "34": 2.977074146270752, - "35": 2.629021406173706, - "36": 6.231155872344971, - "37": 5.815568447113037, - "38": 4.085026264190674, - "39": 3.1897835731506348, - "40": 2.947141170501709, - "41": 11.358014106750488, - "42": 8.03885555267334, - "43": 3.9542248249053955, - "44": 3.493335008621216, - "45": 2.769835948944092, - "46": 2.4430980682373047, - "47": 1.7583775520324707, - "48": 7.817409038543701, - "49": 2.657479763031006, - "50": 3.692596435546875, - "51": 2.1273584365844727, - "52": 1.7742284536361694, - "53": 1.652818202972412 - }, - "loss": { - "0": 4.53324556350708, - "1": 4.53290319442749, - "2": 3.8033523559570312, - "3": 3.8736085891723633, - "4": 4.4257612228393555, - "5": 4.802114009857178, - "6": 4.167995929718018, - "7": 3.772918939590454, - "8": 3.651449203491211, - "9": 4.178749084472656, - "10": 3.4780311584472656, - "11": 3.7390782833099365, - "12": 4.454328536987305, - "13": 4.386186599731445, - "14": 8.97293758392334, - "15": 14.497613906860352, - "16": 5.278868675231934, - "17": 9.501153945922852, - "18": 11.088691711425781, - "19": 14.500003814697266, - "20": 14.40684700012207, - "21": 12.262041091918945, - "22": 10.736939430236816, - "23": 8.117122650146484, - "24": 11.160126686096191, - "25": 15.988414764404297, - "26": 16.101322174072266, - "27": 13.540946960449219, - "28": 10.738876342773438, - "29": 7.659339904785156, - "30": 6.603672504425049, - "31": 11.902063369750977, - "32": 10.373956680297852, - "33": 8.095108032226562, - "34": 5.627439498901367, - "35": 3.818453550338745, - "36": 5.04629373550415, - "37": 6.023892879486084, - "38": 8.797224044799805, - "39": 7.081573486328125, - "40": 4.819413661956787, - "41": 7.542447090148926, - "42": 12.643533706665039, - "43": 12.60511589050293, - "44": 10.406379699707031, - "45": 7.89165735244751, - "46": 5.85908317565918, - "47": 4.3023200035095215, - "48": 5.10648250579834, - "49": 8.244983673095703, - "50": 7.436631202697754, - "51": 6.794487953186035, - "52": 5.365841865539551, - "53": 4.076620578765869 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "train_epoch_time": 4.7896528244018555, - "train_loss": 3.811383214947825, - "train_score": 0.1526105183055103, - "val_loss": 3.8258205180053184, - "val_score": 0.15114182700861745 - }, - { - "epoch": 1, - "grad_norm": 0.48905959725379944, - "learning_rate": 0.464, - "model_norm": 95.07994842529297, - "step_logs": { - "grad_norm": { - "54": 3.6941728591918945, - "55": 1.5769537687301636, - "56": 1.4904303550720215, - "57": 1.2141751050949097, - "58": 3.826354742050171, - "59": 1.4384191036224365, - "60": 1.3476049900054932, - "61": 1.2866255044937134, - "62": 2.2173924446105957, - "63": 1.2530168294906616, - "64": 1.0791776180267334, - "65": 2.5892274379730225, - "66": 1.2960045337677002, - "67": 1.150550365447998, - "68": 0.636892557144165, - "69": 0.8866409659385681, - "70": 2.255319356918335, - "71": 1.1988329887390137, - "72": 1.037598729133606, - "73": 1.3717186450958252, - "74": 1.1444860696792603, - "75": 0.3106951117515564, - "76": 0.4716980755329132, - "77": 1.2585800886154175, - "78": 1.1399120092391968, - "79": 0.5681597590446472, - "80": 0.8048447966575623, - "81": 2.011240005493164, - "82": 1.1774786710739136, - "83": 0.8743529915809631, - "84": 1.4935977458953857, - "85": 1.0602136850357056, - "86": 0.2866179645061493, - "87": 0.5061366558074951, - "88": 0.7225818037986755, - "89": 1.5581732988357544, - "90": 1.103549838066101, - "91": 0.4402516484260559, - "92": 0.6982254385948181, - "93": 0.8109797835350037, - "94": 1.2745931148529053, - "95": 1.061758279800415, - "96": 0.35099613666534424, - "97": 0.4390299320220947, - "98": 0.8580119609832764, - "99": 0.9358235597610474, - "100": 1.1460063457489014, - "101": 0.9937201738357544, - "102": 0.47928911447525024, - "103": 0.6241452693939209, - "104": 1.1688700914382935, - "105": 0.970736563205719, - "106": 0.34720972180366516, - "107": 0.48905959725379944 - }, - "loss": { - "54": 3.819190740585327, - "55": 5.175209045410156, - "56": 4.418959617614746, - "57": 3.5471701622009277, - "58": 3.8454673290252686, - "59": 5.155857086181641, - "60": 4.353496551513672, - "61": 3.6526291370391846, - "62": 3.556497573852539, - "63": 4.180559158325195, - "64": 3.539870262145996, - "65": 3.563734292984009, - "66": 4.3748016357421875, - "67": 3.724374771118164, - "68": 3.342287302017212, - "69": 3.476107597351074, - "70": 3.532440185546875, - "71": 4.159850120544434, - "72": 3.531541347503662, - "73": 3.4115400314331055, - "74": 3.745779037475586, - "75": 3.3326189517974854, - "76": 3.366605281829834, - "77": 3.401813507080078, - "78": 3.697483539581299, - "79": 3.3507590293884277, - "80": 3.369561195373535, - "81": 3.525599479675293, - "82": 3.9687652587890625, - "83": 3.4684581756591797, - "84": 3.424407958984375, - "85": 3.729970932006836, - "86": 3.3281264305114746, - "87": 3.36848783493042, - "88": 3.370513677597046, - "89": 3.4576525688171387, - "90": 3.7721939086914062, - "91": 3.3521311283111572, - "92": 3.3533034324645996, - "93": 3.4419989585876465, - "94": 3.3973400592803955, - "95": 3.6539418697357178, - "96": 3.333446979522705, - "97": 3.348630428314209, - "98": 3.3506622314453125, - "99": 3.510232925415039, - "100": 3.4140822887420654, - "101": 3.562901496887207, - "102": 3.3373665809631348, - "103": 3.3839244842529297, - "104": 3.4235408306121826, - "105": 3.5752899646759033, - "106": 3.3537020683288574, - "107": 3.339092254638672 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "train_epoch_time": 4.787497520446777, - "train_loss": 3.376562429055935, - "train_score": 0.15260939743033097, - "val_loss": 3.395580074954115, - "val_score": 0.15114182700861745 - }, - { - "epoch": 2, - "grad_norm": 0.5694767832756042, - "learning_rate": 0.464, - "model_norm": 95.0799331665039, - "step_logs": { - "grad_norm": { - "108": 0.937101423740387, - "109": 0.9100974202156067, - "110": 0.7832270860671997, - "111": 0.8097555041313171, - "112": 0.9023659825325012, - "113": 0.8763775825500488, - "114": 0.7913365364074707, - "115": 0.8260985016822815, - "116": 0.9353631138801575, - "117": 0.8662790656089783, - "118": 0.6298329830169678, - "119": 0.701732337474823, - "120": 0.9115036725997925, - "121": 0.8654568195343018, - "122": 0.7037350535392761, - "123": 0.7147424221038818, - "124": 0.7594584822654724, - "125": 0.7744802832603455, - "126": 0.8010094165802002, - "127": 0.771429181098938, - "128": 0.6653699278831482, - "129": 0.7111042141914368, - "130": 0.8257189989089966, - "131": 0.7920432686805725, - "132": 0.671977162361145, - "133": 0.682690441608429, - "134": 0.6913823485374451, - "135": 0.6903976798057556, - "136": 0.7035506963729858, - "137": 0.7023475766181946, - "138": 0.6732025146484375, - "139": 0.6678005456924438, - "140": 0.6431317925453186, - "141": 0.6587648391723633, - "142": 0.7042838931083679, - "143": 0.673664391040802, - "144": 0.5878357291221619, - "145": 0.6040487289428711, - "146": 0.644805908203125, - "147": 0.6217125058174133, - "148": 0.5670838356018066, - "149": 0.6007515788078308, - "150": 0.6736865043640137, - "151": 0.6285593509674072, - "152": 0.512269914150238, - "153": 0.5301933288574219, - "154": 0.5890335440635681, - "155": 0.6092319488525391, - "156": 0.6309515237808228, - "157": 0.6185550689697266, - "158": 0.5833370089530945, - "159": 0.5794802308082581, - "160": 0.5471209287643433, - "161": 0.5694767832756042 - }, - "loss": { - "108": 3.3574681282043457, - "109": 3.4849300384521484, - "110": 3.3535819053649902, - "111": 3.461355447769165, - "112": 3.3725531101226807, - "113": 3.492842197418213, - "114": 3.394954204559326, - "115": 3.4387760162353516, - "116": 3.3632302284240723, - "117": 3.4699866771698, - "118": 3.361332654953003, - "119": 3.412446975708008, - "120": 3.335014581680298, - "121": 3.44108247756958, - "122": 3.3754525184631348, - "123": 3.432736396789551, - "124": 3.398573637008667, - "125": 3.4387564659118652, - "126": 3.3856282234191895, - "127": 3.4441332817077637, - "128": 3.3441851139068604, - "129": 3.3686683177948, - "130": 3.3438167572021484, - "131": 3.4587936401367188, - "132": 3.353421688079834, - "133": 3.3798303604125977, - "134": 3.33695650100708, - "135": 3.414294719696045, - "136": 3.3588247299194336, - "137": 3.391749382019043, - "138": 3.342038154602051, - "139": 3.40187406539917, - "140": 3.344695568084717, - "141": 3.3651533126831055, - "142": 3.36657452583313, - "143": 3.450082302093506, - "144": 3.338261604309082, - "145": 3.3799362182617188, - "146": 3.3470382690429688, - "147": 3.403172731399536, - "148": 3.358397960662842, - "149": 3.3757081031799316, - "150": 3.373363494873047, - "151": 3.388943672180176, - "152": 3.331705093383789, - "153": 3.3673605918884277, - "154": 3.3178887367248535, - "155": 3.4022326469421387, - "156": 3.3367085456848145, - "157": 3.3797800540924072, - "158": 3.3392200469970703, - "159": 3.3776514530181885, - "160": 3.309654712677002, - "161": 3.3489630222320557 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "train_epoch_time": 4.787580490112305, - "train_loss": 3.3418038329229804, - "train_score": 0.15261276005586902, - "val_loss": 3.3609827295647148, - "val_score": 0.15114182700861745 - }, - { - "epoch": 3, - "grad_norm": 0.592322587966919, - "learning_rate": 0.464, - "model_norm": 95.12150573730469, - "step_logs": { - "grad_norm": { - "162": 0.615007758140564, - "163": 0.5873151421546936, - "164": 0.5264214277267456, - "165": 0.5163225531578064, - "166": 0.4879056513309479, - "167": 0.5220987796783447, - "168": 0.5873386263847351, - "169": 0.5762194395065308, - "170": 0.5494808554649353, - "171": 0.5450929403305054, - "172": 0.550300657749176, - "173": 0.5463757514953613, - "174": 0.5122048854827881, - "175": 0.5181676745414734, - "176": 0.5633895993232727, - "177": 0.5566295981407166, - "178": 0.5165855288505554, - "179": 0.5185393691062927, - "180": 0.5492952466011047, - "181": 0.5626727342605591, - "182": 0.5611037611961365, - "183": 0.5426319241523743, - "184": 0.5038169026374817, - "185": 0.5322648882865906, - "186": 0.5438753962516785, - "187": 0.8277273774147034, - "188": 0.7329356074333191, - "189": 0.6525020003318787, - "190": 0.508832573890686, - "191": 0.5306726098060608, - "192": 0.5974617600440979, - "193": 0.6298349499702454, - "194": 0.6329159736633301, - "195": 0.6210013031959534, - "196": 0.5854630470275879, - "197": 1.0215471982955933, - "198": 0.7013662457466125, - "199": 0.6193742752075195, - "200": 0.49590811133384705, - "201": 0.5079411864280701, - "202": 0.5181183218955994, - "203": 0.538161039352417, - "204": 0.5654072761535645, - "205": 0.5818029046058655, - "206": 0.6068726778030396, - "207": 0.6282179951667786, - "208": 0.6167364120483398, - "209": 0.6415621042251587, - "210": 0.6173148155212402, - "211": 0.5772902965545654, - "212": 0.5579806566238403, - "213": 0.5356670618057251, - "214": 0.5393517017364502, - "215": 0.592322587966919 - }, - "loss": { - "162": 3.327364206314087, - "163": 3.3843235969543457, - "164": 3.3192696571350098, - "165": 3.3537888526916504, - "166": 3.330967903137207, - "167": 3.350696086883545, - "168": 3.3641114234924316, - "169": 3.359283447265625, - "170": 3.3143627643585205, - "171": 3.3739585876464844, - "172": 3.3140740394592285, - "173": 3.355048179626465, - "174": 3.3630337715148926, - "175": 3.342874526977539, - "176": 3.3190088272094727, - "177": 3.3339576721191406, - "178": 3.3038487434387207, - "179": 3.3242692947387695, - "180": 3.3445029258728027, - "181": 3.338287353515625, - "182": 3.297006845474243, - "183": 3.3233845233917236, - "184": 3.302051067352295, - "185": 3.330042839050293, - "186": 3.2854273319244385, - "187": 3.3331639766693115, - "188": 3.355712413787842, - "189": 3.3565173149108887, - "190": 3.3086819648742676, - "191": 3.3157200813293457, - "192": 3.2771875858306885, - "193": 3.31754207611084, - "194": 3.2726426124572754, - "195": 3.2991514205932617, - "196": 3.2811403274536133, - "197": 3.3124465942382812, - "198": 3.299910306930542, - "199": 3.3078887462615967, - "200": 3.2597906589508057, - "201": 3.284745216369629, - "202": 3.225611448287964, - "203": 3.2391726970672607, - "204": 3.237396240234375, - "205": 3.248345375061035, - "206": 3.235475540161133, - "207": 3.240050792694092, - "208": 3.207780599594116, - "209": 3.2279539108276367, - "210": 3.205918788909912, - "211": 3.22377872467041, - "212": 3.217679023742676, - "213": 3.2277183532714844, - "214": 3.1825857162475586, - "215": 3.1997873783111572 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "train_epoch_time": 4.787698745727539, - "train_loss": 3.195310854125057, - "train_score": 0.16691400641775883, - "val_loss": 3.205283027291982, - "val_score": 0.16692827949909886 - }, - { - "epoch": 4, - "grad_norm": 0.5832625031471252, - "learning_rate": 0.464, - "model_norm": 95.16639709472656, - "step_logs": { - "grad_norm": { - "216": 0.6614340543746948, - "217": 0.6891858577728271, - "218": 0.6676728129386902, - "219": 0.5995890498161316, - "220": 0.5358002185821533, - "221": 0.536672830581665, - "222": 0.594521701335907, - "223": 0.6287701725959778, - "224": 0.6931347846984863, - "225": 0.8357718586921692, - "226": 0.6993051171302795, - "227": 0.6106818318367004, - "228": 0.5817131996154785, - "229": 0.637666642665863, - "230": 0.6744968295097351, - "231": 0.647172749042511, - "232": 0.6165562272071838, - "233": 0.632185697555542, - "234": 0.6141465306282043, - "235": 0.5918753743171692, - "236": 0.5487201809883118, - "237": 0.5572559237480164, - "238": 0.5455392599105835, - "239": 0.5610880255699158, - "240": 0.6105849146842957, - "241": 0.5911791324615479, - "242": 0.5344687104225159, - "243": 0.515195369720459, - "244": 0.4881223142147064, - "245": 0.5291801691055298, - "246": 0.5750199556350708, - "247": 0.5984045267105103, - "248": 0.5724953413009644, - "249": 0.6173244714736938, - "250": 0.5688831210136414, - "251": 0.5364689230918884, - "252": 0.5486172437667847, - "253": 0.5572051405906677, - "254": 0.5692540407180786, - "255": 0.5646526217460632, - "256": 0.5788152813911438, - "257": 0.5705143809318542, - "258": 0.5741739273071289, - "259": 0.5943071842193604, - "260": 0.5959339141845703, - "261": 0.5554969906806946, - "262": 0.5691060423851013, - "263": 0.6079320311546326, - "264": 0.5677531957626343, - "265": 0.5119683742523193, - "266": 0.4963459074497223, - "267": 0.5055669546127319, - "268": 0.5560286045074463, - "269": 0.5832625031471252 - }, - "loss": { - "216": 3.1965291500091553, - "217": 3.2185592651367188, - "218": 3.2174973487854004, - "219": 3.1988306045532227, - "220": 3.1923441886901855, - "221": 3.1768062114715576, - "222": 3.155893564224243, - "223": 3.197890281677246, - "224": 3.1688971519470215, - "225": 3.241753101348877, - "226": 3.208406686782837, - "227": 3.191225528717041, - "228": 3.1688451766967773, - "229": 3.165341854095459, - "230": 3.1427555084228516, - "231": 3.205348014831543, - "232": 3.146683692932129, - "233": 3.168276786804199, - "234": 3.171910285949707, - "235": 3.1608381271362305, - "236": 3.126901626586914, - "237": 3.1705613136291504, - "238": 3.151902198791504, - "239": 3.150212287902832, - "240": 3.126461982727051, - "241": 3.149775981903076, - "242": 3.1044583320617676, - "243": 3.1385936737060547, - "244": 3.091449737548828, - "245": 3.118412971496582, - "246": 3.119467258453369, - "247": 3.147890567779541, - "248": 3.1146764755249023, - "249": 3.1356582641601562, - "250": 3.1246023178100586, - "251": 3.119462251663208, - "252": 3.0996222496032715, - "253": 3.123623847961426, - "254": 3.1057260036468506, - "255": 3.117973804473877, - "256": 3.088528633117676, - "257": 3.1137094497680664, - "258": 3.0836539268493652, - "259": 3.108476161956787, - "260": 3.094310760498047, - "261": 3.105084180831909, - "262": 3.098038673400879, - "263": 3.105175495147705, - "264": 3.076687812805176, - "265": 3.083289623260498, - "266": 3.077672004699707, - "267": 3.0758309364318848, - "268": 3.0616202354431152, - "269": 3.098269462585449 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "train_epoch_time": 4.787846326828003, - "train_loss": 3.0851088610064865, - "train_score": 0.17949807206903673, - "val_loss": 3.1028489352093773, - "val_score": 0.1790910234886524 - }, - { - "epoch": 5, - "grad_norm": 0.721752405166626, - "learning_rate": 0.464, - "model_norm": 95.19732666015625, - "step_logs": { - "grad_norm": { - "270": 0.7902553081512451, - "271": 0.6040201187133789, - "272": 0.44922083616256714, - "273": 0.44981932640075684, - "274": 0.44080644845962524, - "275": 0.45439624786376953, - "276": 0.5262670516967773, - "277": 0.8339715600013733, - "278": 0.6985093355178833, - "279": 0.5644071102142334, - "280": 0.5074340105056763, - "281": 0.49407148361206055, - "282": 0.5077321529388428, - "283": 0.5668754577636719, - "284": 0.5768797993659973, - "285": 0.5454105138778687, - "286": 0.5155891180038452, - "287": 0.5330159664154053, - "288": 0.5265529751777649, - "289": 0.6538668274879456, - "290": 0.6643021106719971, - "291": 0.8174726963043213, - "292": 0.6895253658294678, - "293": 0.6218353509902954, - "294": 0.5209974050521851, - "295": 0.5527642965316772, - "296": 0.5057004690170288, - "297": 0.5349284410476685, - "298": 0.4257163405418396, - "299": 0.37334802746772766, - "300": 0.41469070315361023, - "301": 0.5852785110473633, - "302": 0.587361752986908, - "303": 0.7149254083633423, - "304": 0.6898226737976074, - "305": 0.7398832440376282, - "306": 0.670011043548584, - "307": 0.6290679574012756, - "308": 0.5894433856010437, - "309": 0.5336933135986328, - "310": 0.5109879970550537, - "311": 0.5104011297225952, - "312": 0.5443515181541443, - "313": 0.5967521667480469, - "314": 0.680535614490509, - "315": 0.7892389297485352, - "316": 1.1440263986587524, - "317": 0.7391549944877625, - "318": 0.5006719827651978, - "319": 0.47819140553474426, - "320": 0.4830920100212097, - "321": 0.573211669921875, - "322": 0.6276507377624512, - "323": 0.721752405166626 - }, - "loss": { - "270": 3.0802083015441895, - "271": 3.1248939037323, - "272": 3.086836814880371, - "273": 3.0596683025360107, - "274": 3.0586657524108887, - "275": 3.0552639961242676, - "276": 3.0669636726379395, - "277": 3.0884647369384766, - "278": 3.140105724334717, - "279": 3.0839905738830566, - "280": 3.0772690773010254, - "281": 3.0480806827545166, - "282": 3.04884672164917, - "283": 3.0641770362854004, - "284": 3.060135841369629, - "285": 3.0600745677948, - "286": 3.0486268997192383, - "287": 3.0439209938049316, - "288": 3.0395689010620117, - "289": 3.05881404876709, - "290": 3.057131767272949, - "291": 3.0702338218688965, - "292": 3.0868217945098877, - "293": 3.055919885635376, - "294": 3.036402940750122, - "295": 3.042041778564453, - "296": 3.042891025543213, - "297": 3.0303261280059814, - "298": 3.0434207916259766, - "299": 2.99887752532959, - "300": 3.007185935974121, - "301": 3.0253050327301025, - "302": 3.0507731437683105, - "303": 3.0386862754821777, - "304": 3.0646677017211914, - "305": 3.0318522453308105, - "306": 3.030485153198242, - "307": 3.0205886363983154, - "308": 3.0392098426818848, - "309": 3.008427143096924, - "310": 3.015249729156494, - "311": 3.0068020820617676, - "312": 3.008622169494629, - "313": 2.9995484352111816, - "314": 3.018354654312134, - "315": 3.0260226726531982, - "316": 3.0838632583618164, - "317": 3.096536636352539, - "318": 3.0217180252075195, - "319": 2.9939684867858887, - "320": 2.99055552482605, - "321": 2.9938814640045166, - "322": 3.0175423622131348, - "323": 3.006381034851074 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "train_epoch_time": 4.788123607635498, - "train_loss": 3.0367380023857784, - "train_score": 0.1396251793229255, - "val_loss": 3.049307380833938, - "val_score": 0.13896562884445168 - }, - { - "epoch": 6, - "grad_norm": 0.4812910854816437, - "learning_rate": 0.464, - "model_norm": 95.22705841064453, - "step_logs": { - "grad_norm": { - "324": 0.7046613097190857, - "325": 0.684482216835022, - "326": 0.6434972882270813, - "327": 0.6167725324630737, - "328": 0.5184486508369446, - "329": 0.42307910323143005, - "330": 0.4216797649860382, - "331": 0.43924927711486816, - "332": 0.44578421115875244, - "333": 0.4958129823207855, - "334": 0.5587369203567505, - "335": 0.6142587661743164, - "336": 0.6828575134277344, - "337": 0.7260221242904663, - "338": 0.7251009941101074, - "339": 0.6530268788337708, - "340": 0.5546654462814331, - "341": 0.45238181948661804, - "342": 0.5362229943275452, - "343": 0.6245988607406616, - "344": 0.648998498916626, - "345": 0.6236292123794556, - "346": 0.5856001973152161, - "347": 0.5445979237556458, - "348": 0.5174869894981384, - "349": 0.531870424747467, - "350": 0.6193946003913879, - "351": 0.7220349311828613, - "352": 0.8519195318222046, - "353": 0.8194084763526917, - "354": 0.6350876688957214, - "355": 0.5142744183540344, - "356": 0.4926293194293976, - "357": 0.540449321269989, - "358": 0.6703569889068604, - "359": 0.7354685068130493, - "360": 0.731411337852478, - "361": 0.5776702761650085, - "362": 0.46868500113487244, - "363": 0.390006422996521, - "364": 0.4021241366863251, - "365": 0.466878741979599, - "366": 0.5145150423049927, - "367": 0.5767860412597656, - "368": 0.6581503748893738, - "369": 0.662085771560669, - "370": 0.6042476892471313, - "371": 0.5649664402008057, - "372": 0.49768534302711487, - "373": 0.4435543417930603, - "374": 0.45051810145378113, - "375": 0.458556592464447, - "376": 0.45275017619132996, - "377": 0.4812910854816437 - }, - "loss": { - "324": 3.0322296619415283, - "325": 3.01348614692688, - "326": 3.016357898712158, - "327": 2.993896007537842, - "328": 2.990234613418579, - "329": 2.967379331588745, - "330": 2.9529285430908203, - "331": 2.9590184688568115, - "332": 2.9784467220306396, - "333": 2.9573917388916016, - "334": 2.984755039215088, - "335": 2.977762222290039, - "336": 3.0143656730651855, - "337": 2.9890060424804688, - "338": 3.015803575515747, - "339": 2.980998992919922, - "340": 2.985811710357666, - "341": 2.9627182483673096, - "342": 2.970905303955078, - "343": 2.9666411876678467, - "344": 2.97985577583313, - "345": 2.9735774993896484, - "346": 2.978839874267578, - "347": 2.983078956604004, - "348": 2.974592685699463, - "349": 2.950813055038452, - "350": 2.9643874168395996, - "351": 2.987494707107544, - "352": 3.0066113471984863, - "353": 3.026047706604004, - "354": 2.9934444427490234, - "355": 2.9399914741516113, - "356": 2.9650113582611084, - "357": 2.9558963775634766, - "358": 2.982348918914795, - "359": 2.9659438133239746, - "360": 2.9718358516693115, - "361": 2.9699904918670654, - "362": 2.967891216278076, - "363": 2.933591842651367, - "364": 2.921778440475464, - "365": 2.939035415649414, - "366": 2.969357490539551, - "367": 2.946373224258423, - "368": 2.9747369289398193, - "369": 2.956366777420044, - "370": 2.9639105796813965, - "371": 2.9503540992736816, - "372": 2.938356876373291, - "373": 2.927687644958496, - "374": 2.947812080383301, - "375": 2.9191367626190186, - "376": 2.932507276535034, - "377": 2.9295830726623535 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "train_epoch_time": 4.788327217102051, - "train_loss": 2.937072514461479, - "train_score": 0.17194561509781622, - "val_loss": 2.9482095980616854, - "val_score": 0.16894643400484605 - }, - { - "epoch": 7, - "grad_norm": 0.5930202603340149, - "learning_rate": 0.464, - "model_norm": 95.25932312011719, - "step_logs": { - "grad_norm": { - "378": 0.5123597383499146, - "379": 0.5268654823303223, - "380": 0.5088984370231628, - "381": 0.4990869462490082, - "382": 0.5021896362304688, - "383": 0.47970518469810486, - "384": 0.46689966320991516, - "385": 0.49518582224845886, - "386": 0.5549895167350769, - "387": 0.5953436493873596, - "388": 0.5516526103019714, - "389": 0.48941537737846375, - "390": 0.41924116015434265, - "391": 0.4126428961753845, - "392": 0.42909085750579834, - "393": 0.4847986698150635, - "394": 0.5366307497024536, - "395": 0.5880151987075806, - "396": 0.5039798021316528, - "397": 0.4468422532081604, - "398": 0.4354686737060547, - "399": 0.4592638313770294, - "400": 0.4618544280529022, - "401": 0.5446250438690186, - "402": 0.7834435105323792, - "403": 0.87884920835495, - "404": 0.8685675859451294, - "405": 0.7562757134437561, - "406": 0.6640731692314148, - "407": 0.5349342823028564, - "408": 0.47209784388542175, - "409": 0.426312118768692, - "410": 0.4473778307437897, - "411": 0.47478505969047546, - "412": 0.524868369102478, - "413": 0.568409264087677, - "414": 0.6179620027542114, - "415": 0.6348097324371338, - "416": 0.6079012155532837, - "417": 0.6017040014266968, - "418": 0.6105506420135498, - "419": 0.6828820705413818, - "420": 0.8697967529296875, - "421": 0.8649794459342957, - "422": 0.7182846069335938, - "423": 0.5542111396789551, - "424": 0.4734981954097748, - "425": 0.43867775797843933, - "426": 0.4955150783061981, - "427": 0.5763095021247864, - "428": 0.6123806834220886, - "429": 0.7042171955108643, - "430": 0.6293308138847351, - "431": 0.5930202603340149 - }, - "loss": { - "378": 2.9278626441955566, - "379": 2.9308393001556396, - "380": 2.9338629245758057, - "381": 2.9160118103027344, - "382": 2.9421284198760986, - "383": 2.9190926551818848, - "384": 2.91929292678833, - "385": 2.919095277786255, - "386": 2.924923896789551, - "387": 2.940929651260376, - "388": 2.938507556915283, - "389": 2.9132022857666016, - "390": 2.914498805999756, - "391": 2.8911900520324707, - "392": 2.9084277153015137, - "393": 2.9068715572357178, - "394": 2.9306201934814453, - "395": 2.9106624126434326, - "396": 2.9211835861206055, - "397": 2.901524543762207, - "398": 2.9183766841888428, - "399": 2.8904476165771484, - "400": 2.9231927394866943, - "401": 2.9081082344055176, - "402": 2.9449961185455322, - "403": 2.9645495414733887, - "404": 2.989840030670166, - "405": 2.933091640472412, - "406": 2.9595439434051514, - "407": 2.9268269538879395, - "408": 2.9075257778167725, - "409": 2.900203227996826, - "410": 2.9099552631378174, - "411": 2.909003257751465, - "412": 2.9015204906463623, - "413": 2.9033491611480713, - "414": 2.9276843070983887, - "415": 2.920766830444336, - "416": 2.9083471298217773, - "417": 2.8954715728759766, - "418": 2.912308692932129, - "419": 2.907003164291382, - "420": 2.936640977859497, - "421": 2.936184883117676, - "422": 2.9318740367889404, - "423": 2.9092180728912354, - "424": 2.8859424591064453, - "425": 2.865473985671997, - "426": 2.9003617763519287, - "427": 2.8803653717041016, - "428": 2.8993115425109863, - "429": 2.8887863159179688, - "430": 2.91388201713562, - "431": 2.8706154823303223 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "train_epoch_time": 4.787798166275024, - "train_loss": 2.8939731666313184, - "train_score": 0.17380626796820928, - "val_loss": 2.901402291693178, - "val_score": 0.17138615839106344 - }, - { - "epoch": 8, - "grad_norm": 0.8885157108306885, - "learning_rate": 0.464, - "model_norm": 95.30177307128906, - "step_logs": { - "grad_norm": { - "432": 0.6617476940155029, - "433": 0.6248102188110352, - "434": 0.5668144822120667, - "435": 0.5151016116142273, - "436": 0.58073890209198, - "437": 0.7222148776054382, - "438": 1.1242494583129883, - "439": 1.1053086519241333, - "440": 0.90779048204422, - "441": 0.6062111258506775, - "442": 0.4770806133747101, - "443": 0.4220946729183197, - "444": 0.4529079794883728, - "445": 0.5120531916618347, - "446": 0.6154143810272217, - "447": 0.7112812399864197, - "448": 0.7018038630485535, - "449": 0.6837787628173828, - "450": 0.6769550442695618, - "451": 0.6928111910820007, - "452": 0.9153744578361511, - "453": 0.9256760478019714, - "454": 0.767328679561615, - "455": 0.6256396174430847, - "456": 0.6242458820343018, - "457": 0.6252599954605103, - "458": 0.6766478419303894, - "459": 0.6967686414718628, - "460": 0.7694593071937561, - "461": 0.8051020503044128, - "462": 0.7642512321472168, - "463": 0.7067453861236572, - "464": 0.6544169187545776, - "465": 0.7174378633499146, - "466": 0.7884799838066101, - "467": 0.812394380569458, - "468": 0.7971006631851196, - "469": 0.6915827393531799, - "470": 0.6738126277923584, - "471": 0.7205873131752014, - "472": 0.8300636410713196, - "473": 0.7758504748344421, - "474": 0.6803692579269409, - "475": 0.6284384727478027, - "476": 0.6289702653884888, - "477": 0.7247979044914246, - "478": 0.8562721610069275, - "479": 0.9490899443626404, - "480": 0.8300268650054932, - "481": 0.6356168389320374, - "482": 0.6143249273300171, - "483": 0.7047991156578064, - "484": 0.832797646522522, - "485": 0.8885157108306885 - }, - "loss": { - "432": 2.8969850540161133, - "433": 2.8764634132385254, - "434": 2.8807573318481445, - "435": 2.850984573364258, - "436": 2.868626356124878, - "437": 2.8706536293029785, - "438": 2.9357008934020996, - "439": 2.9886069297790527, - "440": 2.9636659622192383, - "441": 2.8769421577453613, - "442": 2.8727664947509766, - "443": 2.8487119674682617, - "444": 2.838052272796631, - "445": 2.826735019683838, - "446": 2.8392481803894043, - "447": 2.843799114227295, - "448": 2.8656325340270996, - "449": 2.848928451538086, - "450": 2.8602981567382812, - "451": 2.8328380584716797, - "452": 2.8663294315338135, - "453": 2.8875441551208496, - "454": 2.8956637382507324, - "455": 2.823615550994873, - "456": 2.827363967895508, - "457": 2.8147706985473633, - "458": 2.8390417098999023, - "459": 2.8198049068450928, - "460": 2.8387603759765625, - "461": 2.839061737060547, - "462": 2.8402538299560547, - "463": 2.821221113204956, - "464": 2.810823917388916, - "465": 2.8279409408569336, - "466": 2.839804172515869, - "467": 2.835284948348999, - "468": 2.8370203971862793, - "469": 2.7924611568450928, - "470": 2.8063201904296875, - "471": 2.7872331142425537, - "472": 2.836988687515259, - "473": 2.823427677154541, - "474": 2.812631607055664, - "475": 2.7815423011779785, - "476": 2.7804808616638184, - "477": 2.7773475646972656, - "478": 2.8141560554504395, - "479": 2.8172104358673096, - "480": 2.8529977798461914, - "481": 2.780829429626465, - "482": 2.7638890743255615, - "483": 2.74462890625, - "484": 2.804307460784912, - "485": 2.8235862255096436 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "train_epoch_time": 4.787794589996338, - "train_loss": 2.8234112734774093, - "train_score": 0.17195234042158086, - "val_loss": 2.831862328931193, - "val_score": 0.16844413748564868 - }, - { - "epoch": 9, - "grad_norm": 0.7335805892944336, - "learning_rate": 0.464, - "model_norm": 95.34683990478516, - "step_logs": { - "grad_norm": { - "486": 0.7950608134269714, - "487": 0.619175136089325, - "488": 0.69770348072052, - "489": 0.8962931632995605, - "490": 0.8749693632125854, - "491": 0.6809191703796387, - "492": 0.5763744711875916, - "493": 0.6200893521308899, - "494": 0.7120007276535034, - "495": 0.8273748159408569, - "496": 0.8230582475662231, - "497": 0.7934614419937134, - "498": 0.7708424925804138, - "499": 0.78073650598526, - "500": 0.8738507628440857, - "501": 0.9576129913330078, - "502": 1.0112968683242798, - "503": 0.8379231691360474, - "504": 0.7411358952522278, - "505": 0.6115418076515198, - "506": 0.5339595079421997, - "507": 0.6205567717552185, - "508": 0.6820998787879944, - "509": 0.7188384532928467, - "510": 0.7029887437820435, - "511": 0.6189846992492676, - "512": 0.6333284378051758, - "513": 0.6914864778518677, - "514": 0.681087076663971, - "515": 0.5931110382080078, - "516": 0.6183187961578369, - "517": 0.7948628067970276, - "518": 0.7336117625236511, - "519": 0.5850400924682617, - "520": 0.5836624503135681, - "521": 0.6116194725036621, - "522": 0.667742908000946, - "523": 0.7010170221328735, - "524": 0.7001011371612549, - "525": 0.6831109523773193, - "526": 0.6719421148300171, - "527": 0.711574375629425, - "528": 0.8238739371299744, - "529": 0.8185665011405945, - "530": 0.7372053861618042, - "531": 0.6520942449569702, - "532": 0.6019648909568787, - "533": 0.6640612483024597, - "534": 0.638088047504425, - "535": 0.5757718682289124, - "536": 0.6625477075576782, - "537": 0.682201623916626, - "538": 0.6337475180625916, - "539": 0.7335805892944336 - }, - "loss": { - "486": 2.8294472694396973, - "487": 2.7400765419006348, - "488": 2.7507190704345703, - "489": 2.793379306793213, - "490": 2.818857192993164, - "491": 2.7769064903259277, - "492": 2.7376935482025146, - "493": 2.740218162536621, - "494": 2.7673373222351074, - "495": 2.7299458980560303, - "496": 2.7706587314605713, - "497": 2.751803159713745, - "498": 2.772120952606201, - "499": 2.7298178672790527, - "500": 2.76345157623291, - "501": 2.7723190784454346, - "502": 2.8080880641937256, - "503": 2.77487850189209, - "504": 2.76108455657959, - "505": 2.7070536613464355, - "506": 2.6962361335754395, - "507": 2.7109580039978027, - "508": 2.715606689453125, - "509": 2.7147459983825684, - "510": 2.7383029460906982, - "511": 2.6887598037719727, - "512": 2.701934814453125, - "513": 2.7053263187408447, - "514": 2.743013858795166, - "515": 2.681004762649536, - "516": 2.691380739212036, - "517": 2.681699275970459, - "518": 2.7380852699279785, - "519": 2.6838488578796387, - "520": 2.686288833618164, - "521": 2.6749088764190674, - "522": 2.701096534729004, - "523": 2.697679042816162, - "524": 2.707214593887329, - "525": 2.687723159790039, - "526": 2.70268177986145, - "527": 2.6728389263153076, - "528": 2.696200370788574, - "529": 2.688584327697754, - "530": 2.7217512130737305, - "531": 2.6711459159851074, - "532": 2.681565523147583, - "533": 2.6599719524383545, - "534": 2.6768884658813477, - "535": 2.6378355026245117, - "536": 2.655148983001709, - "537": 2.64034366607666, - "538": 2.669140100479126, - "539": 2.6393368244171143 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "train_epoch_time": 4.787980079650879, - "train_loss": 2.685435557673275, - "train_score": 0.23642059713815855, - "val_loss": 2.699703412816938, - "val_score": 0.23265284185226148 - }, - { - "epoch": 10, - "grad_norm": 0.4752683639526367, - "learning_rate": 0.464, - "model_norm": 95.39339447021484, - "step_logs": { - "grad_norm": { - "540": 0.6320156455039978, - "541": 0.4787788987159729, - "542": 0.4651777744293213, - "543": 0.46387532353401184, - "544": 0.531488299369812, - "545": 0.6712128520011902, - "546": 0.7085956335067749, - "547": 0.7731642723083496, - "548": 0.7668701410293579, - "549": 0.8050109148025513, - "550": 0.9345594048500061, - "551": 0.8671684861183167, - "552": 0.6863387227058411, - "553": 0.5661612749099731, - "554": 0.517009973526001, - "555": 0.45309242606163025, - "556": 0.4611679017543793, - "557": 0.4993745982646942, - "558": 0.559796929359436, - "559": 0.732742428779602, - "560": 0.6820537447929382, - "561": 0.6208508610725403, - "562": 0.607408344745636, - "563": 0.7566462159156799, - "564": 0.7901949286460876, - "565": 0.7086068987846375, - "566": 0.6591873168945312, - "567": 0.6047205924987793, - "568": 0.6425679922103882, - "569": 0.6278573274612427, - "570": 0.575109601020813, - "571": 0.5234408378601074, - "572": 0.4931306540966034, - "573": 0.5067612528800964, - "574": 0.5322205424308777, - "575": 0.5439607501029968, - "576": 0.5016165971755981, - "577": 0.5407265424728394, - "578": 0.7372181415557861, - "579": 0.7476486563682556, - "580": 0.748710572719574, - "581": 0.9689611792564392, - "582": 1.2562280893325806, - "583": 1.292590856552124, - "584": 0.973041296005249, - "585": 0.6824032068252563, - "586": 0.5244526267051697, - "587": 0.554132342338562, - "588": 0.5782356858253479, - "589": 0.6434018015861511, - "590": 0.6371096968650818, - "591": 0.590674102306366, - "592": 0.5333691835403442, - "593": 0.4752683639526367 - }, - "loss": { - "540": 2.686978340148926, - "541": 2.6249241828918457, - "542": 2.6504530906677246, - "543": 2.6456995010375977, - "544": 2.622481346130371, - "545": 2.6247398853302, - "546": 2.6595399379730225, - "547": 2.6740851402282715, - "548": 2.675379991531372, - "549": 2.6552562713623047, - "550": 2.681993007659912, - "551": 2.685206890106201, - "552": 2.6768953800201416, - "553": 2.612220287322998, - "554": 2.61747670173645, - "555": 2.6182384490966797, - "556": 2.6112167835235596, - "557": 2.5930275917053223, - "558": 2.5975537300109863, - "559": 2.6249661445617676, - "560": 2.624742031097412, - "561": 2.638888359069824, - "562": 2.6378297805786133, - "563": 2.624378204345703, - "564": 2.669511318206787, - "565": 2.6083102226257324, - "566": 2.6195950508117676, - "567": 2.582709789276123, - "568": 2.6142172813415527, - "569": 2.627603530883789, - "570": 2.5986456871032715, - "571": 2.594231128692627, - "572": 2.571992874145508, - "573": 2.590059280395508, - "574": 2.575410842895508, - "575": 2.5851104259490967, - "576": 2.5679733753204346, - "577": 2.574068546295166, - "578": 2.592306613922119, - "579": 2.6290454864501953, - "580": 2.5941970348358154, - "581": 2.625878095626831, - "582": 2.6908907890319824, - "583": 2.7271928787231445, - "584": 2.694558620452881, - "585": 2.6065611839294434, - "586": 2.56463885307312, - "587": 2.5697219371795654, - "588": 2.5664005279541016, - "589": 2.5572540760040283, - "590": 2.590574264526367, - "591": 2.5723307132720947, - "592": 2.554614543914795, - "593": 2.539903163909912 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "train_epoch_time": 4.789448976516724, - "train_loss": 2.5473097831310123, - "train_score": 0.29266835552034964, - "val_loss": 2.572094604150573, - "val_score": 0.28362245219583215 - }, - { - "epoch": 11, - "grad_norm": 0.7030220627784729, - "learning_rate": 0.464, - "model_norm": 95.4269790649414, - "step_logs": { - "grad_norm": { - "594": 0.4165057837963104, - "595": 0.38980862498283386, - "596": 0.4164181649684906, - "597": 0.4445800483226776, - "598": 0.5343098044395447, - "599": 0.5596135258674622, - "600": 0.6174715161323547, - "601": 0.5824212431907654, - "602": 0.5237164497375488, - "603": 0.454979807138443, - "604": 0.4843762218952179, - "605": 0.41267240047454834, - "606": 0.4199749827384949, - "607": 0.45983171463012695, - "608": 0.5696313977241516, - "609": 0.8060118556022644, - "610": 0.9275069832801819, - "611": 0.9513506889343262, - "612": 0.8771731853485107, - "613": 0.7656822204589844, - "614": 0.7516440749168396, - "615": 0.6336190700531006, - "616": 0.4952774941921234, - "617": 0.41883477568626404, - "618": 0.3993341326713562, - "619": 0.41083458065986633, - "620": 0.5018238425254822, - "621": 0.6153827905654907, - "622": 0.7203396558761597, - "623": 0.7669448256492615, - "624": 0.7399802207946777, - "625": 0.7138455510139465, - "626": 0.6816047430038452, - "627": 0.651770830154419, - "628": 0.6485459804534912, - "629": 0.6119530200958252, - "630": 0.5728703141212463, - "631": 0.6326759457588196, - "632": 0.6876031160354614, - "633": 0.7211130261421204, - "634": 0.7122551798820496, - "635": 0.632091760635376, - "636": 0.5928220748901367, - "637": 0.6026967763900757, - "638": 0.5505890846252441, - "639": 0.4651810824871063, - "640": 0.4886285066604614, - "641": 0.5196266174316406, - "642": 0.605863630771637, - "643": 0.7037306427955627, - "644": 0.9081894159317017, - "645": 0.9372041821479797, - "646": 0.8744444847106934, - "647": 0.7030220627784729 - }, - "loss": { - "594": 2.5411648750305176, - "595": 2.55806827545166, - "596": 2.5419533252716064, - "597": 2.5229320526123047, - "598": 2.5441854000091553, - "599": 2.5458807945251465, - "600": 2.538055658340454, - "601": 2.5621376037597656, - "602": 2.550751209259033, - "603": 2.5387682914733887, - "604": 2.513301134109497, - "605": 2.534552574157715, - "606": 2.5238735675811768, - "607": 2.5166447162628174, - "608": 2.516538143157959, - "609": 2.577965259552002, - "610": 2.6280593872070312, - "611": 2.589290142059326, - "612": 2.632606029510498, - "613": 2.5650429725646973, - "614": 2.595564126968384, - "615": 2.563746452331543, - "616": 2.530768871307373, - "617": 2.538038730621338, - "618": 2.542367458343506, - "619": 2.5250368118286133, - "620": 2.516429901123047, - "621": 2.55241060256958, - "622": 2.541691541671753, - "623": 2.5550291538238525, - "624": 2.5688633918762207, - "625": 2.5351779460906982, - "626": 2.5427422523498535, - "627": 2.5638809204101562, - "628": 2.5520267486572266, - "629": 2.50813889503479, - "630": 2.5204803943634033, - "631": 2.534245491027832, - "632": 2.540843963623047, - "633": 2.5320701599121094, - "634": 2.539372444152832, - "635": 2.5022246837615967, - "636": 2.5396337509155273, - "637": 2.535313606262207, - "638": 2.5398499965667725, - "639": 2.4777631759643555, - "640": 2.4896185398101807, - "641": 2.511262893676758, - "642": 2.501821756362915, - "643": 2.525163173675537, - "644": 2.5627331733703613, - "645": 2.60943603515625, - "646": 2.563251256942749, - "647": 2.5517003536224365 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "train_epoch_time": 4.788496732711792, - "train_loss": 2.5154153128094445, - "train_score": 0.29197341278088484, - "val_loss": 2.5424890282780925, - "val_score": 0.28494098081106983 - }, - { - "epoch": 12, - "grad_norm": 0.3136054575443268, - "learning_rate": 0.464, - "model_norm": 95.45179748535156, - "step_logs": { - "grad_norm": { - "648": 0.5140668153762817, - "649": 0.5310370922088623, - "650": 0.5972155332565308, - "651": 0.6897873282432556, - "652": 0.6560055613517761, - "653": 0.5812339782714844, - "654": 0.571418285369873, - "655": 0.5198261737823486, - "656": 0.4848625361919403, - "657": 0.43155333399772644, - "658": 0.39806169271469116, - "659": 0.3970857262611389, - "660": 0.41489464044570923, - "661": 0.4632144868373871, - "662": 0.5339930057525635, - "663": 0.5290558338165283, - "664": 0.41746291518211365, - "665": 0.31858471035957336, - "666": 0.2990455627441406, - "667": 0.2865983247756958, - "668": 0.2888813614845276, - "669": 0.295500785112381, - "670": 0.3475888669490814, - "671": 0.37309378385543823, - "672": 0.3875153660774231, - "673": 0.42623019218444824, - "674": 0.46908703446388245, - "675": 0.48923808336257935, - "676": 0.48527809977531433, - "677": 0.44631049036979675, - "678": 0.42582863569259644, - "679": 0.49393194913864136, - "680": 0.5919609665870667, - "681": 0.6775600910186768, - "682": 0.6533501744270325, - "683": 0.6141091585159302, - "684": 0.5120543241500854, - "685": 0.4202282428741455, - "686": 0.38074174523353577, - "687": 0.34943413734436035, - "688": 0.3027404844760895, - "689": 0.24933558702468872, - "690": 0.2531324028968811, - "691": 0.23845671117305756, - "692": 0.24858500063419342, - "693": 0.2719728648662567, - "694": 0.2981884777545929, - "695": 0.32925498485565186, - "696": 0.3263830840587616, - "697": 0.32748541235923767, - "698": 0.3687443733215332, - "699": 0.38323426246643066, - "700": 0.3244877755641937, - "701": 0.3136054575443268 - }, - "loss": { - "648": 2.5151448249816895, - "649": 2.506683826446533, - "650": 2.4910826683044434, - "651": 2.5092737674713135, - "652": 2.5533409118652344, - "653": 2.503725051879883, - "654": 2.5076305866241455, - "655": 2.498969078063965, - "656": 2.4992101192474365, - "657": 2.480461835861206, - "658": 2.487180709838867, - "659": 2.4801599979400635, - "660": 2.4802134037017822, - "661": 2.46563720703125, - "662": 2.494999885559082, - "663": 2.5054945945739746, - "664": 2.4839906692504883, - "665": 2.4889817237854004, - "666": 2.4599452018737793, - "667": 2.4579591751098633, - "668": 2.468801736831665, - "669": 2.446441888809204, - "670": 2.460641384124756, - "671": 2.462390899658203, - "672": 2.4728314876556396, - "673": 2.4499056339263916, - "674": 2.473860025405884, - "675": 2.467082977294922, - "676": 2.460484027862549, - "677": 2.4631881713867188, - "678": 2.4538638591766357, - "679": 2.4610934257507324, - "680": 2.4810004234313965, - "681": 2.484571933746338, - "682": 2.4824466705322266, - "683": 2.479504346847534, - "684": 2.471201181411743, - "685": 2.4623706340789795, - "686": 2.4673476219177246, - "687": 2.4720041751861572, - "688": 2.444331407546997, - "689": 2.4316089153289795, - "690": 2.4574413299560547, - "691": 2.4243526458740234, - "692": 2.423112630844116, - "693": 2.4112918376922607, - "694": 2.434623956680298, - "695": 2.462160587310791, - "696": 2.438055992126465, - "697": 2.455031633377075, - "698": 2.4295156002044678, - "699": 2.4481005668640137, - "700": 2.449397325515747, - "701": 2.429503917694092 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "train_epoch_time": 4.7901530265808105, - "train_loss": 2.438967116217702, - "train_score": 0.3005682837415802, - "val_loss": 2.465615165767385, - "val_score": 0.29338135097248547 - }, - { - "epoch": 13, - "grad_norm": 0.16016384959220886, - "learning_rate": 0.3093333333333334, - "model_norm": 95.464599609375, - "step_logs": { - "grad_norm": { - "702": 0.3453993797302246, - "703": 0.3100011646747589, - "704": 0.26915502548217773, - "705": 0.24128884077072144, - "706": 0.22970464825630188, - "707": 0.2259349524974823, - "708": 0.2878847122192383, - "709": 0.2669242024421692, - "710": 0.2583456039428711, - "711": 0.27287745475769043, - "712": 0.2391832172870636, - "713": 0.27245578169822693, - "714": 0.3058371841907501, - "715": 0.3215775191783905, - "716": 0.3002978563308716, - "717": 0.2868741452693939, - "718": 0.27294203639030457, - "719": 0.28981003165245056, - "720": 0.30295103788375854, - "721": 0.2705759108066559, - "722": 0.2438855767250061, - "723": 0.265132874250412, - "724": 0.3027491867542267, - "725": 0.3553493916988373, - "726": 0.384289413690567, - "727": 0.3158110976219177, - "728": 0.26802176237106323, - "729": 0.19784598052501678, - "730": 0.17569728195667267, - "731": 0.22731731832027435, - "732": 0.23690709471702576, - "733": 0.22688141465187073, - "734": 0.21649880707263947, - "735": 0.2058725506067276, - "736": 0.2107367068529129, - "737": 0.2478368878364563, - "738": 0.20716924965381622, - "739": 0.18654270470142365, - "740": 0.20285052061080933, - "741": 0.21022991836071014, - "742": 0.17788375914096832, - "743": 0.1758386343717575, - "744": 0.19142788648605347, - "745": 0.15469466149806976, - "746": 0.1731296330690384, - "747": 0.16940422356128693, - "748": 0.16589288413524628, - "749": 0.17014259099960327, - "750": 0.19174227118492126, - "751": 0.20674866437911987, - "752": 0.19658075273036957, - "753": 0.16921284794807434, - "754": 0.1723858118057251, - "755": 0.16016384959220886 - }, - "loss": { - "702": 2.4194765090942383, - "703": 2.4304871559143066, - "704": 2.438307046890259, - "705": 2.423537254333496, - "706": 2.4442191123962402, - "707": 2.4261655807495117, - "708": 2.443248987197876, - "709": 2.4373044967651367, - "710": 2.4286811351776123, - "711": 2.4274113178253174, - "712": 2.4244940280914307, - "713": 2.4421350955963135, - "714": 2.4272165298461914, - "715": 2.4516987800598145, - "716": 2.426527976989746, - "717": 2.457326650619507, - "718": 2.445324420928955, - "719": 2.424609899520874, - "720": 2.425920248031616, - "721": 2.4272212982177734, - "722": 2.445246458053589, - "723": 2.427469491958618, - "724": 2.409996747970581, - "725": 2.4332985877990723, - "726": 2.432913064956665, - "727": 2.4027180671691895, - "728": 2.4143729209899902, - "729": 2.432358741760254, - "730": 2.429215669631958, - "731": 2.412034034729004, - "732": 2.412898540496826, - "733": 2.4283459186553955, - "734": 2.405430316925049, - "735": 2.4155373573303223, - "736": 2.4246621131896973, - "737": 2.409104108810425, - "738": 2.4167308807373047, - "739": 2.4168617725372314, - "740": 2.4213085174560547, - "741": 2.4189791679382324, - "742": 2.441124439239502, - "743": 2.418696403503418, - "744": 2.4251351356506348, - "745": 2.4194159507751465, - "746": 2.405216693878174, - "747": 2.4216737747192383, - "748": 2.4059338569641113, - "749": 2.3975815773010254, - "750": 2.411850690841675, - "751": 2.40053129196167, - "752": 2.4194886684417725, - "753": 2.413079261779785, - "754": 2.409118890762329, - "755": 2.4035377502441406 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "train_epoch_time": 4.7884681224823, - "train_loss": 2.41268866606046, - "train_score": 0.3084951128644957, - "val_loss": 2.4423450591506697, - "val_score": 0.30263795210080635 - }, - { - "epoch": 14, - "grad_norm": 0.16784948110580444, - "learning_rate": 0.1546666666666667, - "model_norm": 95.46870422363281, - "step_logs": { - "grad_norm": { - "756": 0.19645656645298004, - "757": 0.18018025159835815, - "758": 0.17488263547420502, - "759": 0.2123248130083084, - "760": 0.20626197755336761, - "761": 0.16895000636577606, - "762": 0.18747922778129578, - "763": 0.1822664439678192, - "764": 0.21675723791122437, - "765": 0.2158898562192917, - "766": 0.18203391134738922, - "767": 0.17938371002674103, - "768": 0.18146370351314545, - "769": 0.18185514211654663, - "770": 0.16450749337673187, - "771": 0.16111457347869873, - "772": 0.1627318561077118, - "773": 0.2093825489282608, - "774": 0.17623504996299744, - "775": 0.16924811899662018, - "776": 0.16619442403316498, - "777": 0.15962553024291992, - "778": 0.17313867807388306, - "779": 0.1702631264925003, - "780": 0.1643819659948349, - "781": 0.17640475928783417, - "782": 0.16850781440734863, - "783": 0.20662371814250946, - "784": 0.17208142578601837, - "785": 0.16308902204036713, - "786": 0.1885741949081421, - "787": 0.18951012194156647, - "788": 0.19879771769046783, - "789": 0.1690634936094284, - "790": 0.18761664628982544, - "791": 0.14956939220428467, - "792": 0.16341783106327057, - "793": 0.15777888894081116, - "794": 0.16458484530448914, - "795": 0.16985629498958588, - "796": 0.1632569581270218, - "797": 0.15049952268600464, - "798": 0.15547393262386322, - "799": 0.17240214347839355, - "800": 0.14596056938171387, - "801": 0.1486940234899521, - "802": 0.19244736433029175, - "803": 0.16164113581180573, - "804": 0.16381162405014038, - "805": 0.2064862698316574, - "806": 0.1532782018184662, - "807": 0.17337344586849213, - "808": 0.1766686886548996, - "809": 0.16784948110580444 - }, - "loss": { - "756": 2.4169013500213623, - "757": 2.422706365585327, - "758": 2.4225950241088867, - "759": 2.4113571643829346, - "760": 2.3900463581085205, - "761": 2.4160799980163574, - "762": 2.4278297424316406, - "763": 2.405268669128418, - "764": 2.39888858795166, - "765": 2.4069385528564453, - "766": 2.420778512954712, - "767": 2.415346145629883, - "768": 2.4281342029571533, - "769": 2.410311222076416, - "770": 2.4197232723236084, - "771": 2.402217388153076, - "772": 2.4118905067443848, - "773": 2.418978691101074, - "774": 2.433053970336914, - "775": 2.398118019104004, - "776": 2.3858675956726074, - "777": 2.3922033309936523, - "778": 2.4053726196289062, - "779": 2.4241819381713867, - "780": 2.4094724655151367, - "781": 2.401900291442871, - "782": 2.40262508392334, - "783": 2.429072856903076, - "784": 2.3976826667785645, - "785": 2.41078519821167, - "786": 2.4068260192871094, - "787": 2.4138550758361816, - "788": 2.4098353385925293, - "789": 2.4246461391448975, - "790": 2.3871753215789795, - "791": 2.40929913520813, - "792": 2.408252239227295, - "793": 2.3965396881103516, - "794": 2.4261937141418457, - "795": 2.399303436279297, - "796": 2.3932385444641113, - "797": 2.4012861251831055, - "798": 2.4015824794769287, - "799": 2.410233497619629, - "800": 2.3908591270446777, - "801": 2.392645835876465, - "802": 2.398653745651245, - "803": 2.3908660411834717, - "804": 2.402310848236084, - "805": 2.427328109741211, - "806": 2.410106658935547, - "807": 2.3900437355041504, - "808": 2.421699285507202, - "809": 2.4033288955688477 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "train_epoch_time": 4.789684534072876, - "train_loss": 2.4054617320105884, - "train_score": 0.31077048951276237, - "val_loss": 2.4350329337793433, - "val_score": 0.30439150391572106 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:33:13.281734", - "final_model_norm": 95.46870422363281, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:31:32.423702", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.3940119743347168, - "learning_rate": 4.64e-11, - "model_norm": 87.59406280517578, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.522193908691406, - "3": 8.844858169555664, - "4": 14.65994644165039, - "5": 4.685466289520264, - "6": 3.948777437210083, - "7": 3.7569050788879395, - "8": 6.890072345733643, - "9": 4.172502040863037, - "10": 4.804870128631592, - "11": 8.966069221496582, - "12": 6.553073883056641, - "13": 11.516484260559082, - "14": 33.73067855834961, - "15": 11.882366180419922, - "16": 4.092479705810547, - "17": 9.66065502166748, - "18": 52.35045623779297, - "19": 19.61064338684082, - "20": 36.703125, - "21": 16.85268211364746, - "22": 8.551225662231445, - "23": 6.886287689208984, - "24": 21.28398323059082, - "25": 9.168475151062012, - "26": 5.5196638107299805, - "27": 4.271469593048096, - "28": 6.910177230834961, - "29": 3.036736488342285, - "30": 12.238639831542969, - "31": 9.314115524291992, - "32": 4.548000335693359, - "33": 3.6965489387512207, - "34": 3.354375123977661, - "35": 3.745252847671509, - "36": 10.948062896728516, - "37": 13.788848876953125, - "38": 7.157576560974121, - "39": 3.2581188678741455, - "40": 2.4912819862365723, - "41": 8.811918258666992, - "42": 2.675812244415283, - "43": 1.9945507049560547, - "44": 1.610080599784851, - "45": 1.4955765008926392, - "46": 1.4362869262695312, - "47": 2.4751739501953125, - "48": 1.579730749130249, - "49": 2.897298574447632, - "50": 1.3345760107040405, - "51": 1.125062346458435, - "52": 2.776702404022217, - "53": 1.3940119743347168 - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.798859119415283, - "3": 3.903656005859375, - "4": 4.252171516418457, - "5": 4.753815650939941, - "6": 4.088006019592285, - "7": 3.6133923530578613, - "8": 3.781853675842285, - "9": 4.502691745758057, - "10": 3.767045497894287, - "11": 4.191555023193359, - "12": 5.594882011413574, - "13": 5.5175395011901855, - "14": 5.409316539764404, - "15": 5.792328834533691, - "16": 4.137396812438965, - "17": 4.846114158630371, - "18": 14.33014965057373, - "19": 13.757341384887695, - "20": 12.278861999511719, - "21": 14.776731491088867, - "22": 18.466028213500977, - "23": 15.10760498046875, - "24": 14.427495956420898, - "25": 16.616600036621094, - "26": 13.928690910339355, - "27": 10.716838836669922, - "28": 10.541747093200684, - "29": 8.431756973266602, - "30": 7.79141902923584, - "31": 13.087114334106445, - "32": 12.545670509338379, - "33": 9.743616104125977, - "34": 7.695677280426025, - "35": 5.937138557434082, - "36": 7.195171356201172, - "37": 13.259464263916016, - "38": 13.314964294433594, - "39": 10.051591873168945, - "40": 7.038140296936035, - "41": 7.468445777893066, - "42": 9.320440292358398, - "43": 7.310545444488525, - "44": 6.042028427124023, - "45": 5.097186088562012, - "46": 4.168429374694824, - "47": 3.7739603519439697, - "48": 4.4266486167907715, - "49": 3.9670939445495605, - "50": 4.661612510681152, - "51": 3.9164187908172607, - "52": 3.7672781944274902, - "53": 4.303747653961182 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "train_epoch_time": 4.789660692214966, - "train_loss": 3.8168039754952385, - "train_score": 0.15260827655515163, - "val_loss": 3.825397946941319, - "val_score": 0.15114182700861745 - }, - { - "epoch": 1, - "grad_norm": 0.514480471611023, - "learning_rate": 0.464, - "model_norm": 87.60951232910156, - "step_logs": { - "grad_norm": { - "54": 1.2567001581192017, - "55": 1.182987093925476, - "56": 0.7642905116081238, - "57": 0.809528648853302, - "58": 1.6764920949935913, - "59": 1.1489003896713257, - "60": 0.41451770067214966, - "61": 0.6309722661972046, - "62": 0.8026121258735657, - "63": 1.4508488178253174, - "64": 1.0871254205703735, - "65": 0.2284901887178421, - "66": 0.14119938015937805, - "67": 0.1006789430975914, - "68": 0.1068229004740715, - "69": 0.13464471697807312, - "70": 0.23608392477035522, - "71": 0.4231679439544678, - "72": 0.9119272232055664, - "73": 0.9914507269859314, - "74": 1.168287754058838, - "75": 0.9777126908302307, - "76": 0.44396352767944336, - "77": 0.526111364364624, - "78": 0.7891499400138855, - "79": 0.8528002500534058, - "80": 0.9843778014183044, - "81": 0.905493438243866, - "82": 0.642676591873169, - "83": 0.6600900292396545, - "84": 0.7537866830825806, - "85": 0.7418822050094604, - "86": 0.6859331727027893, - "87": 0.677162230014801, - "88": 0.6110721230506897, - "89": 0.6294000148773193, - "90": 0.6997990012168884, - "91": 0.6818952560424805, - "92": 0.6383385062217712, - "93": 0.646510124206543, - "94": 0.6488845944404602, - "95": 0.6384627223014832, - "96": 0.5880641937255859, - "97": 0.5724895596504211, - "98": 0.5346968173980713, - "99": 0.47721046209335327, - "100": 0.3965725898742676, - "101": 0.4267416000366211, - "102": 0.4850084185600281, - "103": 0.5026825070381165, - "104": 0.5574753880500793, - "105": 0.5630974769592285, - "106": 0.5525953769683838, - "107": 0.514480471611023 - }, - "loss": { - "54": 3.829538345336914, - "55": 3.900240421295166, - "56": 3.4838783740997314, - "57": 3.4705233573913574, - "58": 3.5344412326812744, - "59": 3.915027618408203, - "60": 3.4037322998046875, - "61": 3.3808951377868652, - "62": 3.448050022125244, - "63": 3.5262856483459473, - "64": 3.7423739433288574, - "65": 3.374730110168457, - "66": 3.3496668338775635, - "67": 3.3476765155792236, - "68": 3.3487114906311035, - "69": 3.3233907222747803, - "70": 3.354370594024658, - "71": 3.3133368492126465, - "72": 3.3949131965637207, - "73": 3.532787799835205, - "74": 3.4515738487243652, - "75": 3.6085033416748047, - "76": 3.355710983276367, - "77": 3.3929359912872314, - "78": 3.385227680206299, - "79": 3.442448139190674, - "80": 3.383800745010376, - "81": 3.5130300521850586, - "82": 3.344487190246582, - "83": 3.4356167316436768, - "84": 3.4065611362457275, - "85": 3.4435362815856934, - "86": 3.3797593116760254, - "87": 3.4136552810668945, - "88": 3.3321127891540527, - "89": 3.4122955799102783, - "90": 3.3863933086395264, - "91": 3.3977856636047363, - "92": 3.367650270462036, - "93": 3.388272285461426, - "94": 3.3971071243286133, - "95": 3.394378185272217, - "96": 3.3742644786834717, - "97": 3.3986124992370605, - "98": 3.3466382026672363, - "99": 3.391096591949463, - "100": 3.3336567878723145, - "101": 3.362348794937134, - "102": 3.341663360595703, - "103": 3.3433074951171875, - "104": 3.3885936737060547, - "105": 3.3710618019104004, - "106": 3.3358426094055176, - "107": 3.35544490814209 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "train_epoch_time": 4.787289142608643, - "train_loss": 3.3426029892870823, - "train_score": 0.15260827655515163, - "val_loss": 3.3629409692043826, - "val_score": 0.15114182700861745 - }, - { - "epoch": 2, - "grad_norm": 0.443915456533432, - "learning_rate": 0.464, - "model_norm": 87.62141418457031, - "step_logs": { - "grad_norm": { - "108": 0.42400360107421875, - "109": 0.4330315887928009, - "110": 0.48329660296440125, - "111": 0.5137515068054199, - "112": 0.5598971247673035, - "113": 0.531995952129364, - "114": 0.48908567428588867, - "115": 0.48043346405029297, - "116": 0.45190975069999695, - "117": 0.4398237466812134, - "118": 0.4266398549079895, - "119": 0.4553142189979553, - "120": 0.5500909090042114, - "121": 0.5831237435340881, - "122": 0.5732588171958923, - "123": 0.6220180988311768, - "124": 0.6419893503189087, - "125": 0.6264800429344177, - "126": 0.5333755016326904, - "127": 0.4559817612171173, - "128": 0.42108386754989624, - "129": 0.3868430256843567, - "130": 0.3417876958847046, - "131": 0.3228040933609009, - "132": 0.30709001421928406, - "133": 0.3044731616973877, - "134": 0.3061954975128174, - "135": 0.3246074616909027, - "136": 0.3661918640136719, - "137": 0.39639613032341003, - "138": 0.46898069977760315, - "139": 0.47347989678382874, - "140": 0.4593355059623718, - "141": 0.4557546079158783, - "142": 0.45358777046203613, - "143": 0.47689199447631836, - "144": 0.4844415485858917, - "145": 0.49156394600868225, - "146": 0.45105409622192383, - "147": 0.3989025950431824, - "148": 0.3486165702342987, - "149": 0.3875811994075775, - "150": 0.3904741704463959, - "151": 0.3574006259441376, - "152": 0.37521421909332275, - "153": 0.4080508053302765, - "154": 0.3980672061443329, - "155": 0.3725684881210327, - "156": 0.3599435091018677, - "157": 0.38388964533805847, - "158": 0.42521315813064575, - "159": 0.4154224991798401, - "160": 0.4152551293373108, - "161": 0.443915456533432 - }, - "loss": { - "108": 3.312521457672119, - "109": 3.34661865234375, - "110": 3.3654656410217285, - "111": 3.321208953857422, - "112": 3.337642192840576, - "113": 3.395747184753418, - "114": 3.378725290298462, - "115": 3.363778591156006, - "116": 3.3873653411865234, - "117": 3.3523335456848145, - "118": 3.3114991188049316, - "119": 3.335526943206787, - "120": 3.366734504699707, - "121": 3.3786239624023438, - "122": 3.337869167327881, - "123": 3.3490445613861084, - "124": 3.370058536529541, - "125": 3.3664517402648926, - "126": 3.372699022293091, - "127": 3.3372371196746826, - "128": 3.3657479286193848, - "129": 3.3951103687286377, - "130": 3.3745837211608887, - "131": 3.336942672729492, - "132": 3.3377182483673096, - "133": 3.339109420776367, - "134": 3.324085235595703, - "135": 3.3301825523376465, - "136": 3.365748405456543, - "137": 3.3036351203918457, - "138": 3.3139443397521973, - "139": 3.380897045135498, - "140": 3.328117609024048, - "141": 3.3355884552001953, - "142": 3.3178296089172363, - "143": 3.346303939819336, - "144": 3.3225009441375732, - "145": 3.3662266731262207, - "146": 3.3394882678985596, - "147": 3.335334300994873, - "148": 3.3007912635803223, - "149": 3.331977367401123, - "150": 3.3437604904174805, - "151": 3.3364009857177734, - "152": 3.329472064971924, - "153": 3.3540613651275635, - "154": 3.3724470138549805, - "155": 3.344165802001953, - "156": 3.34220027923584, - "157": 3.3188018798828125, - "158": 3.321608543395996, - "159": 3.3312065601348877, - "160": 3.315763235092163, - "161": 3.3427977561950684 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "train_epoch_time": 4.787276983261108, - "train_loss": 3.3392115976068175, - "train_score": 0.15260827655515163, - "val_loss": 3.35697967176733, - "val_score": 0.15114182700861745 - }, - { - "epoch": 3, - "grad_norm": 0.4752013683319092, - "learning_rate": 0.464, - "model_norm": 87.64305877685547, - "step_logs": { - "grad_norm": { - "162": 0.453897088766098, - "163": 0.4476003646850586, - "164": 0.42216330766677856, - "165": 0.3942452669143677, - "166": 0.36772090196609497, - "167": 0.3478882908821106, - "168": 0.33396175503730774, - "169": 0.32168522477149963, - "170": 0.333763062953949, - "171": 0.32779353857040405, - "172": 0.3211841583251953, - "173": 0.30887407064437866, - "174": 0.3044631779193878, - "175": 0.3327837586402893, - "176": 0.37369370460510254, - "177": 0.3706324100494385, - "178": 0.37023842334747314, - "179": 0.3882354497909546, - "180": 0.42095234990119934, - "181": 0.41753268241882324, - "182": 0.37341630458831787, - "183": 0.39300069212913513, - "184": 0.42618292570114136, - "185": 0.4241190552711487, - "186": 0.43219563364982605, - "187": 0.43676042556762695, - "188": 0.43012484908103943, - "189": 0.42461398243904114, - "190": 0.40710654854774475, - "191": 0.39479538798332214, - "192": 0.37511155009269714, - "193": 0.3561560809612274, - "194": 0.3406989276409149, - "195": 0.33156925439834595, - "196": 0.3386594355106354, - "197": 0.35771849751472473, - "198": 0.35407596826553345, - "199": 0.3526514768600464, - "200": 0.36908161640167236, - "201": 0.39491814374923706, - "202": 0.3781105875968933, - "203": 0.3868880867958069, - "204": 0.3794122636318207, - "205": 0.3846893012523651, - "206": 0.3612799048423767, - "207": 0.38112694025039673, - "208": 0.4422636926174164, - "209": 0.48456239700317383, - "210": 0.5463977456092834, - "211": 0.5163854956626892, - "212": 0.46022534370422363, - "213": 0.4628199636936188, - "214": 0.4827488958835602, - "215": 0.4752013683319092 - }, - "loss": { - "162": 3.3317155838012695, - "163": 3.3516845703125, - "164": 3.3407416343688965, - "165": 3.359863042831421, - "166": 3.3250513076782227, - "167": 3.3077006340026855, - "168": 3.3049709796905518, - "169": 3.328124523162842, - "170": 3.3555619716644287, - "171": 3.3292555809020996, - "172": 3.3281049728393555, - "173": 3.328152656555176, - "174": 3.324321746826172, - "175": 3.3027453422546387, - "176": 3.325338840484619, - "177": 3.360889196395874, - "178": 3.3480138778686523, - "179": 3.32133150100708, - "180": 3.3484320640563965, - "181": 3.360474109649658, - "182": 3.3174314498901367, - "183": 3.320765733718872, - "184": 3.3069543838500977, - "185": 3.336766004562378, - "186": 3.3459224700927734, - "187": 3.3552560806274414, - "188": 3.32393217086792, - "189": 3.366337299346924, - "190": 3.3016326427459717, - "191": 3.333552122116089, - "192": 3.311038017272949, - "193": 3.3524298667907715, - "194": 3.339522361755371, - "195": 3.3452847003936768, - "196": 3.356109619140625, - "197": 3.3189916610717773, - "198": 3.2981154918670654, - "199": 3.320248603820801, - "200": 3.312007188796997, - "201": 3.3280959129333496, - "202": 3.332509756088257, - "203": 3.356332778930664, - "204": 3.322726249694824, - "205": 3.3292572498321533, - "206": 3.3012309074401855, - "207": 3.2981183528900146, - "208": 3.312960147857666, - "209": 3.3088507652282715, - "210": 3.311923027038574, - "211": 3.3157012462615967, - "212": 3.2933835983276367, - "213": 3.2858994007110596, - "214": 3.3187434673309326, - "215": 3.2820088863372803 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "train_epoch_time": 4.7873804569244385, - "train_loss": 3.2897771251766037, - "train_score": 0.15260379305443428, - "val_loss": 3.310802425500583, - "val_score": 0.15114631179621793 - }, - { - "epoch": 4, - "grad_norm": 0.5237197279930115, - "learning_rate": 0.464, - "model_norm": 87.70458984375, - "step_logs": { - "grad_norm": { - "216": 0.44777214527130127, - "217": 0.48899611830711365, - "218": 0.5638218522071838, - "219": 0.5643922686576843, - "220": 0.5390493869781494, - "221": 0.5048683881759644, - "222": 0.4525969624519348, - "223": 0.45847010612487793, - "224": 0.4864107370376587, - "225": 4.742197036743164, - "226": 0.8389106392860413, - "227": 0.686756432056427, - "228": 0.4833715856075287, - "229": 0.4567817449569702, - "230": 0.42351213097572327, - "231": 0.4084383547306061, - "232": 0.3881327211856842, - "233": 0.3714514970779419, - "234": 0.35957810282707214, - "235": 0.356612890958786, - "236": 0.3716936707496643, - "237": 0.4051712453365326, - "238": 0.4738687574863434, - "239": 0.5021004676818848, - "240": 0.5018496513366699, - "241": 0.46320876479148865, - "242": 0.42263951897621155, - "243": 0.4185473322868347, - "244": 0.9376572966575623, - "245": 0.4129084646701813, - "246": 0.4135898947715759, - "247": 0.41051092743873596, - "248": 0.4149532914161682, - "249": 0.632099449634552, - "250": 0.5338677167892456, - "251": 0.44909510016441345, - "252": 0.38973763585090637, - "253": 0.3813479244709015, - "254": 0.37471944093704224, - "255": 0.40992334485054016, - "256": 0.4109664559364319, - "257": 0.4014109969139099, - "258": 0.36270540952682495, - "259": 0.36905181407928467, - "260": 0.3724833130836487, - "261": 0.37833690643310547, - "262": 0.37102121114730835, - "263": 0.36926594376564026, - "264": 0.3439177870750427, - "265": 0.3319527506828308, - "266": 0.3107961118221283, - "267": 0.3148708939552307, - "268": 0.33677348494529724, - "269": 0.5237197279930115 - }, - "loss": { - "216": 3.270552396774292, - "217": 3.2687864303588867, - "218": 3.291767120361328, - "219": 3.3108158111572266, - "220": 3.3068325519561768, - "221": 3.290863513946533, - "222": 3.2585535049438477, - "223": 3.255937337875366, - "224": 3.250828742980957, - "225": 3.306823253631592, - "226": 3.3667616844177246, - "227": 3.344569206237793, - "228": 3.286442279815674, - "229": 3.317892074584961, - "230": 3.2904434204101562, - "231": 3.2846639156341553, - "232": 3.2664108276367188, - "233": 3.2574217319488525, - "234": 3.227783441543579, - "235": 3.2278926372528076, - "236": 3.220323085784912, - "237": 3.224609851837158, - "238": 3.210706949234009, - "239": 3.242043972015381, - "240": 3.178334951400757, - "241": 3.2063045501708984, - "242": 3.2051327228546143, - "243": 3.2179551124572754, - "244": 3.183863639831543, - "245": 3.1867003440856934, - "246": 3.183915138244629, - "247": 3.1895570755004883, - "248": 3.172560453414917, - "249": 3.1769180297851562, - "250": 3.171640396118164, - "251": 3.2026853561401367, - "252": 3.193422794342041, - "253": 3.1722874641418457, - "254": 3.1597189903259277, - "255": 3.1747875213623047, - "256": 3.1717894077301025, - "257": 3.1529808044433594, - "258": 3.1525895595550537, - "259": 3.1705822944641113, - "260": 3.121812582015991, - "261": 3.1705031394958496, - "262": 3.1393752098083496, - "263": 3.144162654876709, - "264": 3.157045364379883, - "265": 3.1581740379333496, - "266": 3.1955366134643555, - "267": 3.166224956512451, - "268": 3.156071424484253, - "269": 3.1477420330047607 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "train_epoch_time": 4.787662506103516, - "train_loss": 3.1613572962143843, - "train_score": 0.1667313037763538, - "val_loss": 3.181593383631394, - "val_score": 0.16378444296347425 - }, - { - "epoch": 5, - "grad_norm": 0.35353803634643555, - "learning_rate": 0.464, - "model_norm": 87.74584197998047, - "step_logs": { - "grad_norm": { - "270": 0.42171791195869446, - "271": 0.3841199278831482, - "272": 0.3562532961368561, - "273": 0.32652315497398376, - "274": 0.2897869348526001, - "275": 0.30895140767097473, - "276": 0.32444262504577637, - "277": 0.31616899371147156, - "278": 0.3206145763397217, - "279": 0.3470594882965088, - "280": 0.3585662543773651, - "281": 0.3872945308685303, - "282": 0.39947181940078735, - "283": 0.37214016914367676, - "284": 0.37557560205459595, - "285": 0.44474145770072937, - "286": 0.38327527046203613, - "287": 0.33758410811424255, - "288": 0.3330875635147095, - "289": 1.0608270168304443, - "290": 5.615816593170166, - "291": 0.714798092842102, - "292": 0.470763623714447, - "293": 0.44693851470947266, - "294": 0.45375385880470276, - "295": 0.42669814825057983, - "296": 0.4325542449951172, - "297": 0.4207967519760132, - "298": 0.44864749908447266, - "299": 0.3580751121044159, - "300": 0.3035534918308258, - "301": 0.26924169063568115, - "302": 0.25898274779319763, - "303": 0.2553577423095703, - "304": 0.3023153841495514, - "305": 0.3230116665363312, - "306": 0.3489471673965454, - "307": 0.345846951007843, - "308": 0.3564895689487457, - "309": 0.32305246591567993, - "310": 0.3027469515800476, - "311": 0.2787008583545685, - "312": 0.2635951340198517, - "313": 0.27753347158432007, - "314": 0.27476975321769714, - "315": 0.23849482834339142, - "316": 0.2239506095647812, - "317": 0.23103581368923187, - "318": 0.2659413814544678, - "319": 0.2500379681587219, - "320": 0.22238916158676147, - "321": 0.2682419419288635, - "322": 0.34568262100219727, - "323": 0.35353803634643555 - }, - "loss": { - "270": 3.1640477180480957, - "271": 3.1651968955993652, - "272": 3.141981363296509, - "273": 3.1493163108825684, - "274": 3.122767448425293, - "275": 3.135230541229248, - "276": 3.1407628059387207, - "277": 3.1474385261535645, - "278": 3.143263339996338, - "279": 3.1593728065490723, - "280": 3.1396913528442383, - "281": 3.1290547847747803, - "282": 3.1341495513916016, - "283": 3.138803720474243, - "284": 3.1228044033050537, - "285": 3.1064062118530273, - "286": 3.1627774238586426, - "287": 3.128211498260498, - "288": 3.114041328430176, - "289": 3.1342344284057617, - "290": 3.5254242420196533, - "291": 3.417147636413574, - "292": 3.3069357872009277, - "293": 3.2699406147003174, - "294": 3.2923851013183594, - "295": 3.287499189376831, - "296": 3.2823872566223145, - "297": 3.2895798683166504, - "298": 3.2938082218170166, - "299": 3.253305435180664, - "300": 3.238431692123413, - "301": 3.2401161193847656, - "302": 3.2279105186462402, - "303": 3.221372127532959, - "304": 3.2219200134277344, - "305": 3.2237117290496826, - "306": 3.2213127613067627, - "307": 3.208238363265991, - "308": 3.1887784004211426, - "309": 3.1917471885681152, - "310": 3.190892219543457, - "311": 3.184481382369995, - "312": 3.178776741027832, - "313": 3.1682095527648926, - "314": 3.1639552116394043, - "315": 3.1805777549743652, - "316": 3.1627583503723145, - "317": 3.1545886993408203, - "318": 3.1599767208099365, - "319": 3.1733086109161377, - "320": 3.1594300270080566, - "321": 3.1545329093933105, - "322": 3.1688942909240723, - "323": 3.1808645725250244 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "train_epoch_time": 4.787666082382202, - "train_loss": 3.152705482294091, - "train_score": 0.141473502497933, - "val_loss": 3.1700148270406623, - "val_score": 0.1428001221401957 - }, - { - "epoch": 6, - "grad_norm": 0.501187801361084, - "learning_rate": 0.464, - "model_norm": 87.79143524169922, - "step_logs": { - "grad_norm": { - "324": 0.28567054867744446, - "325": 0.27429282665252686, - "326": 0.28167828917503357, - "327": 0.32508182525634766, - "328": 0.3271867036819458, - "329": 0.35623452067375183, - "330": 0.4134608507156372, - "331": 0.44421863555908203, - "332": 0.4447523355484009, - "333": 0.4654664695262909, - "334": 0.4743514657020569, - "335": 0.507012128829956, - "336": 0.5169128775596619, - "337": 0.5209842920303345, - "338": 0.5336570739746094, - "339": 0.4899309277534485, - "340": 0.42174527049064636, - "341": 0.4082046151161194, - "342": 0.4352353513240814, - "343": 0.4328165650367737, - "344": 0.4127974510192871, - "345": 0.3974686861038208, - "346": 0.40610817074775696, - "347": 0.3848150670528412, - "348": 0.38284167647361755, - "349": 0.4253147542476654, - "350": 0.4837416112422943, - "351": 0.49535873532295227, - "352": 0.6893389225006104, - "353": 0.536414384841919, - "354": 0.4423315227031708, - "355": 0.624416172504425, - "356": 0.43107542395591736, - "357": 0.45929062366485596, - "358": 0.4513593912124634, - "359": 0.446321576833725, - "360": 0.4107964336872101, - "361": 0.39709919691085815, - "362": 0.4834073781967163, - "363": 0.5084345936775208, - "364": 0.5302790403366089, - "365": 0.4411664605140686, - "366": 0.3516263961791992, - "367": 0.31146740913391113, - "368": 0.3488292396068573, - "369": 0.4345119595527649, - "370": 0.37957683205604553, - "371": 0.41497910022735596, - "372": 0.37643009424209595, - "373": 0.4022415280342102, - "374": 0.46977072954177856, - "375": 0.5382270812988281, - "376": 0.4984232783317566, - "377": 0.501187801361084 - }, - "loss": { - "324": 3.150212287902832, - "325": 3.1622352600097656, - "326": 3.155548095703125, - "327": 3.1390395164489746, - "328": 3.141885280609131, - "329": 3.13614559173584, - "330": 3.1545543670654297, - "331": 3.1406188011169434, - "332": 3.1378555297851562, - "333": 3.1276278495788574, - "334": 3.1326255798339844, - "335": 3.115692615509033, - "336": 3.1312341690063477, - "337": 3.1147375106811523, - "338": 3.147418975830078, - "339": 3.1518070697784424, - "340": 3.1034297943115234, - "341": 3.114619255065918, - "342": 3.1150705814361572, - "343": 3.0952627658843994, - "344": 3.101466655731201, - "345": 3.1012301445007324, - "346": 3.1000776290893555, - "347": 3.0812013149261475, - "348": 3.0796804428100586, - "349": 3.0606911182403564, - "350": 3.061680793762207, - "351": 3.0804953575134277, - "352": 3.1108388900756836, - "353": 3.0975003242492676, - "354": 3.068204402923584, - "355": 3.050950765609741, - "356": 3.04215931892395, - "357": 3.069270610809326, - "358": 3.0697364807128906, - "359": 3.0631752014160156, - "360": 3.0354509353637695, - "361": 3.038780927658081, - "362": 3.048603057861328, - "363": 3.052165985107422, - "364": 3.0707545280456543, - "365": 3.051659345626831, - "366": 3.0290098190307617, - "367": 3.0111002922058105, - "368": 3.006466865539551, - "369": 3.0310111045837402, - "370": 3.0067758560180664, - "371": 3.016589641571045, - "372": 3.006666660308838, - "373": 2.9676527976989746, - "374": 2.9829344749450684, - "375": 3.001209259033203, - "376": 2.973054885864258, - "377": 2.9742369651794434 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "train_epoch_time": 4.788262367248535, - "train_loss": 2.9948131232897897, - "train_score": 0.15421224891540822, - "val_loss": 3.021159153444485, - "val_score": 0.1515768514400782 - }, - { - "epoch": 7, - "grad_norm": 0.42807021737098694, - "learning_rate": 0.464, - "model_norm": 87.8177261352539, - "step_logs": { - "grad_norm": { - "378": 0.5912654995918274, - "379": 0.5346393585205078, - "380": 0.4574964940547943, - "381": 0.4305146038532257, - "382": 0.4266318082809448, - "383": 0.4264521598815918, - "384": 0.4047684371471405, - "385": 0.4431267976760864, - "386": 0.5706725120544434, - "387": 0.5285161137580872, - "388": 0.49054163694381714, - "389": 0.45295250415802, - "390": 0.4346877932548523, - "391": 0.4331419765949249, - "392": 0.5483351945877075, - "393": 0.5667582154273987, - "394": 0.524700403213501, - "395": 0.4524461328983307, - "396": 0.44014936685562134, - "397": 0.4243784248828888, - "398": 0.4710800051689148, - "399": 0.48999878764152527, - "400": 0.48598068952560425, - "401": 0.516622006893158, - "402": 0.5722930431365967, - "403": 0.5258074998855591, - "404": 0.49011051654815674, - "405": 0.47478213906288147, - "406": 0.481942743062973, - "407": 0.475354939699173, - "408": 0.4578336477279663, - "409": 0.4217711389064789, - "410": 0.4343506097793579, - "411": 0.4680066704750061, - "412": 0.514274537563324, - "413": 0.7060795426368713, - "414": 0.7704524397850037, - "415": 0.5010085701942444, - "416": 0.4054775536060333, - "417": 0.3802575170993805, - "418": 0.41707009077072144, - "419": 0.6428954005241394, - "420": 0.5348367691040039, - "421": 0.38486918807029724, - "422": 0.369230180978775, - "423": 0.5235440731048584, - "424": 0.4992254972457886, - "425": 0.3032305836677551, - "426": 0.26986604928970337, - "427": 0.3310961425304413, - "428": 0.36691445112228394, - "429": 0.4646666347980499, - "430": 0.4643303453922272, - "431": 0.42807021737098694 - }, - "loss": { - "378": 3.010556221008301, - "379": 2.9800565242767334, - "380": 2.991891384124756, - "381": 2.9811596870422363, - "382": 3.013350486755371, - "383": 2.9468960762023926, - "384": 2.9682846069335938, - "385": 2.9865987300872803, - "386": 2.9837841987609863, - "387": 2.992126941680908, - "388": 2.981083393096924, - "389": 2.9554102420806885, - "390": 2.96353816986084, - "391": 2.9641830921173096, - "392": 2.995023727416992, - "393": 2.978753089904785, - "394": 2.9782819747924805, - "395": 2.969633102416992, - "396": 2.9756650924682617, - "397": 2.9608676433563232, - "398": 2.957498073577881, - "399": 2.9687013626098633, - "400": 2.984408378601074, - "401": 2.962040424346924, - "402": 2.9563395977020264, - "403": 2.9446516036987305, - "404": 2.979127883911133, - "405": 2.95468807220459, - "406": 2.9572196006774902, - "407": 2.9507298469543457, - "408": 2.9528446197509766, - "409": 2.9478797912597656, - "410": 2.9589197635650635, - "411": 2.931971549987793, - "412": 2.955918312072754, - "413": 2.958726644515991, - "414": 2.9883460998535156, - "415": 2.953390121459961, - "416": 2.9446823596954346, - "417": 2.949601650238037, - "418": 2.9266226291656494, - "419": 2.940394401550293, - "420": 2.976118564605713, - "421": 2.925241470336914, - "422": 2.937685489654541, - "423": 2.9466919898986816, - "424": 2.9619574546813965, - "425": 2.9219822883605957, - "426": 2.9320974349975586, - "427": 2.918477773666382, - "428": 2.9186582565307617, - "429": 2.9233531951904297, - "430": 2.9192821979522705, - "431": 2.9219703674316406 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "train_epoch_time": 4.788901329040527, - "train_loss": 2.92969334436797, - "train_score": 0.17221462515796104, - "val_loss": 2.9486451439140313, - "val_score": 0.1700272675599351 - }, - { - "epoch": 8, - "grad_norm": 0.39331939816474915, - "learning_rate": 0.464, - "model_norm": 87.84315490722656, - "step_logs": { - "grad_norm": { - "432": 0.40006187558174133, - "433": 0.36844465136528015, - "434": 0.3952924609184265, - "435": 0.48806363344192505, - "436": 0.5495551228523254, - "437": 0.668045699596405, - "438": 0.6224932074546814, - "439": 0.4230979084968567, - "440": 0.3479797840118408, - "441": 0.38340625166893005, - "442": 0.3424837291240692, - "443": 0.40772682428359985, - "444": 0.47499582171440125, - "445": 0.5305530428886414, - "446": 0.5541837811470032, - "447": 0.7858777046203613, - "448": 0.5743862390518188, - "449": 0.5429847240447998, - "450": 0.5184003710746765, - "451": 0.5168136358261108, - "452": 0.5578228235244751, - "453": 0.535378098487854, - "454": 0.4767427444458008, - "455": 0.41185852885246277, - "456": 0.3904164433479309, - "457": 0.39373108744621277, - "458": 0.3877347409725189, - "459": 0.35361406207084656, - "460": 0.3541043996810913, - "461": 0.35564976930618286, - "462": 0.37229400873184204, - "463": 0.36379274725914, - "464": 0.35928118228912354, - "465": 0.3857515752315521, - "466": 0.42022520303726196, - "467": 0.4710654020309448, - "468": 0.471584677696228, - "469": 0.5499950647354126, - "470": 0.5296297073364258, - "471": 0.5159841179847717, - "472": 0.6447048783302307, - "473": 0.4973290264606476, - "474": 0.4191618859767914, - "475": 0.28815633058547974, - "476": 0.2634706199169159, - "477": 0.2851393222808838, - "478": 0.3108154535293579, - "479": 0.3626827895641327, - "480": 0.4542961120605469, - "481": 0.4695046544075012, - "482": 0.4535657465457916, - "483": 0.44085997343063354, - "484": 0.408387690782547, - "485": 0.39331939816474915 - }, - "loss": { - "432": 2.9427413940429688, - "433": 2.8971974849700928, - "434": 2.9285292625427246, - "435": 2.9271202087402344, - "436": 2.9371798038482666, - "437": 2.936964511871338, - "438": 2.949596881866455, - "439": 2.9100966453552246, - "440": 2.9197194576263428, - "441": 2.9102163314819336, - "442": 2.9230966567993164, - "443": 2.91611909866333, - "444": 2.9378106594085693, - "445": 2.9030349254608154, - "446": 2.932502269744873, - "447": 2.9274721145629883, - "448": 2.95884370803833, - "449": 2.934290885925293, - "450": 2.9383561611175537, - "451": 2.925407886505127, - "452": 2.938593864440918, - "453": 2.9345874786376953, - "454": 2.9134750366210938, - "455": 2.8959689140319824, - "456": 2.9116811752319336, - "457": 2.9018585681915283, - "458": 2.914478063583374, - "459": 2.8870625495910645, - "460": 2.8916149139404297, - "461": 2.899754524230957, - "462": 2.930670976638794, - "463": 2.8785715103149414, - "464": 2.8861770629882812, - "465": 2.9090828895568848, - "466": 2.897146701812744, - "467": 2.8996613025665283, - "468": 2.924686908721924, - "469": 2.906843662261963, - "470": 2.9266715049743652, - "471": 2.92411208152771, - "472": 2.9472403526306152, - "473": 2.916522979736328, - "474": 2.924605369567871, - "475": 2.8895838260650635, - "476": 2.8787221908569336, - "477": 2.876397132873535, - "478": 2.8896255493164062, - "479": 2.876448392868042, - "480": 2.9053635597229004, - "481": 2.9030919075012207, - "482": 2.9148945808410645, - "483": 2.9046378135681152, - "484": 2.9086437225341797, - "485": 2.9065005779266357 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "train_epoch_time": 4.787418603897095, - "train_loss": 2.8975904314897667, - "train_score": 0.16385401723802176, - "val_loss": 2.91943799368139, - "val_score": 0.16317451219197393 - }, - { - "epoch": 9, - "grad_norm": 0.30390891432762146, - "learning_rate": 0.464, - "model_norm": 87.86610412597656, - "step_logs": { - "grad_norm": { - "486": 0.3884833753108978, - "487": 0.36869633197784424, - "488": 0.419179767370224, - "489": 0.44545257091522217, - "490": 0.42153453826904297, - "491": 0.4108070433139801, - "492": 0.43754008412361145, - "493": 0.3962385356426239, - "494": 0.3834441304206848, - "495": 0.3728509843349457, - "496": 0.36539870500564575, - "497": 0.3631349802017212, - "498": 0.36462244391441345, - "499": 0.3862491250038147, - "500": 0.3902868330478668, - "501": 0.40250545740127563, - "502": 0.38569462299346924, - "503": 0.3716394007205963, - "504": 0.38129302859306335, - "505": 0.3877372443675995, - "506": 0.41685324907302856, - "507": 0.4101344048976898, - "508": 0.40829846262931824, - "509": 0.3952760696411133, - "510": 0.3784595727920532, - "511": 0.37070387601852417, - "512": 0.3860388696193695, - "513": 0.39683663845062256, - "514": 0.40479129552841187, - "515": 0.42428261041641235, - "516": 0.4738938808441162, - "517": 0.46346062421798706, - "518": 0.43982386589050293, - "519": 0.3877086639404297, - "520": 0.35644033551216125, - "521": 0.35609158873558044, - "522": 0.376585990190506, - "523": 0.35381796956062317, - "524": 0.366868793964386, - "525": 0.32213616371154785, - "526": 0.29177966713905334, - "527": 0.3044784367084503, - "528": 0.4109605550765991, - "529": 0.30389589071273804, - "530": 0.2883964478969574, - "531": 0.29107925295829773, - "532": 0.30268076062202454, - "533": 0.31257936358451843, - "534": 0.3079054057598114, - "535": 0.331559419631958, - "536": 0.2933565378189087, - "537": 0.31729722023010254, - "538": 0.3078087568283081, - "539": 0.30390891432762146 - }, - "loss": { - "486": 2.891432285308838, - "487": 2.895451068878174, - "488": 2.894822120666504, - "489": 2.8899145126342773, - "490": 2.9043304920196533, - "491": 2.9070327281951904, - "492": 2.9097061157226562, - "493": 2.8964169025421143, - "494": 2.8906633853912354, - "495": 2.8931174278259277, - "496": 2.8730173110961914, - "497": 2.892716407775879, - "498": 2.8784871101379395, - "499": 2.896024227142334, - "500": 2.8959951400756836, - "501": 2.883206367492676, - "502": 2.8832459449768066, - "503": 2.909162998199463, - "504": 2.8844075202941895, - "505": 2.8829445838928223, - "506": 2.8923609256744385, - "507": 2.862466812133789, - "508": 2.887761354446411, - "509": 2.8792409896850586, - "510": 2.8858485221862793, - "511": 2.8859100341796875, - "512": 2.885378122329712, - "513": 2.8858842849731445, - "514": 2.9066884517669678, - "515": 2.885735511779785, - "516": 2.89491868019104, - "517": 2.87711238861084, - "518": 2.8932738304138184, - "519": 2.8749780654907227, - "520": 2.872653007507324, - "521": 2.8594064712524414, - "522": 2.8818066120147705, - "523": 2.8652231693267822, - "524": 2.8875558376312256, - "525": 2.8826684951782227, - "526": 2.8749003410339355, - "527": 2.860959053039551, - "528": 2.8698220252990723, - "529": 2.868011474609375, - "530": 2.8935108184814453, - "531": 2.8524913787841797, - "532": 2.8598310947418213, - "533": 2.8692216873168945, - "534": 2.865520715713501, - "535": 2.860454559326172, - "536": 2.8765625953674316, - "537": 2.871222972869873, - "538": 2.8787503242492676, - "539": 2.852886199951172 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "train_epoch_time": 4.788470983505249, - "train_loss": 2.8674550178232288, - "train_score": 0.20611885760829302, - "val_loss": 2.8892866270413493, - "val_score": 0.19857294099612843 - }, - { - "epoch": 10, - "grad_norm": 0.3852273225784302, - "learning_rate": 0.464, - "model_norm": 87.88851165771484, - "step_logs": { - "grad_norm": { - "540": 0.31072601675987244, - "541": 0.33412304520606995, - "542": 0.3782663643360138, - "543": 0.4295848309993744, - "544": 0.47475501894950867, - "545": 0.45222991704940796, - "546": 0.4470860958099365, - "547": 0.4074265658855438, - "548": 0.3899801969528198, - "549": 0.37065261602401733, - "550": 0.35630953311920166, - "551": 0.33831164240837097, - "552": 0.33550557494163513, - "553": 0.32936620712280273, - "554": 0.30802252888679504, - "555": 0.3193916082382202, - "556": 0.3542165756225586, - "557": 0.4319803714752197, - "558": 0.44829341769218445, - "559": 0.454242467880249, - "560": 0.468975692987442, - "561": 0.46344709396362305, - "562": 0.4485126733779907, - "563": 0.3968540132045746, - "564": 0.3787069618701935, - "565": 0.3727272152900696, - "566": 0.3637979030609131, - "567": 0.3713497519493103, - "568": 0.3550596237182617, - "569": 0.31624555587768555, - "570": 0.3268010914325714, - "571": 0.3223540484905243, - "572": 0.32249465584754944, - "573": 0.33169621229171753, - "574": 0.32700756192207336, - "575": 0.328964501619339, - "576": 0.3778802156448364, - "577": 0.44009077548980713, - "578": 0.4457842707633972, - "579": 0.4381271004676819, - "580": 0.4028886556625366, - "581": 0.3914549648761749, - "582": 0.3791936933994293, - "583": 0.37189045548439026, - "584": 0.4217931032180786, - "585": 0.442810595035553, - "586": 0.40438714623451233, - "587": 0.41019293665885925, - "588": 0.3906777799129486, - "589": 0.3975141644477844, - "590": 0.4016437828540802, - "591": 0.39046815037727356, - "592": 0.38027289509773254, - "593": 0.3852273225784302 - }, - "loss": { - "540": 2.8652682304382324, - "541": 2.856447696685791, - "542": 2.872041702270508, - "543": 2.8867805004119873, - "544": 2.8739094734191895, - "545": 2.892620086669922, - "546": 2.877148151397705, - "547": 2.8690030574798584, - "548": 2.8649845123291016, - "549": 2.864064931869507, - "550": 2.863469123840332, - "551": 2.876431941986084, - "552": 2.8608522415161133, - "553": 2.8575010299682617, - "554": 2.8603713512420654, - "555": 2.8452115058898926, - "556": 2.864285945892334, - "557": 2.865006685256958, - "558": 2.862560272216797, - "559": 2.87034273147583, - "560": 2.8586273193359375, - "561": 2.8775405883789062, - "562": 2.8730568885803223, - "563": 2.8686351776123047, - "564": 2.865934371948242, - "565": 2.850599527359009, - "566": 2.8591935634613037, - "567": 2.8511219024658203, - "568": 2.8655948638916016, - "569": 2.8376872539520264, - "570": 2.854034423828125, - "571": 2.8605992794036865, - "572": 2.8525283336639404, - "573": 2.8447864055633545, - "574": 2.842714548110962, - "575": 2.8467721939086914, - "576": 2.8557801246643066, - "577": 2.876159191131592, - "578": 2.861581325531006, - "579": 2.87233567237854, - "580": 2.8691887855529785, - "581": 2.877211093902588, - "582": 2.8669381141662598, - "583": 2.855844259262085, - "584": 2.8703207969665527, - "585": 2.8799691200256348, - "586": 2.867830276489258, - "587": 2.839306116104126, - "588": 2.8359737396240234, - "589": 2.8488521575927734, - "590": 2.8584985733032227, - "591": 2.851200819015503, - "592": 2.864673137664795, - "593": 2.8435373306274414 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "train_epoch_time": 4.7877092361450195, - "train_loss": 2.8496891605973755, - "train_score": 0.20287280311030328, - "val_loss": 2.872913730130814, - "val_score": 0.19633503174398578 - }, - { - "epoch": 11, - "grad_norm": 0.5239464044570923, - "learning_rate": 0.464, - "model_norm": 87.9194107055664, - "step_logs": { - "grad_norm": { - "594": 0.42257383465766907, - "595": 0.44964295625686646, - "596": 0.4130430817604065, - "597": 0.36789682507514954, - "598": 0.32851096987724304, - "599": 0.3788467049598694, - "600": 0.3752865195274353, - "601": 0.36830654740333557, - "602": 0.399733304977417, - "603": 0.48297855257987976, - "604": 0.4707321524620056, - "605": 0.4288770258426666, - "606": 0.35812294483184814, - "607": 0.33333203196525574, - "608": 0.3405986726284027, - "609": 0.3559945821762085, - "610": 0.35224106907844543, - "611": 0.3492664694786072, - "612": 0.35338935256004333, - "613": 0.357338011264801, - "614": 0.4009723663330078, - "615": 0.46710577607154846, - "616": 0.45859748125076294, - "617": 0.4375164806842804, - "618": 0.3868687152862549, - "619": 0.37531808018684387, - "620": 0.2936478853225708, - "621": 0.29129859805107117, - "622": 0.3046726882457733, - "623": 0.33380258083343506, - "624": 0.3612520396709442, - "625": 0.41978561878204346, - "626": 0.5552738904953003, - "627": 0.5705365538597107, - "628": 0.4846811592578888, - "629": 0.3962310254573822, - "630": 0.4062882959842682, - "631": 0.43522927165031433, - "632": 0.43066173791885376, - "633": 0.4276736080646515, - "634": 0.41746219992637634, - "635": 0.42552584409713745, - "636": 0.42190685868263245, - "637": 0.4003181457519531, - "638": 0.3897378742694855, - "639": 0.35833361744880676, - "640": 0.4152391850948334, - "641": 0.4509317874908447, - "642": 0.4552031457424164, - "643": 0.4360000491142273, - "644": 0.4611087441444397, - "645": 0.46938836574554443, - "646": 1.1099835634231567, - "647": 0.5239464044570923 - }, - "loss": { - "594": 2.8699967861175537, - "595": 2.854156494140625, - "596": 2.8469033241271973, - "597": 2.8562097549438477, - "598": 2.8361098766326904, - "599": 2.8483147621154785, - "600": 2.8401103019714355, - "601": 2.839268684387207, - "602": 2.840738296508789, - "603": 2.86029314994812, - "604": 2.831247091293335, - "605": 2.8455135822296143, - "606": 2.8481359481811523, - "607": 2.8126351833343506, - "608": 2.826653480529785, - "609": 2.8214635848999023, - "610": 2.8351025581359863, - "611": 2.8463010787963867, - "612": 2.8268990516662598, - "613": 2.8380680084228516, - "614": 2.848926305770874, - "615": 2.8463730812072754, - "616": 2.8471193313598633, - "617": 2.829418897628784, - "618": 2.8207077980041504, - "619": 2.832714080810547, - "620": 2.7979800701141357, - "621": 2.811311721801758, - "622": 2.8190622329711914, - "623": 2.8195295333862305, - "624": 2.8217153549194336, - "625": 2.811366558074951, - "626": 2.816267490386963, - "627": 2.853815793991089, - "628": 2.8188250064849854, - "629": 2.8165621757507324, - "630": 2.810520887374878, - "631": 2.834625005722046, - "632": 2.8090646266937256, - "633": 2.8065078258514404, - "634": 2.818776845932007, - "635": 2.830660343170166, - "636": 2.7966115474700928, - "637": 2.811253547668457, - "638": 2.7917771339416504, - "639": 2.803710460662842, - "640": 2.8038313388824463, - "641": 2.8246309757232666, - "642": 2.7982208728790283, - "643": 2.7998270988464355, - "644": 2.8005104064941406, - "645": 2.803518056869507, - "646": 2.7915449142456055, - "647": 2.925145149230957 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "train_epoch_time": 4.788415431976318, - "train_loss": 2.888531571994382, - "train_score": 0.20063329446777553, - "val_loss": 2.9095747722687046, - "val_score": 0.1982007033686687 - }, - { - "epoch": 12, - "grad_norm": 0.14469276368618011, - "learning_rate": 0.464, - "model_norm": 87.9462890625, - "step_logs": { - "grad_norm": { - "648": 0.5318052172660828, - "649": 0.6321051716804504, - "650": 0.42080065608024597, - "651": 0.4259883761405945, - "652": 0.4047864079475403, - "653": 0.39888811111450195, - "654": 0.4499017298221588, - "655": 0.5054547786712646, - "656": 0.5361528992652893, - "657": 0.5558052659034729, - "658": 0.5358248353004456, - "659": 0.4887659549713135, - "660": 0.485589861869812, - "661": 0.43086305260658264, - "662": 0.3795575797557831, - "663": 0.3434537649154663, - "664": 0.30627480149269104, - "665": 0.3067757189273834, - "666": 0.3441285490989685, - "667": 0.37962791323661804, - "668": 0.35083821415901184, - "669": 0.3440358340740204, - "670": 0.3448472321033478, - "671": 0.3453265130519867, - "672": 0.3232404291629791, - "673": 0.3418600857257843, - "674": 0.34718480706214905, - "675": 0.3254640996456146, - "676": 0.2992471754550934, - "677": 0.30947911739349365, - "678": 0.2990650236606598, - "679": 0.3076228201389313, - "680": 0.29717665910720825, - "681": 0.2664761245250702, - "682": 0.324785441160202, - "683": 0.6659073829650879, - "684": 0.5112767815589905, - "685": 0.3169627785682678, - "686": 0.23131200671195984, - "687": 0.21421672403812408, - "688": 0.19934771955013275, - "689": 0.20982535183429718, - "690": 0.2480393946170807, - "691": 0.2140820473432541, - "692": 0.1506846398115158, - "693": 0.1834387481212616, - "694": 0.22321200370788574, - "695": 0.18303877115249634, - "696": 0.18095025420188904, - "697": 0.18474681675434113, - "698": 0.1542045921087265, - "699": 0.18722376227378845, - "700": 0.12826938927173615, - "701": 0.14469276368618011 - }, - "loss": { - "648": 2.9101409912109375, - "649": 2.8857178688049316, - "650": 2.80180025100708, - "651": 2.823981285095215, - "652": 2.8009486198425293, - "653": 2.7925796508789062, - "654": 2.8104872703552246, - "655": 2.8123631477355957, - "656": 2.7992591857910156, - "657": 2.7989237308502197, - "658": 2.8018665313720703, - "659": 2.78963565826416, - "660": 2.7830281257629395, - "661": 2.802065134048462, - "662": 2.7985219955444336, - "663": 2.7972140312194824, - "664": 2.769890546798706, - "665": 2.7823495864868164, - "666": 2.770477771759033, - "667": 2.7753307819366455, - "668": 2.7468605041503906, - "669": 2.7742881774902344, - "670": 2.7920119762420654, - "671": 2.7548999786376953, - "672": 2.7518343925476074, - "673": 2.778820037841797, - "674": 2.762505292892456, - "675": 2.7465221881866455, - "676": 2.74069881439209, - "677": 2.7722177505493164, - "678": 2.7563436031341553, - "679": 2.740978240966797, - "680": 2.7344717979431152, - "681": 2.7557859420776367, - "682": 2.778360366821289, - "683": 2.753188133239746, - "684": 2.7890872955322266, - "685": 2.7625885009765625, - "686": 2.7399306297302246, - "687": 2.7669248580932617, - "688": 2.743842124938965, - "689": 2.727412223815918, - "690": 2.735049247741699, - "691": 2.7443788051605225, - "692": 2.73051118850708, - "693": 2.724133014678955, - "694": 2.7359414100646973, - "695": 2.74139404296875, - "696": 2.734097480773926, - "697": 2.73106050491333, - "698": 2.742445707321167, - "699": 2.7274019718170166, - "700": 2.7255194187164307, - "701": 2.7200241088867188 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "train_epoch_time": 4.788705825805664, - "train_loss": 2.7261262793794083, - "train_score": 0.25663782269232926, - "val_loss": 2.7493379597822645, - "val_score": 0.24852002010895655 - }, - { - "epoch": 13, - "grad_norm": 0.16334109008312225, - "learning_rate": 0.3093333333333334, - "model_norm": 87.9638900756836, - "step_logs": { - "grad_norm": { - "702": 0.13187536597251892, - "703": 0.129169762134552, - "704": 0.14398512244224548, - "705": 0.19418491423130035, - "706": 0.2328021377325058, - "707": 0.24253851175308228, - "708": 0.23169022798538208, - "709": 0.20916405320167542, - "710": 0.1637910008430481, - "711": 0.15132522583007812, - "712": 0.16344644129276276, - "713": 0.15693415701389313, - "714": 0.16148744523525238, - "715": 0.18595477938652039, - "716": 0.19046254456043243, - "717": 0.2558163106441498, - "718": 0.2811249792575836, - "719": 0.2495967596769333, - "720": 0.21979032456874847, - "721": 0.21898052096366882, - "722": 0.2105591744184494, - "723": 0.25884491205215454, - "724": 0.22478868067264557, - "725": 0.2866717278957367, - "726": 0.2695862948894501, - "727": 0.16498975455760956, - "728": 0.17787492275238037, - "729": 0.19540585577487946, - "730": 0.22890213131904602, - "731": 0.3541260063648224, - "732": 0.2720671594142914, - "733": 0.2354908436536789, - "734": 0.14638392627239227, - "735": 0.1424023061990738, - "736": 0.16258752346038818, - "737": 0.1467246413230896, - "738": 0.2034228891134262, - "739": 0.19281704723834991, - "740": 0.1750289648771286, - "741": 0.15293042361736298, - "742": 0.12029548734426498, - "743": 0.16493894159793854, - "744": 0.15983934700489044, - "745": 0.2205306738615036, - "746": 0.2871588468551636, - "747": 0.3217866122722626, - "748": 0.36361536383628845, - "749": 0.28642401099205017, - "750": 0.23048517107963562, - "751": 0.21606577932834625, - "752": 0.1832922399044037, - "753": 0.15014003217220306, - "754": 0.13805142045021057, - "755": 0.16334109008312225 - }, - "loss": { - "702": 2.7210288047790527, - "703": 2.71040678024292, - "704": 2.7128617763519287, - "705": 2.7106144428253174, - "706": 2.7419934272766113, - "707": 2.7281219959259033, - "708": 2.7229061126708984, - "709": 2.6910910606384277, - "710": 2.703174114227295, - "711": 2.7133238315582275, - "712": 2.715045928955078, - "713": 2.7020325660705566, - "714": 2.735429286956787, - "715": 2.7259769439697266, - "716": 2.6886775493621826, - "717": 2.718719005584717, - "718": 2.7142601013183594, - "719": 2.7110254764556885, - "720": 2.7338175773620605, - "721": 2.703158378601074, - "722": 2.707919120788574, - "723": 2.7115983963012695, - "724": 2.7157111167907715, - "725": 2.7386438846588135, - "726": 2.6857311725616455, - "727": 2.6828131675720215, - "728": 2.7332677841186523, - "729": 2.6948840618133545, - "730": 2.695504665374756, - "731": 2.7100348472595215, - "732": 2.718676805496216, - "733": 2.700397491455078, - "734": 2.7122879028320312, - "735": 2.7046704292297363, - "736": 2.6769018173217773, - "737": 2.698110580444336, - "738": 2.697983503341675, - "739": 2.695675849914551, - "740": 2.6761045455932617, - "741": 2.6895012855529785, - "742": 2.690903663635254, - "743": 2.6934428215026855, - "744": 2.6842222213745117, - "745": 2.692636013031006, - "746": 2.6920151710510254, - "747": 2.699789047241211, - "748": 2.6843533515930176, - "749": 2.696812152862549, - "750": 2.6809630393981934, - "751": 2.6825451850891113, - "752": 2.687816619873047, - "753": 2.6947576999664307, - "754": 2.684507131576538, - "755": 2.681462526321411 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "train_epoch_time": 4.791311264038086, - "train_loss": 2.684370730904972, - "train_score": 0.26502757361492774, - "val_loss": 2.7086549054330855, - "val_score": 0.2551844140897525 - }, - { - "epoch": 14, - "grad_norm": 0.16301603615283966, - "learning_rate": 0.1546666666666667, - "model_norm": 87.96991729736328, - "step_logs": { - "grad_norm": { - "756": 0.1665090024471283, - "757": 0.15137776732444763, - "758": 0.14224958419799805, - "759": 0.17822395265102386, - "760": 0.1344822645187378, - "761": 0.13008250296115875, - "762": 0.121334969997406, - "763": 0.12930189073085785, - "764": 0.13042472302913666, - "765": 0.13586494326591492, - "766": 0.15217404067516327, - "767": 0.18378371000289917, - "768": 0.21408049762248993, - "769": 0.18494872748851776, - "770": 0.13941864669322968, - "771": 0.15975496172904968, - "772": 0.18425214290618896, - "773": 0.15536095201969147, - "774": 0.1300903707742691, - "775": 0.13136355578899384, - "776": 0.1413564831018448, - "777": 0.13182124495506287, - "778": 0.15004104375839233, - "779": 0.14577293395996094, - "780": 0.15609191358089447, - "781": 0.18846388161182404, - "782": 0.12447446584701538, - "783": 0.18269097805023193, - "784": 0.16804993152618408, - "785": 0.13790851831436157, - "786": 0.14341942965984344, - "787": 0.13903860747814178, - "788": 0.13624010980129242, - "789": 0.18383239209651947, - "790": 0.17174266278743744, - "791": 0.14851796627044678, - "792": 0.14885656535625458, - "793": 0.13885612785816193, - "794": 0.1830388605594635, - "795": 0.12799538671970367, - "796": 0.1385277658700943, - "797": 0.11917927861213684, - "798": 0.14329096674919128, - "799": 0.1376391053199768, - "800": 0.14844174683094025, - "801": 0.14833678305149078, - "802": 0.13612188398838043, - "803": 0.14774993062019348, - "804": 0.143861785531044, - "805": 0.13204163312911987, - "806": 0.13631823658943176, - "807": 0.13291941583156586, - "808": 0.1458321511745453, - "809": 0.16301603615283966 - }, - "loss": { - "756": 2.672776937484741, - "757": 2.677155017852783, - "758": 2.6865949630737305, - "759": 2.688605785369873, - "760": 2.652066707611084, - "761": 2.6863296031951904, - "762": 2.6873316764831543, - "763": 2.674668312072754, - "764": 2.674224615097046, - "765": 2.6821889877319336, - "766": 2.683260917663574, - "767": 2.677043914794922, - "768": 2.678922414779663, - "769": 2.6896395683288574, - "770": 2.6841695308685303, - "771": 2.695735454559326, - "772": 2.6675703525543213, - "773": 2.6772587299346924, - "774": 2.6956119537353516, - "775": 2.6756885051727295, - "776": 2.699130058288574, - "777": 2.6842451095581055, - "778": 2.653803586959839, - "779": 2.67105770111084, - "780": 2.676814556121826, - "781": 2.675985097885132, - "782": 2.6740026473999023, - "783": 2.6857800483703613, - "784": 2.677624225616455, - "785": 2.672084331512451, - "786": 2.654049873352051, - "787": 2.6669039726257324, - "788": 2.6923561096191406, - "789": 2.6649057865142822, - "790": 2.6838855743408203, - "791": 2.6713709831237793, - "792": 2.687340259552002, - "793": 2.6601858139038086, - "794": 2.66515851020813, - "795": 2.6606554985046387, - "796": 2.665588855743408, - "797": 2.6693201065063477, - "798": 2.669257402420044, - "799": 2.6660897731781006, - "800": 2.6808218955993652, - "801": 2.680556297302246, - "802": 2.6595144271850586, - "803": 2.6731250286102295, - "804": 2.6824917793273926, - "805": 2.676051616668701, - "806": 2.647117853164673, - "807": 2.681386709213257, - "808": 2.67110013961792, - "809": 2.6987640857696533 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "train_epoch_time": 4.7905213832855225, - "train_loss": 2.671449064763752, - "train_score": 0.2684305506765073, - "val_loss": 2.6970659364378413, - "val_score": 0.25746717090995386 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:34:54.268286", - "final_model_norm": 87.96991729736328, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:33:13.426974", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 0.464, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": 1.255052924156189, - "learning_rate": 4.64e-11, - "model_norm": 87.92820739746094, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 6.244149684906006, - "3": 8.440503120422363, - "4": 16.79232406616211, - "5": 4.453291893005371, - "6": 3.6784656047821045, - "7": 3.89975905418396, - "8": 6.6434431076049805, - "9": 4.686246871948242, - "10": 4.644492149353027, - "11": 3.9039089679718018, - "12": 6.489428997039795, - "13": 4.205042362213135, - "14": 4.445206642150879, - "15": 24.46038246154785, - "16": 80.50111389160156, - "17": 4.711960792541504, - "18": 16.075946807861328, - "19": 12.762138366699219, - "20": 5.1727776527404785, - "21": 4.746818542480469, - "22": 6.861893653869629, - "23": 5.371772289276123, - "24": 5.298776626586914, - "25": 11.80709171295166, - "26": 9.1215181350708, - "27": 6.4599714279174805, - "28": 6.367079734802246, - "29": 5.568437099456787, - "30": 8.229534149169922, - "31": 14.150362014770508, - "32": 15.036437034606934, - "33": 8.959894180297852, - "34": 6.683808326721191, - "35": 4.862767219543457, - "36": 4.8636980056762695, - "37": 20.078683853149414, - "38": 3.1052157878875732, - "39": 2.5379903316497803, - "40": 1.894242763519287, - "41": 4.942265033721924, - "42": 13.566061973571777, - "43": 9.855283737182617, - "44": 5.184023380279541, - "45": 2.8199052810668945, - "46": 2.1188459396362305, - "47": 1.5920944213867188, - "48": 1.099333643913269, - "49": 1.2937748432159424, - "50": 1.3464598655700684, - "51": 1.9658997058868408, - "52": 3.244523286819458, - "53": 1.255052924156189 - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.7835984230041504, - "3": 3.8444766998291016, - "4": 4.425124645233154, - "5": 4.850791931152344, - "6": 4.1621551513671875, - "7": 3.6580116748809814, - "8": 3.812798500061035, - "9": 4.489406585693359, - "10": 3.7161145210266113, - "11": 3.8169572353363037, - "12": 4.414116859436035, - "13": 4.110918045043945, - "14": 4.620980262756348, - "15": 6.789256572723389, - "16": 5.334522247314453, - "17": 5.190731048583984, - "18": 5.546748161315918, - "19": 10.742549896240234, - "20": 8.087642669677734, - "21": 5.821972370147705, - "22": 5.938457489013672, - "23": 8.681151390075684, - "24": 5.574102401733398, - "25": 6.620218276977539, - "26": 13.632080078125, - "27": 11.717260360717773, - "28": 8.583717346191406, - "29": 6.647965908050537, - "30": 8.752342224121094, - "31": 7.99849796295166, - "32": 14.822242736816406, - "33": 20.757707595825195, - "34": 18.493423461914062, - "35": 14.1813325881958, - "36": 8.413106918334961, - "37": 10.946731567382812, - "38": 8.983861923217773, - "39": 6.206698894500732, - "40": 4.411967754364014, - "41": 4.126418113708496, - "42": 10.625564575195312, - "43": 10.824403762817383, - "44": 9.947172164916992, - "45": 10.309802055358887, - "46": 8.01710319519043, - "47": 6.207764148712158, - "48": 5.267107009887695, - "49": 4.831733226776123, - "50": 4.455212116241455, - "51": 3.931161642074585, - "52": 4.3249993324279785, - "53": 4.623747825622559 - }, - "lr": { - "0": 4.64e-11, - "1": 0.009280000045472001, - "2": 0.018560000044544, - "3": 0.027840000043616, - "4": 0.037120000042688, - "5": 0.04640000004176, - "6": 0.055680000040832, - "7": 0.064960000039904, - "8": 0.074240000038976, - "9": 0.083520000038048, - "10": 0.09280000003712001, - "11": 0.102080000036192, - "12": 0.111360000035264, - "13": 0.120640000034336, - "14": 0.129920000033408, - "15": 0.13920000003248, - "16": 0.148480000031552, - "17": 0.157760000030624, - "18": 0.16704000002969602, - "19": 0.17632000002876802, - "20": 0.18560000002784002, - "21": 0.194880000026912, - "22": 0.204160000025984, - "23": 0.21344000002505603, - "24": 0.222720000024128, - "25": 0.23200000002319995, - "26": 0.24128000002227198, - "27": 0.25056000002134404, - "28": 0.259840000020416, - "29": 0.269120000019488, - "30": 0.27840000001856, - "31": 0.28768000001763205, - "32": 0.29696000001670403, - "33": 0.306240000015776, - "34": 0.315520000014848, - "35": 0.32480000001392006, - "36": 0.33408000001299204, - "37": 0.343360000012064, - "38": 0.35264000001113605, - "39": 0.361920000010208, - "40": 0.37120000000928005, - "41": 0.380480000008352, - "42": 0.389760000007424, - "43": 0.39904000000649603, - "44": 0.408320000005568, - "45": 0.41760000000464, - "46": 0.42688000000371207, - "47": 0.43616000000278404, - "48": 0.445440000001856, - "49": 0.454720000000928, - "50": 0.464, - "51": 0.464, - "52": 0.464, - "53": 0.464 - } - }, - "train_epoch_time": 4.789364814758301, - "train_loss": 4.180719149984965, - "train_score": 0.1526105183055103, - "val_loss": 4.180160900755399, - "val_score": 0.15114182700861745 - }, - { - "epoch": 1, - "grad_norm": 0.7922757267951965, - "learning_rate": 0.464, - "model_norm": 87.93119812011719, - "step_logs": { - "grad_norm": { - "54": 1.3509052991867065, - "55": 1.072674036026001, - "56": 0.925609827041626, - "57": 0.8375693559646606, - "58": 1.6903800964355469, - "59": 1.0724360942840576, - "60": 0.7637794613838196, - "61": 1.409530520439148, - "62": 3.699445962905884, - "63": 1.0121699571609497, - "64": 0.9652178287506104, - "65": 0.9076206684112549, - "66": 0.7453747987747192, - "67": 0.7306337356567383, - "68": 1.1381064653396606, - "69": 0.9752975106239319, - "70": 0.6664982438087463, - "71": 0.7316480278968811, - "72": 1.0717217922210693, - "73": 0.8266065716743469, - "74": 0.20798030495643616, - "75": 0.24004042148590088, - "76": 0.3069019913673401, - "77": 0.3994874656200409, - "78": 0.6231363415718079, - "79": 0.6981455087661743, - "80": 0.9245103597640991, - "81": 0.9434074759483337, - "82": 0.9260461330413818, - "83": 0.8412745594978333, - "84": 0.6647123098373413, - "85": 0.7116034626960754, - "86": 0.9867504835128784, - "87": 0.8474131226539612, - "88": 0.5606356859207153, - "89": 0.998553454875946, - "90": 1.701401710510254, - "91": 1.2479462623596191, - "92": 0.8529561758041382, - "93": 0.4944674074649811, - "94": 0.49748218059539795, - "95": 0.593855619430542, - "96": 0.5744707584381104, - "97": 0.6321374773979187, - "98": 0.7070459127426147, - "99": 0.8582203388214111, - "100": 1.1194994449615479, - "101": 1.1126043796539307, - "102": 0.8445208668708801, - "103": 0.6512200832366943, - "104": 0.8280763626098633, - "105": 0.9169391393661499, - "106": 0.8577074408531189, - "107": 0.7922757267951965 - }, - "loss": { - "54": 4.181344985961914, - "55": 3.9017367362976074, - "56": 3.5386595726013184, - "57": 3.505521297454834, - "58": 3.5246646404266357, - "59": 3.856013298034668, - "60": 3.4496750831604004, - "61": 3.545161724090576, - "62": 3.93764066696167, - "63": 4.640109539031982, - "64": 4.22357702255249, - "65": 3.822941780090332, - "66": 3.55462646484375, - "67": 3.4582881927490234, - "68": 3.4612998962402344, - "69": 3.5940842628479004, - "70": 3.408729076385498, - "71": 3.454644203186035, - "72": 3.394103527069092, - "73": 3.568695068359375, - "74": 3.3560407161712646, - "75": 3.341549873352051, - "76": 3.3487112522125244, - "77": 3.3470993041992188, - "78": 3.36503267288208, - "79": 3.424377918243408, - "80": 3.398441791534424, - "81": 3.4952120780944824, - "82": 3.370410442352295, - "83": 3.4830679893493652, - "84": 3.373124599456787, - "85": 3.3951807022094727, - "86": 3.3787293434143066, - "87": 3.5009942054748535, - "88": 3.3334760665893555, - "89": 3.3573577404022217, - "90": 3.5122151374816895, - "91": 3.734975576400757, - "92": 3.4867894649505615, - "93": 3.3376197814941406, - "94": 3.3570609092712402, - "95": 3.3508875370025635, - "96": 3.3901641368865967, - "97": 3.3443620204925537, - "98": 3.3447489738464355, - "99": 3.3522868156433105, - "100": 3.426947832107544, - "101": 3.4931211471557617, - "102": 3.416281223297119, - "103": 3.3241302967071533, - "104": 3.3692924976348877, - "105": 3.385265588760376, - "106": 3.4028725624084473, - "107": 3.377155303955078 - }, - "lr": { - "54": 0.464, - "55": 0.464, - "56": 0.464, - "57": 0.464, - "58": 0.464, - "59": 0.464, - "60": 0.464, - "61": 0.464, - "62": 0.464, - "63": 0.464, - "64": 0.464, - "65": 0.464, - "66": 0.464, - "67": 0.464, - "68": 0.464, - "69": 0.464, - "70": 0.464, - "71": 0.464, - "72": 0.464, - "73": 0.464, - "74": 0.464, - "75": 0.464, - "76": 0.464, - "77": 0.464, - "78": 0.464, - "79": 0.464, - "80": 0.464, - "81": 0.464, - "82": 0.464, - "83": 0.464, - "84": 0.464, - "85": 0.464, - "86": 0.464, - "87": 0.464, - "88": 0.464, - "89": 0.464, - "90": 0.464, - "91": 0.464, - "92": 0.464, - "93": 0.464, - "94": 0.464, - "95": 0.464, - "96": 0.464, - "97": 0.464, - "98": 0.464, - "99": 0.464, - "100": 0.464, - "101": 0.464, - "102": 0.464, - "103": 0.464, - "104": 0.464, - "105": 0.464, - "106": 0.464, - "107": 0.464 - } - }, - "train_epoch_time": 4.787489175796509, - "train_loss": 3.3512276984697777, - "train_score": 0.16325546989225417, - "val_loss": 3.3691477931599665, - "val_score": 0.16154653442987207 - }, - { - "epoch": 2, - "grad_norm": 0.6837993264198303, - "learning_rate": 0.464, - "model_norm": 87.95731353759766, - "step_logs": { - "grad_norm": { - "108": 0.7729028463363647, - "109": 0.8577842712402344, - "110": 0.8788442015647888, - "111": 0.7919557094573975, - "112": 0.7402887940406799, - "113": 0.8491641283035278, - "114": 0.8003381490707397, - "115": 0.6720640659332275, - "116": 0.6806644201278687, - "117": 0.7202664613723755, - "118": 0.6973781585693359, - "119": 0.710938572883606, - "120": 0.7061183452606201, - "121": 0.6684529185295105, - "122": 0.5951650738716125, - "123": 0.5564042925834656, - "124": 0.5777530670166016, - "125": 0.603927731513977, - "126": 0.6163882613182068, - "127": 0.6118666529655457, - "128": 0.5854691863059998, - "129": 0.6328118443489075, - "130": 0.656987726688385, - "131": 0.6404881477355957, - "132": 0.6307449340820312, - "133": 0.6298400163650513, - "134": 0.6094688177108765, - "135": 0.5367652177810669, - "136": 0.5335031747817993, - "137": 0.4940011203289032, - "138": 0.4786943793296814, - "139": 0.5247909426689148, - "140": 0.5546916127204895, - "141": 0.6646736264228821, - "142": 0.6263370513916016, - "143": 0.5697901844978333, - "144": 0.5052744150161743, - "145": 0.4368767738342285, - "146": 0.4330008924007416, - "147": 0.37531915307044983, - "148": 0.39945143461227417, - "149": 0.4583311975002289, - "150": 0.4343625605106354, - "151": 0.42306220531463623, - "152": 0.4210350215435028, - "153": 0.48869818449020386, - "154": 0.5197953581809998, - "155": 0.5794380307197571, - "156": 0.534491240978241, - "157": 0.47136905789375305, - "158": 0.5203049182891846, - "159": 0.6415740847587585, - "160": 0.6665178537368774, - "161": 0.6837993264198303 - }, - "loss": { - "108": 3.343027353286743, - "109": 3.3579330444335938, - "110": 3.394780158996582, - "111": 3.352787971496582, - "112": 3.332044839859009, - "113": 3.349801540374756, - "114": 3.3971102237701416, - "115": 3.322275400161743, - "116": 3.332723379135132, - "117": 3.3307383060455322, - "118": 3.32688570022583, - "119": 3.3205206394195557, - "120": 3.341777801513672, - "121": 3.3131961822509766, - "122": 3.339186429977417, - "123": 3.288053512573242, - "124": 3.3153223991394043, - "125": 3.3331809043884277, - "126": 3.3103647232055664, - "127": 3.2892110347747803, - "128": 3.318519115447998, - "129": 3.31482195854187, - "130": 3.3041484355926514, - "131": 3.284627914428711, - "132": 3.2961630821228027, - "133": 3.2825570106506348, - "134": 3.298042058944702, - "135": 3.2494163513183594, - "136": 3.2979965209960938, - "137": 3.2948079109191895, - "138": 3.271838665008545, - "139": 3.283370018005371, - "140": 3.2520718574523926, - "141": 3.240216016769409, - "142": 3.2973480224609375, - "143": 3.2993502616882324, - "144": 3.2675976753234863, - "145": 3.252697229385376, - "146": 3.255619525909424, - "147": 3.23370099067688, - "148": 3.2278897762298584, - "149": 3.235509157180786, - "150": 3.2723007202148438, - "151": 3.2624588012695312, - "152": 3.2469358444213867, - "153": 3.2500171661376953, - "154": 3.268277645111084, - "155": 3.2700719833374023, - "156": 3.280336380004883, - "157": 3.257960319519043, - "158": 3.2380642890930176, - "159": 3.2370452880859375, - "160": 3.3121280670166016, - "161": 3.239056348800659 - }, - "lr": { - "108": 0.464, - "109": 0.464, - "110": 0.464, - "111": 0.464, - "112": 0.464, - "113": 0.464, - "114": 0.464, - "115": 0.464, - "116": 0.464, - "117": 0.464, - "118": 0.464, - "119": 0.464, - "120": 0.464, - "121": 0.464, - "122": 0.464, - "123": 0.464, - "124": 0.464, - "125": 0.464, - "126": 0.464, - "127": 0.464, - "128": 0.464, - "129": 0.464, - "130": 0.464, - "131": 0.464, - "132": 0.464, - "133": 0.464, - "134": 0.464, - "135": 0.464, - "136": 0.464, - "137": 0.464, - "138": 0.464, - "139": 0.464, - "140": 0.464, - "141": 0.464, - "142": 0.464, - "143": 0.464, - "144": 0.464, - "145": 0.464, - "146": 0.464, - "147": 0.464, - "148": 0.464, - "149": 0.464, - "150": 0.464, - "151": 0.464, - "152": 0.464, - "153": 0.464, - "154": 0.464, - "155": 0.464, - "156": 0.464, - "157": 0.464, - "158": 0.464, - "159": 0.464, - "160": 0.464, - "161": 0.464 - } - }, - "train_epoch_time": 4.787842035293579, - "train_loss": 3.2856115087375066, - "train_score": 0.12368633428981349, - "val_loss": 3.3047467003459907, - "val_score": 0.11694980645391889 - }, - { - "epoch": 3, - "grad_norm": 0.6413682699203491, - "learning_rate": 0.464, - "model_norm": 87.99307250976562, - "step_logs": { - "grad_norm": { - "162": 0.6811285018920898, - "163": 0.6921555995941162, - "164": 0.6935116052627563, - "165": 0.6758252382278442, - "166": 0.6695922613143921, - "167": 0.6933759450912476, - "168": 0.6095072031021118, - "169": 0.47101956605911255, - "170": 0.43185028433799744, - "171": 0.40144869685173035, - "172": 0.4124176800251007, - "173": 0.39040708541870117, - "174": 0.3865910470485687, - "175": 0.3941298723220825, - "176": 0.39663565158843994, - "177": 0.45247504115104675, - "178": 0.4442135691642761, - "179": 0.38476887345314026, - "180": 0.3770773708820343, - "181": 0.4056793749332428, - "182": 0.41073769330978394, - "183": 0.41719454526901245, - "184": 0.4165584146976471, - "185": 0.3881615698337555, - "186": 0.34230518341064453, - "187": 0.30825161933898926, - "188": 0.29052141308784485, - "189": 0.30808180570602417, - "190": 0.2942551374435425, - "191": 0.2962765395641327, - "192": 0.3311639726161957, - "193": 0.4142826497554779, - "194": 0.44628313183784485, - "195": 0.5034071207046509, - "196": 0.5449778437614441, - "197": 0.5193648934364319, - "198": 0.4610893726348877, - "199": 0.3979952931404114, - "200": 0.39102810621261597, - "201": 0.43607431650161743, - "202": 0.42258667945861816, - "203": 0.42442119121551514, - "204": 0.4446522891521454, - "205": 0.47429367899894714, - "206": 0.4473903179168701, - "207": 0.46548813581466675, - "208": 0.4778720736503601, - "209": 0.48176315426826477, - "210": 0.4720529615879059, - "211": 0.4752734899520874, - "212": 0.5729631185531616, - "213": 0.6581975221633911, - "214": 0.6999406814575195, - "215": 0.6413682699203491 - }, - "loss": { - "162": 3.2915611267089844, - "163": 3.259284257888794, - "164": 3.2547430992126465, - "165": 3.2619223594665527, - "166": 3.270458221435547, - "167": 3.2979471683502197, - "168": 3.280670166015625, - "169": 3.229694366455078, - "170": 3.2294466495513916, - "171": 3.2429614067077637, - "172": 3.2495737075805664, - "173": 3.172105312347412, - "174": 3.226919174194336, - "175": 3.1982710361480713, - "176": 3.193342924118042, - "177": 3.2205593585968018, - "178": 3.224392890930176, - "179": 3.1983819007873535, - "180": 3.2073307037353516, - "181": 3.2270095348358154, - "182": 3.199389934539795, - "183": 3.1954102516174316, - "184": 3.1985745429992676, - "185": 3.174147605895996, - "186": 3.2371246814727783, - "187": 3.188760280609131, - "188": 3.198084831237793, - "189": 3.1940371990203857, - "190": 3.1846954822540283, - "191": 3.1881535053253174, - "192": 3.1859548091888428, - "193": 3.209023952484131, - "194": 3.2014236450195312, - "195": 3.2187161445617676, - "196": 3.191277027130127, - "197": 3.192574977874756, - "198": 3.2317447662353516, - "199": 3.1665432453155518, - "200": 3.1838431358337402, - "201": 3.1726231575012207, - "202": 3.17507266998291, - "203": 3.195721387863159, - "204": 3.168273448944092, - "205": 3.1400740146636963, - "206": 3.188877582550049, - "207": 3.180706739425659, - "208": 3.1650032997131348, - "209": 3.1470534801483154, - "210": 3.189568042755127, - "211": 3.1283812522888184, - "212": 3.1814656257629395, - "213": 3.168051242828369, - "214": 3.2089481353759766, - "215": 3.174100399017334 - }, - "lr": { - "162": 0.464, - "163": 0.464, - "164": 0.464, - "165": 0.464, - "166": 0.464, - "167": 0.464, - "168": 0.464, - "169": 0.464, - "170": 0.464, - "171": 0.464, - "172": 0.464, - "173": 0.464, - "174": 0.464, - "175": 0.464, - "176": 0.464, - "177": 0.464, - "178": 0.464, - "179": 0.464, - "180": 0.464, - "181": 0.464, - "182": 0.464, - "183": 0.464, - "184": 0.464, - "185": 0.464, - "186": 0.464, - "187": 0.464, - "188": 0.464, - "189": 0.464, - "190": 0.464, - "191": 0.464, - "192": 0.464, - "193": 0.464, - "194": 0.464, - "195": 0.464, - "196": 0.464, - "197": 0.464, - "198": 0.464, - "199": 0.464, - "200": 0.464, - "201": 0.464, - "202": 0.464, - "203": 0.464, - "204": 0.464, - "205": 0.464, - "206": 0.464, - "207": 0.464, - "208": 0.464, - "209": 0.464, - "210": 0.464, - "211": 0.464, - "212": 0.464, - "213": 0.464, - "214": 0.464, - "215": 0.464 - } - }, - "train_epoch_time": 4.787900686264038, - "train_loss": 3.1774323951223145, - "train_score": 0.17367624640037202, - "val_loss": 3.19929821072435, - "val_score": 0.1669731274777521 - }, - { - "epoch": 4, - "grad_norm": 0.3717726171016693, - "learning_rate": 0.464, - "model_norm": 88.0340347290039, - "step_logs": { - "grad_norm": { - "216": 0.5664742588996887, - "217": 0.5571743249893188, - "218": 0.5474212169647217, - "219": 0.5158417820930481, - "220": 0.47308504581451416, - "221": 0.4393671452999115, - "222": 0.47447481751441956, - "223": 0.5323047041893005, - "224": 0.5777488350868225, - "225": 0.5992069244384766, - "226": 0.5938617587089539, - "227": 0.5525433421134949, - "228": 0.5450514554977417, - "229": 0.5197822451591492, - "230": 0.4942905604839325, - "231": 1.0696790218353271, - "232": 0.5042442083358765, - "233": 0.5214129686355591, - "234": 0.5233376622200012, - "235": 0.6291642785072327, - "236": 0.5193829536437988, - "237": 0.5011067986488342, - "238": 0.45605939626693726, - "239": 0.4285101294517517, - "240": 0.4953652322292328, - "241": 0.47314420342445374, - "242": 0.49985113739967346, - "243": 0.49697956442832947, - "244": 0.5121140480041504, - "245": 0.5653178691864014, - "246": 0.6336447596549988, - "247": 0.6101993918418884, - "248": 0.5862594246864319, - "249": 0.6142231225967407, - "250": 0.5897337794303894, - "251": 0.554370641708374, - "252": 0.5044097304344177, - "253": 0.4481051564216614, - "254": 0.43696653842926025, - "255": 0.48997175693511963, - "256": 0.5274620652198792, - "257": 0.5600911974906921, - "258": 0.5656968951225281, - "259": 0.538122832775116, - "260": 0.49221640825271606, - "261": 0.46678784489631653, - "262": 0.48531803488731384, - "263": 0.5195136666297913, - "264": 0.5314480662345886, - "265": 0.49032315611839294, - "266": 0.4635796844959259, - "267": 0.422687292098999, - "268": 0.3832295536994934, - "269": 0.3717726171016693 - }, - "loss": { - "216": 3.187713146209717, - "217": 3.1606132984161377, - "218": 3.1345620155334473, - "219": 3.1614551544189453, - "220": 3.1735520362854004, - "221": 3.1460280418395996, - "222": 3.1611826419830322, - "223": 3.1580722332000732, - "224": 3.1597862243652344, - "225": 3.128140687942505, - "226": 3.1753532886505127, - "227": 3.1484668254852295, - "228": 3.157589912414551, - "229": 3.124562978744507, - "230": 3.1389760971069336, - "231": 3.128298759460449, - "232": 3.1525557041168213, - "233": 3.1237292289733887, - "234": 3.1393890380859375, - "235": 3.1168925762176514, - "236": 3.144312858581543, - "237": 3.084429979324341, - "238": 3.133145570755005, - "239": 3.089230537414551, - "240": 3.1452391147613525, - "241": 3.107693672180176, - "242": 3.1169419288635254, - "243": 3.1070642471313477, - "244": 3.104316234588623, - "245": 3.1013269424438477, - "246": 3.135496139526367, - "247": 3.1035571098327637, - "248": 3.141105890274048, - "249": 3.138065814971924, - "250": 3.112060070037842, - "251": 3.082488536834717, - "252": 3.1439249515533447, - "253": 3.050626277923584, - "254": 3.086163282394409, - "255": 3.091043472290039, - "256": 3.107741594314575, - "257": 3.0875916481018066, - "258": 3.0926926136016846, - "259": 3.0839829444885254, - "260": 3.0700325965881348, - "261": 3.055593490600586, - "262": 3.070647716522217, - "263": 3.0679359436035156, - "264": 3.106372833251953, - "265": 3.05061674118042, - "266": 3.0802764892578125, - "267": 3.068589448928833, - "268": 3.0574254989624023, - "269": 3.0175788402557373 - }, - "lr": { - "216": 0.464, - "217": 0.464, - "218": 0.464, - "219": 0.464, - "220": 0.464, - "221": 0.464, - "222": 0.464, - "223": 0.464, - "224": 0.464, - "225": 0.464, - "226": 0.464, - "227": 0.464, - "228": 0.464, - "229": 0.464, - "230": 0.464, - "231": 0.464, - "232": 0.464, - "233": 0.464, - "234": 0.464, - "235": 0.464, - "236": 0.464, - "237": 0.464, - "238": 0.464, - "239": 0.464, - "240": 0.464, - "241": 0.464, - "242": 0.464, - "243": 0.464, - "244": 0.464, - "245": 0.464, - "246": 0.464, - "247": 0.464, - "248": 0.464, - "249": 0.464, - "250": 0.464, - "251": 0.464, - "252": 0.464, - "253": 0.464, - "254": 0.464, - "255": 0.464, - "256": 0.464, - "257": 0.464, - "258": 0.464, - "259": 0.464, - "260": 0.464, - "261": 0.464, - "262": 0.464, - "263": 0.464, - "264": 0.464, - "265": 0.464, - "266": 0.464, - "267": 0.464, - "268": 0.464, - "269": 0.464 - } - }, - "train_epoch_time": 4.788339138031006, - "train_loss": 3.032273162830851, - "train_score": 0.2070626344879183, - "val_loss": 3.0533037478559737, - "val_score": 0.20032200812609133 - }, - { - "epoch": 5, - "grad_norm": 0.5066221356391907, - "learning_rate": 0.464, - "model_norm": 88.08234405517578, - "step_logs": { - "grad_norm": { - "270": 0.3661964535713196, - "271": 0.33545616269111633, - "272": 0.32537969946861267, - "273": 0.42328375577926636, - "274": 0.469877153635025, - "275": 0.4564831256866455, - "276": 0.4466836750507355, - "277": 0.4794429540634155, - "278": 0.5175257325172424, - "279": 0.7766371965408325, - "280": 0.6751675605773926, - "281": 0.5785229206085205, - "282": 0.5068453550338745, - "283": 0.4388604462146759, - "284": 0.3626843988895416, - "285": 0.34235700964927673, - "286": 0.2892029285430908, - "287": 0.30587390065193176, - "288": 0.48994946479797363, - "289": 0.9075611233711243, - "290": 0.6866692900657654, - "291": 0.49183326959609985, - "292": 0.4632605016231537, - "293": 0.4485301673412323, - "294": 0.4725918769836426, - "295": 0.5011042952537537, - "296": 0.4562439024448395, - "297": 0.4210890233516693, - "298": 0.4249041676521301, - "299": 0.4469127357006073, - "300": 0.4342898428440094, - "301": 0.41566258668899536, - "302": 0.38698825240135193, - "303": 0.4272957444190979, - "304": 0.42278552055358887, - "305": 0.7098525166511536, - "306": 0.481454461812973, - "307": 0.4574628174304962, - "308": 0.37931933999061584, - "309": 0.41069918870925903, - "310": 0.4405274987220764, - "311": 0.46115586161613464, - "312": 0.4353087842464447, - "313": 0.45827800035476685, - "314": 0.5010109543800354, - "315": 0.5244488716125488, - "316": 0.4856067895889282, - "317": 0.5176835656166077, - "318": 0.5285840034484863, - "319": 0.5231578946113586, - "320": 0.4556329548358917, - "321": 0.43136730790138245, - "322": 0.49187132716178894, - "323": 0.5066221356391907 - }, - "loss": { - "270": 3.0431125164031982, - "271": 3.036886692047119, - "272": 3.011997699737549, - "273": 3.0410447120666504, - "274": 3.037245035171509, - "275": 3.03704571723938, - "276": 2.9901628494262695, - "277": 3.015126943588257, - "278": 3.0377092361450195, - "279": 3.031620979309082, - "280": 3.011617660522461, - "281": 2.985450506210327, - "282": 3.0058674812316895, - "283": 2.9588656425476074, - "284": 2.955876350402832, - "285": 2.942842721939087, - "286": 2.9260411262512207, - "287": 2.9128530025482178, - "288": 2.94216251373291, - "289": 2.9741129875183105, - "290": 3.0020530223846436, - "291": 2.945091962814331, - "292": 2.9349443912506104, - "293": 2.9089603424072266, - "294": 2.942735433578491, - "295": 2.9312779903411865, - "296": 2.9092414379119873, - "297": 2.9327523708343506, - "298": 2.9214487075805664, - "299": 2.898831605911255, - "300": 2.896671772003174, - "301": 2.909642457962036, - "302": 2.9002127647399902, - "303": 2.8919129371643066, - "304": 2.883169651031494, - "305": 2.9125776290893555, - "306": 2.9245355129241943, - "307": 2.8759021759033203, - "308": 2.8774468898773193, - "309": 2.8868656158447266, - "310": 2.8862600326538086, - "311": 2.8969311714172363, - "312": 2.866103410720825, - "313": 2.8573989868164062, - "314": 2.886366367340088, - "315": 2.868023633956909, - "316": 2.840973377227783, - "317": 2.875199794769287, - "318": 2.864121437072754, - "319": 2.853104591369629, - "320": 2.8590290546417236, - "321": 2.8588643074035645, - "322": 2.8435912132263184, - "323": 2.85921573638916 - }, - "lr": { - "270": 0.464, - "271": 0.464, - "272": 0.464, - "273": 0.464, - "274": 0.464, - "275": 0.464, - "276": 0.464, - "277": 0.464, - "278": 0.464, - "279": 0.464, - "280": 0.464, - "281": 0.464, - "282": 0.464, - "283": 0.464, - "284": 0.464, - "285": 0.464, - "286": 0.464, - "287": 0.464, - "288": 0.464, - "289": 0.464, - "290": 0.464, - "291": 0.464, - "292": 0.464, - "293": 0.464, - "294": 0.464, - "295": 0.464, - "296": 0.464, - "297": 0.464, - "298": 0.464, - "299": 0.464, - "300": 0.464, - "301": 0.464, - "302": 0.464, - "303": 0.464, - "304": 0.464, - "305": 0.464, - "306": 0.464, - "307": 0.464, - "308": 0.464, - "309": 0.464, - "310": 0.464, - "311": 0.464, - "312": 0.464, - "313": 0.464, - "314": 0.464, - "315": 0.464, - "316": 0.464, - "317": 0.464, - "318": 0.464, - "319": 0.464, - "320": 0.464, - "321": 0.464, - "322": 0.464, - "323": 0.464 - } - }, - "train_epoch_time": 4.788949251174927, - "train_loss": 2.854194995012653, - "train_score": 0.23856595222713958, - "val_loss": 2.885586470331308, - "val_score": 0.23270217408816385 - }, - { - "epoch": 6, - "grad_norm": 0.3864153325557709, - "learning_rate": 0.464, - "model_norm": 88.11467742919922, - "step_logs": { - "grad_norm": { - "324": 0.5820532441139221, - "325": 0.5842010378837585, - "326": 0.5383394360542297, - "327": 0.5198785662651062, - "328": 0.4956585466861725, - "329": 0.4951489567756653, - "330": 0.45185908675193787, - "331": 0.47827890515327454, - "332": 0.38754040002822876, - "333": 0.323329359292984, - "334": 0.31251347064971924, - "335": 0.327478289604187, - "336": 0.32662010192871094, - "337": 0.322544127702713, - "338": 0.27384263277053833, - "339": 0.32691052556037903, - "340": 0.47355544567108154, - "341": 0.519230306148529, - "342": 0.518526017665863, - "343": 0.47064462304115295, - "344": 0.4253055155277252, - "345": 0.4822605550289154, - "346": 0.49681612849235535, - "347": 0.5098667740821838, - "348": 0.5298381447792053, - "349": 0.5110343098640442, - "350": 0.45480433106422424, - "351": 0.47317343950271606, - "352": 0.48920899629592896, - "353": 0.4198671877384186, - "354": 0.36844274401664734, - "355": 0.3354429006576538, - "356": 0.344651460647583, - "357": 0.4050646424293518, - "358": 0.43140238523483276, - "359": 0.4874323606491089, - "360": 0.5904595851898193, - "361": 0.4948265254497528, - "362": 0.35415011644363403, - "363": 0.23451873660087585, - "364": 0.23185992240905762, - "365": 0.24718642234802246, - "366": 0.30144110321998596, - "367": 0.28273436427116394, - "368": 0.32693031430244446, - "369": 0.4312284290790558, - "370": 0.722140908241272, - "371": 1.135475516319275, - "372": 1.2115484476089478, - "373": 0.6165918707847595, - "374": 0.42361322045326233, - "375": 0.39272767305374146, - "376": 0.3820077180862427, - "377": 0.3864153325557709 - }, - "loss": { - "324": 2.875011444091797, - "325": 2.8820018768310547, - "326": 2.851672887802124, - "327": 2.847370147705078, - "328": 2.862888813018799, - "329": 2.8270115852355957, - "330": 2.8452157974243164, - "331": 2.8523366451263428, - "332": 2.8111085891723633, - "333": 2.8223836421966553, - "334": 2.794248104095459, - "335": 2.8467233180999756, - "336": 2.8062939643859863, - "337": 2.8120603561401367, - "338": 2.831743001937866, - "339": 2.810534715652466, - "340": 2.8184683322906494, - "341": 2.8421192169189453, - "342": 2.80037784576416, - "343": 2.8311898708343506, - "344": 2.8118858337402344, - "345": 2.8088016510009766, - "346": 2.8150391578674316, - "347": 2.8277053833007812, - "348": 2.8275394439697266, - "349": 2.820631504058838, - "350": 2.7985544204711914, - "351": 2.8206357955932617, - "352": 2.8019726276397705, - "353": 2.8063628673553467, - "354": 2.771907091140747, - "355": 2.768950939178467, - "356": 2.7780933380126953, - "357": 2.776686429977417, - "358": 2.786099910736084, - "359": 2.7869672775268555, - "360": 2.786921977996826, - "361": 2.798215627670288, - "362": 2.769136905670166, - "363": 2.783421039581299, - "364": 2.7585668563842773, - "365": 2.767833709716797, - "366": 2.722777843475342, - "367": 2.769777774810791, - "368": 2.7783427238464355, - "369": 2.7714133262634277, - "370": 2.800468921661377, - "371": 2.8357183933258057, - "372": 2.8647027015686035, - "373": 2.853703260421753, - "374": 2.783780097961426, - "375": 2.7631924152374268, - "376": 2.778256416320801, - "377": 2.7554824352264404 - }, - "lr": { - "324": 0.464, - "325": 0.464, - "326": 0.464, - "327": 0.464, - "328": 0.464, - "329": 0.464, - "330": 0.464, - "331": 0.464, - "332": 0.464, - "333": 0.464, - "334": 0.464, - "335": 0.464, - "336": 0.464, - "337": 0.464, - "338": 0.464, - "339": 0.464, - "340": 0.464, - "341": 0.464, - "342": 0.464, - "343": 0.464, - "344": 0.464, - "345": 0.464, - "346": 0.464, - "347": 0.464, - "348": 0.464, - "349": 0.464, - "350": 0.464, - "351": 0.464, - "352": 0.464, - "353": 0.464, - "354": 0.464, - "355": 0.464, - "356": 0.464, - "357": 0.464, - "358": 0.464, - "359": 0.464, - "360": 0.464, - "361": 0.464, - "362": 0.464, - "363": 0.464, - "364": 0.464, - "365": 0.464, - "366": 0.464, - "367": 0.464, - "368": 0.464, - "369": 0.464, - "370": 0.464, - "371": 0.464, - "372": 0.464, - "373": 0.464, - "374": 0.464, - "375": 0.464, - "376": 0.464, - "377": 0.464 - } - }, - "train_epoch_time": 4.789652109146118, - "train_loss": 2.763375854286949, - "train_score": 0.23777349358651695, - "val_loss": 2.785479051511132, - "val_score": 0.23190388184395777 - }, - { - "epoch": 7, - "grad_norm": 0.5307855606079102, - "learning_rate": 0.464, - "model_norm": 88.15125274658203, - "step_logs": { - "grad_norm": { - "378": 0.41669026017189026, - "379": 0.3636738657951355, - "380": 0.36276158690452576, - "381": 0.3732846677303314, - "382": 0.38989880681037903, - "383": 0.380859375, - "384": 0.4410206377506256, - "385": 0.48034048080444336, - "386": 0.49346110224723816, - "387": 0.5446798205375671, - "388": 0.5968800187110901, - "389": 0.574354887008667, - "390": 0.5114524364471436, - "391": 0.4674214720726013, - "392": 0.4792308509349823, - "393": 0.46935907006263733, - "394": 0.48663243651390076, - "395": 0.5008202791213989, - "396": 0.5456418395042419, - "397": 0.49052104353904724, - "398": 0.5148676633834839, - "399": 0.5309667587280273, - "400": 0.5169989466667175, - "401": 0.4969700574874878, - "402": 0.5844029784202576, - "403": 0.5727942585945129, - "404": 0.5252848267555237, - "405": 0.5402370095252991, - "406": 0.5842947959899902, - "407": 0.5709476470947266, - "408": 0.5912723541259766, - "409": 0.5950878262519836, - "410": 0.5892427563667297, - "411": 0.5410339832305908, - "412": 0.5276714563369751, - "413": 0.5643038153648376, - "414": 0.5468411445617676, - "415": 0.4935785233974457, - "416": 0.46643027663230896, - "417": 0.44591641426086426, - "418": 0.4631032943725586, - "419": 0.43627774715423584, - "420": 0.4355587959289551, - "421": 0.44590431451797485, - "422": 0.5502045750617981, - "423": 0.5576859712600708, - "424": 0.4811934530735016, - "425": 0.4225294888019562, - "426": 0.4164482355117798, - "427": 0.4014143645763397, - "428": 0.3887057900428772, - "429": 0.41492924094200134, - "430": 0.455078661441803, - "431": 0.5307855606079102 - }, - "loss": { - "378": 2.7654988765716553, - "379": 2.7577126026153564, - "380": 2.75124192237854, - "381": 2.737492561340332, - "382": 2.7479352951049805, - "383": 2.7489991188049316, - "384": 2.7637739181518555, - "385": 2.751579523086548, - "386": 2.777341604232788, - "387": 2.765752077102661, - "388": 2.770010471343994, - "389": 2.751984119415283, - "390": 2.784501552581787, - "391": 2.7589409351348877, - "392": 2.743619441986084, - "393": 2.7433459758758545, - "394": 2.733316421508789, - "395": 2.7305846214294434, - "396": 2.7404184341430664, - "397": 2.7314116954803467, - "398": 2.744497060775757, - "399": 2.7438461780548096, - "400": 2.730891227722168, - "401": 2.7344067096710205, - "402": 2.752774953842163, - "403": 2.746870279312134, - "404": 2.7604639530181885, - "405": 2.7230732440948486, - "406": 2.726456642150879, - "407": 2.7299156188964844, - "408": 2.748684883117676, - "409": 2.7507944107055664, - "410": 2.7420618534088135, - "411": 2.7365851402282715, - "412": 2.747551918029785, - "413": 2.746805191040039, - "414": 2.72627329826355, - "415": 2.7124593257904053, - "416": 2.717761278152466, - "417": 2.7175302505493164, - "418": 2.7076478004455566, - "419": 2.7017266750335693, - "420": 2.7102725505828857, - "421": 2.6964612007141113, - "422": 2.721733570098877, - "423": 2.725191593170166, - "424": 2.705355644226074, - "425": 2.6846797466278076, - "426": 2.6848511695861816, - "427": 2.692171096801758, - "428": 2.678511142730713, - "429": 2.685178279876709, - "430": 2.689267635345459, - "431": 2.7103657722473145 - }, - "lr": { - "378": 0.464, - "379": 0.464, - "380": 0.464, - "381": 0.464, - "382": 0.464, - "383": 0.464, - "384": 0.464, - "385": 0.464, - "386": 0.464, - "387": 0.464, - "388": 0.464, - "389": 0.464, - "390": 0.464, - "391": 0.464, - "392": 0.464, - "393": 0.464, - "394": 0.464, - "395": 0.464, - "396": 0.464, - "397": 0.464, - "398": 0.464, - "399": 0.464, - "400": 0.464, - "401": 0.464, - "402": 0.464, - "403": 0.464, - "404": 0.464, - "405": 0.464, - "406": 0.464, - "407": 0.464, - "408": 0.464, - "409": 0.464, - "410": 0.464, - "411": 0.464, - "412": 0.464, - "413": 0.464, - "414": 0.464, - "415": 0.464, - "416": 0.464, - "417": 0.464, - "418": 0.464, - "419": 0.464, - "420": 0.464, - "421": 0.464, - "422": 0.464, - "423": 0.464, - "424": 0.464, - "425": 0.464, - "426": 0.464, - "427": 0.464, - "428": 0.464, - "429": 0.464, - "430": 0.464, - "431": 0.464 - } - }, - "train_epoch_time": 4.790506362915039, - "train_loss": 2.7078548082491247, - "train_score": 0.23428196730025358, - "val_loss": 2.7219450424514053, - "val_score": 0.2261454150097789 - }, - { - "epoch": 8, - "grad_norm": 0.5217339992523193, - "learning_rate": 0.464, - "model_norm": 88.1882095336914, - "step_logs": { - "grad_norm": { - "432": 0.5504457354545593, - "433": 0.5215202569961548, - "434": 0.5113682150840759, - "435": 0.5124291777610779, - "436": 0.4730629622936249, - "437": 0.3750542104244232, - "438": 0.399244487285614, - "439": 0.49564921855926514, - "440": 0.4806249737739563, - "441": 0.5737437605857849, - "442": 0.6060463786125183, - "443": 0.4971201419830322, - "444": 0.45268091559410095, - "445": 0.4201361835002899, - "446": 0.3853554129600525, - "447": 0.402225524187088, - "448": 0.47125935554504395, - "449": 0.43564122915267944, - "450": 0.42454615235328674, - "451": 0.48441317677497864, - "452": 0.5431110262870789, - "453": 0.41885140538215637, - "454": 0.38179847598075867, - "455": 0.3720003366470337, - "456": 0.3804495632648468, - "457": 0.40889835357666016, - "458": 0.434234619140625, - "459": 0.4937080144882202, - "460": 0.4979003965854645, - "461": 0.47579073905944824, - "462": 0.5379064679145813, - "463": 0.5342559218406677, - "464": 0.5861862897872925, - "465": 0.5847064852714539, - "466": 0.5869342684745789, - "467": 0.565142810344696, - "468": 0.5108954906463623, - "469": 0.46083521842956543, - "470": 0.4709267318248749, - "471": 0.5020389556884766, - "472": 0.5066967606544495, - "473": 0.49101659655570984, - "474": 0.4528162479400635, - "475": 0.4593248963356018, - "476": 0.45641031861305237, - "477": 0.4859912693500519, - "478": 0.48071128129959106, - "479": 0.4857591986656189, - "480": 0.49276676774024963, - "481": 0.5025609731674194, - "482": 0.4995492994785309, - "483": 0.5039301514625549, - "484": 0.5286762714385986, - "485": 0.5217339992523193 - }, - "loss": { - "432": 2.724982500076294, - "433": 2.688483953475952, - "434": 2.6983938217163086, - "435": 2.7062883377075195, - "436": 2.685987949371338, - "437": 2.689300060272217, - "438": 2.662151336669922, - "439": 2.693514823913574, - "440": 2.685091972351074, - "441": 2.6877806186676025, - "442": 2.725755214691162, - "443": 2.6981377601623535, - "444": 2.7130467891693115, - "445": 2.6893796920776367, - "446": 2.6755263805389404, - "447": 2.687635898590088, - "448": 2.677579402923584, - "449": 2.681330680847168, - "450": 2.682194471359253, - "451": 2.670379400253296, - "452": 2.672797679901123, - "453": 2.6556596755981445, - "454": 2.662994384765625, - "455": 2.667182445526123, - "456": 2.665161609649658, - "457": 2.660501718521118, - "458": 2.6573829650878906, - "459": 2.661374568939209, - "460": 2.6760644912719727, - "461": 2.6895253658294678, - "462": 2.6641812324523926, - "463": 2.6613011360168457, - "464": 2.6649575233459473, - "465": 2.6734819412231445, - "466": 2.668853521347046, - "467": 2.6791417598724365, - "468": 2.680006980895996, - "469": 2.6658997535705566, - "470": 2.6568691730499268, - "471": 2.6597189903259277, - "472": 2.6498641967773438, - "473": 2.671576976776123, - "474": 2.660019874572754, - "475": 2.651566505432129, - "476": 2.656750440597534, - "477": 2.6441211700439453, - "478": 2.6541337966918945, - "479": 2.6541285514831543, - "480": 2.660074234008789, - "481": 2.6316864490509033, - "482": 2.67539644241333, - "483": 2.643857479095459, - "484": 2.6470913887023926, - "485": 2.6522140502929688 - }, - "lr": { - "432": 0.464, - "433": 0.464, - "434": 0.464, - "435": 0.464, - "436": 0.464, - "437": 0.464, - "438": 0.464, - "439": 0.464, - "440": 0.464, - "441": 0.464, - "442": 0.464, - "443": 0.464, - "444": 0.464, - "445": 0.464, - "446": 0.464, - "447": 0.464, - "448": 0.464, - "449": 0.464, - "450": 0.464, - "451": 0.464, - "452": 0.464, - "453": 0.464, - "454": 0.464, - "455": 0.464, - "456": 0.464, - "457": 0.464, - "458": 0.464, - "459": 0.464, - "460": 0.464, - "461": 0.464, - "462": 0.464, - "463": 0.464, - "464": 0.464, - "465": 0.464, - "466": 0.464, - "467": 0.464, - "468": 0.464, - "469": 0.464, - "470": 0.464, - "471": 0.464, - "472": 0.464, - "473": 0.464, - "474": 0.464, - "475": 0.464, - "476": 0.464, - "477": 0.464, - "478": 0.464, - "479": 0.464, - "480": 0.464, - "481": 0.464, - "482": 0.464, - "483": 0.464, - "484": 0.464, - "485": 0.464 - } - }, - "train_epoch_time": 4.79051399230957, - "train_loss": 2.658898579680937, - "train_score": 0.24920866209345535, - "val_loss": 2.6732382131088204, - "val_score": 0.24083309453814777 - }, - { - "epoch": 9, - "grad_norm": 0.5026582479476929, - "learning_rate": 0.464, - "model_norm": 88.22550964355469, - "step_logs": { - "grad_norm": { - "486": 0.5393915772438049, - "487": 0.48238617181777954, - "488": 0.4742048978805542, - "489": 0.46762123703956604, - "490": 0.4421769678592682, - "491": 0.41129744052886963, - "492": 0.3979552388191223, - "493": 0.3982887268066406, - "494": 0.4035964012145996, - "495": 0.4782795011997223, - "496": 0.5390520691871643, - "497": 0.575866162776947, - "498": 0.5617049336433411, - "499": 0.5540283918380737, - "500": 0.5278716087341309, - "501": 0.4533846080303192, - "502": 0.4401773512363434, - "503": 0.44022315740585327, - "504": 0.43612343072891235, - "505": 0.4604063034057617, - "506": 0.4742715656757355, - "507": 0.49274301528930664, - "508": 0.44611626863479614, - "509": 0.41245028376579285, - "510": 0.4085487425327301, - "511": 0.4094655215740204, - "512": 0.37654945254325867, - "513": 0.34507662057876587, - "514": 0.3232730031013489, - "515": 0.3307872414588928, - "516": 0.3872009515762329, - "517": 0.4701208472251892, - "518": 0.5640180110931396, - "519": 0.6066560745239258, - "520": 0.6159108281135559, - "521": 0.5913978219032288, - "522": 0.5769386887550354, - "523": 0.5220202803611755, - "524": 0.5326650142669678, - "525": 0.5230283141136169, - "526": 0.5230275392532349, - "527": 0.5344657301902771, - "528": 0.5188909769058228, - "529": 0.46427685022354126, - "530": 0.3971073627471924, - "531": 0.3990248143672943, - "532": 0.3895544111728668, - "533": 0.3791446387767792, - "534": 0.43812525272369385, - "535": 0.4937969744205475, - "536": 0.5510608553886414, - "537": 0.5472446084022522, - "538": 0.5284805297851562, - "539": 0.5026582479476929 - }, - "loss": { - "486": 2.6691908836364746, - "487": 2.654679775238037, - "488": 2.6492388248443604, - "489": 2.6355605125427246, - "490": 2.6162900924682617, - "491": 2.6369524002075195, - "492": 2.635028839111328, - "493": 2.636723041534424, - "494": 2.62257719039917, - "495": 2.6289432048797607, - "496": 2.6580381393432617, - "497": 2.638354778289795, - "498": 2.6574854850769043, - "499": 2.632498025894165, - "500": 2.6662492752075195, - "501": 2.627734661102295, - "502": 2.6417078971862793, - "503": 2.626744031906128, - "504": 2.631453275680542, - "505": 2.6300528049468994, - "506": 2.6451854705810547, - "507": 2.620011329650879, - "508": 2.6465907096862793, - "509": 2.6392581462860107, - "510": 2.617265224456787, - "511": 2.6251540184020996, - "512": 2.6369576454162598, - "513": 2.612780809402466, - "514": 2.6150434017181396, - "515": 2.596306800842285, - "516": 2.616943836212158, - "517": 2.6273908615112305, - "518": 2.633963108062744, - "519": 2.6436197757720947, - "520": 2.625952959060669, - "521": 2.6523561477661133, - "522": 2.6393675804138184, - "523": 2.6496291160583496, - "524": 2.6553778648376465, - "525": 2.633091926574707, - "526": 2.6248130798339844, - "527": 2.626410484313965, - "528": 2.618163585662842, - "529": 2.6059889793395996, - "530": 2.5993666648864746, - "531": 2.6000046730041504, - "532": 2.6114938259124756, - "533": 2.611501455307007, - "534": 2.617192268371582, - "535": 2.6252808570861816, - "536": 2.6304426193237305, - "537": 2.624691963195801, - "538": 2.6142218112945557, - "539": 2.6360414028167725 - }, - "lr": { - "486": 0.464, - "487": 0.464, - "488": 0.464, - "489": 0.464, - "490": 0.464, - "491": 0.464, - "492": 0.464, - "493": 0.464, - "494": 0.464, - "495": 0.464, - "496": 0.464, - "497": 0.464, - "498": 0.464, - "499": 0.464, - "500": 0.464, - "501": 0.464, - "502": 0.464, - "503": 0.464, - "504": 0.464, - "505": 0.464, - "506": 0.464, - "507": 0.464, - "508": 0.464, - "509": 0.464, - "510": 0.464, - "511": 0.464, - "512": 0.464, - "513": 0.464, - "514": 0.464, - "515": 0.464, - "516": 0.464, - "517": 0.464, - "518": 0.464, - "519": 0.464, - "520": 0.464, - "521": 0.464, - "522": 0.464, - "523": 0.464, - "524": 0.464, - "525": 0.464, - "526": 0.464, - "527": 0.464, - "528": 0.464, - "529": 0.464, - "530": 0.464, - "531": 0.464, - "532": 0.464, - "533": 0.464, - "534": 0.464, - "535": 0.464, - "536": 0.464, - "537": 0.464, - "538": 0.464, - "539": 0.464 - } - }, - "train_epoch_time": 4.789802074432373, - "train_loss": 2.6083524298975767, - "train_score": 0.2606415888414835, - "val_loss": 2.6212401179303804, - "val_score": 0.25407218662797376 - }, - { - "epoch": 10, - "grad_norm": 0.5591016411781311, - "learning_rate": 0.464, - "model_norm": 88.26213836669922, - "step_logs": { - "grad_norm": { - "540": 0.4353190064430237, - "541": 0.40878868103027344, - "542": 0.3924320340156555, - "543": 0.38761797547340393, - "544": 0.37114766240119934, - "545": 0.36142200231552124, - "546": 0.3414303958415985, - "547": 0.32516202330589294, - "548": 0.35573261976242065, - "549": 0.43056225776672363, - "550": 0.5892359614372253, - "551": 0.6880606412887573, - "552": 0.675864040851593, - "553": 0.5905904769897461, - "554": 0.4994680881500244, - "555": 0.48662200570106506, - "556": 0.486063152551651, - "557": 0.47349515557289124, - "558": 0.48976269364356995, - "559": 0.5170110464096069, - "560": 0.5126966834068298, - "561": 0.5223647356033325, - "562": 0.553849458694458, - "563": 0.5416709184646606, - "564": 0.5293968915939331, - "565": 0.5338631868362427, - "566": 0.5509567856788635, - "567": 0.5623188018798828, - "568": 0.5809409022331238, - "569": 0.5941538214683533, - "570": 0.6498664021492004, - "571": 0.6203831434249878, - "572": 0.5465275049209595, - "573": 0.5212960243225098, - "574": 0.5370823740959167, - "575": 0.4927505850791931, - "576": 0.43067026138305664, - "577": 0.423566073179245, - "578": 0.45962396264076233, - "579": 0.49076953530311584, - "580": 0.535290002822876, - "581": 0.5534805059432983, - "582": 0.6020460724830627, - "583": 0.6749753952026367, - "584": 0.712170422077179, - "585": 0.6982851624488831, - "586": 0.6124773025512695, - "587": 0.560810387134552, - "588": 0.5183064937591553, - "589": 0.5262145400047302, - "590": 0.5044381618499756, - "591": 0.5089525580406189, - "592": 0.5265288352966309, - "593": 0.5591016411781311 - }, - "loss": { - "540": 2.618333339691162, - "541": 2.593850612640381, - "542": 2.612354278564453, - "543": 2.6000542640686035, - "544": 2.5882675647735596, - "545": 2.6304752826690674, - "546": 2.605113983154297, - "547": 2.595273494720459, - "548": 2.590259075164795, - "549": 2.614989757537842, - "550": 2.609283447265625, - "551": 2.6401658058166504, - "552": 2.633363723754883, - "553": 2.619216203689575, - "554": 2.6152684688568115, - "555": 2.6202988624572754, - "556": 2.6098289489746094, - "557": 2.592923879623413, - "558": 2.580751657485962, - "559": 2.610132932662964, - "560": 2.5940983295440674, - "561": 2.6100430488586426, - "562": 2.6138992309570312, - "563": 2.640338897705078, - "564": 2.5961523056030273, - "565": 2.5897035598754883, - "566": 2.616889476776123, - "567": 2.598578929901123, - "568": 2.6096761226654053, - "569": 2.6189303398132324, - "570": 2.6292848587036133, - "571": 2.6067323684692383, - "572": 2.606438636779785, - "573": 2.563504695892334, - "574": 2.5901501178741455, - "575": 2.6033718585968018, - "576": 2.566986560821533, - "577": 2.5825204849243164, - "578": 2.5854532718658447, - "579": 2.59041690826416, - "580": 2.59226131439209, - "581": 2.600159168243408, - "582": 2.5773916244506836, - "583": 2.6025354862213135, - "584": 2.6265413761138916, - "585": 2.61869478225708, - "586": 2.619028091430664, - "587": 2.62429141998291, - "588": 2.595555067062378, - "589": 2.587710380554199, - "590": 2.5777010917663574, - "591": 2.593646764755249, - "592": 2.5727310180664062, - "593": 2.590376853942871 - }, - "lr": { - "540": 0.464, - "541": 0.464, - "542": 0.464, - "543": 0.464, - "544": 0.464, - "545": 0.464, - "546": 0.464, - "547": 0.464, - "548": 0.464, - "549": 0.464, - "550": 0.464, - "551": 0.464, - "552": 0.464, - "553": 0.464, - "554": 0.464, - "555": 0.464, - "556": 0.464, - "557": 0.464, - "558": 0.464, - "559": 0.464, - "560": 0.464, - "561": 0.464, - "562": 0.464, - "563": 0.464, - "564": 0.464, - "565": 0.464, - "566": 0.464, - "567": 0.464, - "568": 0.464, - "569": 0.464, - "570": 0.464, - "571": 0.464, - "572": 0.464, - "573": 0.464, - "574": 0.464, - "575": 0.464, - "576": 0.464, - "577": 0.464, - "578": 0.464, - "579": 0.464, - "580": 0.464, - "581": 0.464, - "582": 0.464, - "583": 0.464, - "584": 0.464, - "585": 0.464, - "586": 0.464, - "587": 0.464, - "588": 0.464, - "589": 0.464, - "590": 0.464, - "591": 0.464, - "592": 0.464, - "593": 0.464 - } - }, - "train_epoch_time": 4.7896130084991455, - "train_loss": 2.58598659425076, - "train_score": 0.26898986717720796, - "val_loss": 2.605555377515668, - "val_score": 0.2617905066700398 - }, - { - "epoch": 11, - "grad_norm": 0.6427961587905884, - "learning_rate": 0.464, - "model_norm": 88.30183410644531, - "step_logs": { - "grad_norm": { - "594": 0.6102558970451355, - "595": 0.5830957293510437, - "596": 0.5868234038352966, - "597": 0.5984905362129211, - "598": 0.5819385647773743, - "599": 0.587517499923706, - "600": 0.5767359137535095, - "601": 0.5563687086105347, - "602": 0.5310091376304626, - "603": 0.49605637788772583, - "604": 0.46206727623939514, - "605": 0.4455237090587616, - "606": 0.48990777134895325, - "607": 0.5666103363037109, - "608": 0.6441716551780701, - "609": 0.6580949425697327, - "610": 0.6400120258331299, - "611": 0.6008638739585876, - "612": 0.5176447033882141, - "613": 0.4860977530479431, - "614": 0.4765302836894989, - "615": 0.48777997493743896, - "616": 0.524976372718811, - "617": 0.6055616140365601, - "618": 0.6691855788230896, - "619": 0.6471148133277893, - "620": 0.6173555254936218, - "621": 0.6212912797927856, - "622": 0.5882620215415955, - "623": 0.5502585172653198, - "624": 0.5645357966423035, - "625": 0.5881019234657288, - "626": 0.597263753414154, - "627": 0.5757501125335693, - "628": 0.5418733358383179, - "629": 0.5492876768112183, - "630": 0.6006032228469849, - "631": 0.5849524140357971, - "632": 0.5334150195121765, - "633": 0.5865476131439209, - "634": 0.6621525883674622, - "635": 0.6650758981704712, - "636": 0.47196564078330994, - "637": 0.3868277966976166, - "638": 0.30416712164878845, - "639": 0.2245567888021469, - "640": 0.22286877036094666, - "641": 0.2879146635532379, - "642": 0.4127894341945648, - "643": 0.509972095489502, - "644": 0.6712988615036011, - "645": 0.7168768048286438, - "646": 0.6779754757881165, - "647": 0.6427961587905884 - }, - "loss": { - "594": 2.5911519527435303, - "595": 2.5892903804779053, - "596": 2.5860378742218018, - "597": 2.5969104766845703, - "598": 2.5911648273468018, - "599": 2.5752511024475098, - "600": 2.5722761154174805, - "601": 2.5954272747039795, - "602": 2.5621604919433594, - "603": 2.58282470703125, - "604": 2.5562098026275635, - "605": 2.5705299377441406, - "606": 2.5767621994018555, - "607": 2.5804409980773926, - "608": 2.5848984718322754, - "609": 2.5837836265563965, - "610": 2.5815038681030273, - "611": 2.598987579345703, - "612": 2.5651354789733887, - "613": 2.5845117568969727, - "614": 2.5606136322021484, - "615": 2.5808262825012207, - "616": 2.5814690589904785, - "617": 2.5747976303100586, - "618": 2.570241928100586, - "619": 2.6017098426818848, - "620": 2.565883159637451, - "621": 2.571183681488037, - "622": 2.583958864212036, - "623": 2.56392240524292, - "624": 2.584406614303589, - "625": 2.577767848968506, - "626": 2.576108932495117, - "627": 2.5505712032318115, - "628": 2.557628631591797, - "629": 2.5503249168395996, - "630": 2.5669751167297363, - "631": 2.5596976280212402, - "632": 2.5487265586853027, - "633": 2.5699877738952637, - "634": 2.582122325897217, - "635": 2.5851387977600098, - "636": 2.5492100715637207, - "637": 2.557218551635742, - "638": 2.5428402423858643, - "639": 2.5253586769104004, - "640": 2.5295419692993164, - "641": 2.5135114192962646, - "642": 2.527859926223755, - "643": 2.5553112030029297, - "644": 2.563213348388672, - "645": 2.6114673614501953, - "646": 2.575721263885498, - "647": 2.6093015670776367 - }, - "lr": { - "594": 0.464, - "595": 0.464, - "596": 0.464, - "597": 0.464, - "598": 0.464, - "599": 0.464, - "600": 0.464, - "601": 0.464, - "602": 0.464, - "603": 0.464, - "604": 0.464, - "605": 0.464, - "606": 0.464, - "607": 0.464, - "608": 0.464, - "609": 0.464, - "610": 0.464, - "611": 0.464, - "612": 0.464, - "613": 0.464, - "614": 0.464, - "615": 0.464, - "616": 0.464, - "617": 0.464, - "618": 0.464, - "619": 0.464, - "620": 0.464, - "621": 0.464, - "622": 0.464, - "623": 0.464, - "624": 0.464, - "625": 0.464, - "626": 0.464, - "627": 0.464, - "628": 0.464, - "629": 0.464, - "630": 0.464, - "631": 0.464, - "632": 0.464, - "633": 0.464, - "634": 0.464, - "635": 0.464, - "636": 0.464, - "637": 0.464, - "638": 0.464, - "639": 0.464, - "640": 0.464, - "641": 0.464, - "642": 0.464, - "643": 0.464, - "644": 0.464, - "645": 0.464, - "646": 0.464, - "647": 0.464 - } - }, - "train_epoch_time": 4.789430379867554, - "train_loss": 2.55411589491145, - "train_score": 0.2774053982203797, - "val_loss": 2.5694819216613243, - "val_score": 0.27186333996283335 - }, - { - "epoch": 12, - "grad_norm": 0.37160810828208923, - "learning_rate": 0.464, - "model_norm": 88.33002471923828, - "step_logs": { - "grad_norm": { - "648": 0.6033352017402649, - "649": 0.6375214457511902, - "650": 0.6416547298431396, - "651": 0.6237403750419617, - "652": 0.5660628080368042, - "653": 0.5259665846824646, - "654": 0.45734545588493347, - "655": 0.4149729311466217, - "656": 0.3758750855922699, - "657": 0.35036203265190125, - "658": 0.36929088830947876, - "659": 0.417061984539032, - "660": 0.4331158995628357, - "661": 0.4113219678401947, - "662": 0.4149918556213379, - "663": 0.39995384216308594, - "664": 0.3589474558830261, - "665": 0.3128139078617096, - "666": 0.28866633772850037, - "667": 0.2722757160663605, - "668": 0.265558123588562, - "669": 0.20236419141292572, - "670": 0.20753850042819977, - "671": 0.32348400354385376, - "672": 0.33176127076148987, - "673": 0.2808423340320587, - "674": 0.2504422664642334, - "675": 0.22587434947490692, - "676": 0.23982255160808563, - "677": 0.1987578123807907, - "678": 0.17108003795146942, - "679": 0.1660272777080536, - "680": 0.20091399550437927, - "681": 0.20667262375354767, - "682": 0.23717541992664337, - "683": 0.24786245822906494, - "684": 0.27075037360191345, - "685": 0.3327227532863617, - "686": 0.29253676533699036, - "687": 0.240653395652771, - "688": 0.21306094527244568, - "689": 0.16863195598125458, - "690": 0.16018062829971313, - "691": 0.17213071882724762, - "692": 0.16818664968013763, - "693": 0.18476681411266327, - "694": 0.2009366899728775, - "695": 0.20096440613269806, - "696": 0.2257307767868042, - "697": 0.15748992562294006, - "698": 0.18098805844783783, - "699": 0.23984956741333008, - "700": 0.2926294207572937, - "701": 0.37160810828208923 - }, - "loss": { - "648": 2.5429294109344482, - "649": 2.563180923461914, - "650": 2.5596656799316406, - "651": 2.583275079727173, - "652": 2.555635452270508, - "653": 2.5495266914367676, - "654": 2.527939558029175, - "655": 2.531337261199951, - "656": 2.5354232788085938, - "657": 2.5216381549835205, - "658": 2.526749610900879, - "659": 2.5052216053009033, - "660": 2.512017011642456, - "661": 2.547964096069336, - "662": 2.537106990814209, - "663": 2.531524181365967, - "664": 2.51654314994812, - "665": 2.522202968597412, - "666": 2.532649040222168, - "667": 2.4954707622528076, - "668": 2.5083999633789062, - "669": 2.525273561477661, - "670": 2.5156142711639404, - "671": 2.5051584243774414, - "672": 2.5163145065307617, - "673": 2.5254039764404297, - "674": 2.504227876663208, - "675": 2.5045571327209473, - "676": 2.5301895141601562, - "677": 2.5105018615722656, - "678": 2.508082866668701, - "679": 2.5128772258758545, - "680": 2.4989960193634033, - "681": 2.5019519329071045, - "682": 2.523742914199829, - "683": 2.5194005966186523, - "684": 2.511171579360962, - "685": 2.507780075073242, - "686": 2.50174617767334, - "687": 2.506113290786743, - "688": 2.496569871902466, - "689": 2.5184364318847656, - "690": 2.4979379177093506, - "691": 2.513768196105957, - "692": 2.4787063598632812, - "693": 2.4990909099578857, - "694": 2.5047292709350586, - "695": 2.5084941387176514, - "696": 2.512087821960449, - "697": 2.492587089538574, - "698": 2.4904065132141113, - "699": 2.5076804161071777, - "700": 2.49479079246521, - "701": 2.501924753189087 - }, - "lr": { - "648": 0.464, - "649": 0.4611358024691358, - "650": 0.4582716049382716, - "651": 0.4554074074074074, - "652": 0.45254320987654323, - "653": 0.44967901234567903, - "654": 0.4468148148148149, - "655": 0.44395061728395063, - "656": 0.44108641975308643, - "657": 0.43822222222222224, - "658": 0.43535802469135804, - "659": 0.43249382716049384, - "660": 0.42962962962962964, - "661": 0.42676543209876544, - "662": 0.4239012345679013, - "663": 0.4210370370370371, - "664": 0.41817283950617284, - "665": 0.41530864197530865, - "666": 0.41244444444444445, - "667": 0.4095802469135803, - "668": 0.40671604938271605, - "669": 0.40385185185185185, - "670": 0.4009876543209877, - "671": 0.3981234567901235, - "672": 0.3952592592592593, - "673": 0.39239506172839506, - "674": 0.38953086419753086, - "675": 0.3866666666666667, - "676": 0.3838024691358025, - "677": 0.38093827160493826, - "678": 0.3780740740740741, - "679": 0.3752098765432099, - "680": 0.3723456790123457, - "681": 0.36948148148148147, - "682": 0.36661728395061727, - "683": 0.3637530864197531, - "684": 0.3608888888888889, - "685": 0.3580246913580247, - "686": 0.35516049382716053, - "687": 0.35229629629629633, - "688": 0.34943209876543213, - "689": 0.34656790123456793, - "690": 0.3437037037037037, - "691": 0.3408395061728395, - "692": 0.33797530864197534, - "693": 0.33511111111111114, - "694": 0.33224691358024694, - "695": 0.32938271604938274, - "696": 0.32651851851851854, - "697": 0.32365432098765434, - "698": 0.32079012345679014, - "699": 0.3179259259259259, - "700": 0.31506172839506175, - "701": 0.31219753086419755 - } - }, - "train_epoch_time": 4.791479825973511, - "train_loss": 2.5056555384713914, - "train_score": 0.28899973107547294, - "val_loss": 2.529800688221315, - "val_score": 0.2841875353992738 - }, - { - "epoch": 13, - "grad_norm": 0.1589818298816681, - "learning_rate": 0.3093333333333334, - "model_norm": 88.34654998779297, - "step_logs": { - "grad_norm": { - "702": 0.43228471279144287, - "703": 0.4150571823120117, - "704": 0.37235188484191895, - "705": 0.34717926383018494, - "706": 0.3039432466030121, - "707": 0.29926228523254395, - "708": 0.2916480600833893, - "709": 0.29013413190841675, - "710": 0.2985641360282898, - "711": 0.27594083547592163, - "712": 0.26492661237716675, - "713": 0.24489927291870117, - "714": 0.26873308420181274, - "715": 0.28540027141571045, - "716": 0.2636706531047821, - "717": 0.2658044993877411, - "718": 0.298604279756546, - "719": 0.3089991807937622, - "720": 0.29304563999176025, - "721": 0.28151825070381165, - "722": 0.30985164642333984, - "723": 0.3154941201210022, - "724": 0.2999439239501953, - "725": 0.300311803817749, - "726": 0.2792191803455353, - "727": 0.25215500593185425, - "728": 0.22900502383708954, - "729": 0.1993046998977661, - "730": 0.22680199146270752, - "731": 0.23790690302848816, - "732": 0.22285096347332, - "733": 0.23289717733860016, - "734": 0.23578020930290222, - "735": 0.24348080158233643, - "736": 0.21835733950138092, - "737": 0.2156786173582077, - "738": 0.24533380568027496, - "739": 0.21243096888065338, - "740": 0.23380620777606964, - "741": 0.2154991626739502, - "742": 0.21168167889118195, - "743": 0.20409566164016724, - "744": 0.17405036091804504, - "745": 0.17353741824626923, - "746": 0.1971268653869629, - "747": 0.18005278706550598, - "748": 0.17533138394355774, - "749": 0.18208526074886322, - "750": 0.19603769481182098, - "751": 0.17800000309944153, - "752": 0.15871545672416687, - "753": 0.1857120394706726, - "754": 0.1774788647890091, - "755": 0.1589818298816681 - }, - "loss": { - "702": 2.500382423400879, - "703": 2.48991060256958, - "704": 2.493062734603882, - "705": 2.5000734329223633, - "706": 2.495919704437256, - "707": 2.5012028217315674, - "708": 2.492306709289551, - "709": 2.500197410583496, - "710": 2.509784460067749, - "711": 2.501002311706543, - "712": 2.4673142433166504, - "713": 2.498098850250244, - "714": 2.4844164848327637, - "715": 2.514371395111084, - "716": 2.4912924766540527, - "717": 2.467846393585205, - "718": 2.4839487075805664, - "719": 2.4937055110931396, - "720": 2.484461784362793, - "721": 2.4859306812286377, - "722": 2.4986188411712646, - "723": 2.46124267578125, - "724": 2.483790159225464, - "725": 2.516995906829834, - "726": 2.4903712272644043, - "727": 2.4950814247131348, - "728": 2.480074405670166, - "729": 2.482741355895996, - "730": 2.4764699935913086, - "731": 2.5072567462921143, - "732": 2.4640614986419678, - "733": 2.4912025928497314, - "734": 2.4776031970977783, - "735": 2.4948501586914062, - "736": 2.4690709114074707, - "737": 2.4752299785614014, - "738": 2.4902820587158203, - "739": 2.4792089462280273, - "740": 2.476484775543213, - "741": 2.4966518878936768, - "742": 2.472245693206787, - "743": 2.455617666244507, - "744": 2.4649314880371094, - "745": 2.4670093059539795, - "746": 2.4900245666503906, - "747": 2.482882261276245, - "748": 2.475067138671875, - "749": 2.4585561752319336, - "750": 2.479755401611328, - "751": 2.4682862758636475, - "752": 2.478303909301758, - "753": 2.4845895767211914, - "754": 2.466521739959717, - "755": 2.483273506164551 - }, - "lr": { - "702": 0.3093333333333334, - "703": 0.30646913580246915, - "704": 0.30360493827160495, - "705": 0.30074074074074075, - "706": 0.29787654320987655, - "707": 0.29501234567901236, - "708": 0.29214814814814816, - "709": 0.28928395061728396, - "710": 0.2864197530864198, - "711": 0.2835555555555556, - "712": 0.28069135802469136, - "713": 0.27782716049382716, - "714": 0.27496296296296296, - "715": 0.27209876543209877, - "716": 0.26923456790123457, - "717": 0.26637037037037037, - "718": 0.2635061728395062, - "719": 0.260641975308642, - "720": 0.25777777777777783, - "721": 0.2549135802469136, - "722": 0.2520493827160494, - "723": 0.24918518518518518, - "724": 0.246320987654321, - "725": 0.2434567901234568, - "726": 0.24059259259259264, - "727": 0.23772839506172844, - "728": 0.2348641975308642, - "729": 0.232, - "730": 0.2291358024691358, - "731": 0.2262716049382716, - "732": 0.22340740740740744, - "733": 0.22054320987654322, - "734": 0.21767901234567902, - "735": 0.2148148148148148, - "736": 0.21195061728395065, - "737": 0.20908641975308642, - "738": 0.20622222222222222, - "739": 0.203358024691358, - "740": 0.20049382716049385, - "741": 0.19762962962962966, - "742": 0.19476543209876543, - "743": 0.19190123456790123, - "744": 0.18903703703703706, - "745": 0.18617283950617286, - "746": 0.18330864197530863, - "747": 0.18044444444444444, - "748": 0.17758024691358026, - "749": 0.17471604938271607, - "750": 0.17185185185185184, - "751": 0.16898765432098764, - "752": 0.16612345679012347, - "753": 0.16325925925925927, - "754": 0.16039506172839507, - "755": 0.15753086419753085 - } - }, - "train_epoch_time": 4.791181325912476, - "train_loss": 2.469497203553253, - "train_score": 0.3004393830018885, - "val_loss": 2.4955126393402485, - "val_score": 0.293143656408472 - }, - { - "epoch": 14, - "grad_norm": 0.1528676450252533, - "learning_rate": 0.1546666666666667, - "model_norm": 88.35224914550781, - "step_logs": { - "grad_norm": { - "756": 0.15225781500339508, - "757": 0.16851821541786194, - "758": 0.18109263479709625, - "759": 0.1627882570028305, - "760": 0.166079580783844, - "761": 0.20054063200950623, - "762": 0.17500917613506317, - "763": 0.16772787272930145, - "764": 0.17451803386211395, - "765": 0.1759415715932846, - "766": 0.17383219301700592, - "767": 0.15727967023849487, - "768": 0.1445913463830948, - "769": 0.16452628374099731, - "770": 0.1758730262517929, - "771": 0.13839362561702728, - "772": 0.1468462347984314, - "773": 0.15077322721481323, - "774": 0.17382842302322388, - "775": 0.16189800202846527, - "776": 0.16291821002960205, - "777": 0.15655256807804108, - "778": 0.15239010751247406, - "779": 0.17736950516700745, - "780": 0.15875975787639618, - "781": 0.17633116245269775, - "782": 0.16365593671798706, - "783": 0.16991767287254333, - "784": 0.16173149645328522, - "785": 0.16624715924263, - "786": 0.15952730178833008, - "787": 0.16946932673454285, - "788": 0.15996164083480835, - "789": 0.16099151968955994, - "790": 0.1575293093919754, - "791": 0.15854594111442566, - "792": 0.15482546389102936, - "793": 0.15897639095783234, - "794": 0.15980958938598633, - "795": 0.166922926902771, - "796": 0.15102678537368774, - "797": 0.15620110929012299, - "798": 0.14779940247535706, - "799": 0.14478686451911926, - "800": 0.15418539941310883, - "801": 0.16687539219856262, - "802": 0.1509760320186615, - "803": 0.14906252920627594, - "804": 0.1507200300693512, - "805": 0.16321063041687012, - "806": 0.14158639311790466, - "807": 0.1515313684940338, - "808": 0.15022914111614227, - "809": 0.1528676450252533 - }, - "loss": { - "756": 2.485177516937256, - "757": 2.471430778503418, - "758": 2.4800238609313965, - "759": 2.4690446853637695, - "760": 2.4698028564453125, - "761": 2.4735207557678223, - "762": 2.4731292724609375, - "763": 2.4609038829803467, - "764": 2.4538493156433105, - "765": 2.4573183059692383, - "766": 2.4712400436401367, - "767": 2.479620933532715, - "768": 2.486509084701538, - "769": 2.4522135257720947, - "770": 2.5024983882904053, - "771": 2.4455902576446533, - "772": 2.460141181945801, - "773": 2.463428258895874, - "774": 2.4634666442871094, - "775": 2.4514567852020264, - "776": 2.4770278930664062, - "777": 2.470583915710449, - "778": 2.459066390991211, - "779": 2.4778878688812256, - "780": 2.463188648223877, - "781": 2.443997621536255, - "782": 2.480278968811035, - "783": 2.4452319145202637, - "784": 2.475268840789795, - "785": 2.4804553985595703, - "786": 2.45963716506958, - "787": 2.467740535736084, - "788": 2.488633155822754, - "789": 2.442119836807251, - "790": 2.4509010314941406, - "791": 2.473454475402832, - "792": 2.4729321002960205, - "793": 2.4500319957733154, - "794": 2.45410418510437, - "795": 2.4627552032470703, - "796": 2.4616622924804688, - "797": 2.4415009021759033, - "798": 2.467247486114502, - "799": 2.4649853706359863, - "800": 2.4506001472473145, - "801": 2.4459095001220703, - "802": 2.4668049812316895, - "803": 2.467935085296631, - "804": 2.4737305641174316, - "805": 2.4719676971435547, - "806": 2.465332508087158, - "807": 2.464211940765381, - "808": 2.4637200832366943, - "809": 2.4400713443756104 - }, - "lr": { - "756": 0.1546666666666667, - "757": 0.15180246913580248, - "758": 0.14893827160493828, - "759": 0.14607407407407405, - "760": 0.1432098765432099, - "761": 0.14034567901234568, - "762": 0.13748148148148148, - "763": 0.13461728395061728, - "764": 0.1317530864197531, - "765": 0.12888888888888891, - "766": 0.1260246913580247, - "767": 0.12316049382716047, - "768": 0.12029629629629632, - "769": 0.1174320987654321, - "770": 0.1145679012345679, - "771": 0.1117037037037037, - "772": 0.10883950617283954, - "773": 0.10597530864197532, - "774": 0.10311111111111111, - "775": 0.1002469135802469, - "776": 0.09738271604938274, - "777": 0.09451851851851853, - "778": 0.09165432098765432, - "779": 0.0887901234567901, - "780": 0.08592592592592595, - "781": 0.08306172839506173, - "782": 0.08019753086419754, - "783": 0.07733333333333332, - "784": 0.07446913580246917, - "785": 0.07160493827160495, - "786": 0.06874074074074074, - "787": 0.06587654320987653, - "788": 0.06301234567901237, - "789": 0.06014814814814816, - "790": 0.05728395061728395, - "791": 0.05441975308641974, - "792": 0.051555555555555584, - "793": 0.04869135802469137, - "794": 0.04582716049382716, - "795": 0.04296296296296295, - "796": 0.04009876543209879, - "797": 0.03723456790123458, - "798": 0.03437037037037037, - "799": 0.03150617283950616, - "800": 0.028641975308642, - "801": 0.025777777777777792, - "802": 0.02291358024691358, - "803": 0.02004938271604937, - "804": 0.01718518518518521, - "805": 0.014320987654321, - "806": 0.01145679012345679, - "807": 0.00859259259259258, - "808": 0.005728395061728421, - "809": 0.0028641975308642104 - } - }, - "train_epoch_time": 4.791383504867554, - "train_loss": 2.4618878629320493, - "train_score": 0.30231012381303257, - "val_loss": 2.4886534879182975, - "val_score": 0.2948523610659226 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:36:35.320815", - "final_model_norm": 88.35224914550781, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:34:54.417167", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 0, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": NaN, - "learning_rate": 1e-10, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "0": 23.23117446899414, - "1": 22.989797592163086, - "2": 5.981522560119629, - "3": 7.244385242462158, - "4": 10.633097648620605, - "5": 4.144954681396484, - "6": 6.494001388549805, - "7": 2.9956462383270264, - "8": 14.424826622009277, - "9": 7.633819103240967, - "10": 149.88861083984375, - "11": 6.750445365905762, - "12": 21.091999053955078, - "13": 12.757149696350098, - "14": 5.891195774078369, - "15": 9.666069030761719, - "16": 15.902484893798828, - "17": 6.4736409187316895, - "18": 7.752840995788574, - "19": 6.990000247955322, - "20": 6.225035667419434, - "21": 4.53028678894043, - "22": 8.853759765625, - "23": 4.583296775817871, - "24": 2.9546868801116943, - "25": 3.1095683574676514, - "26": 9.110855102539062, - "27": 3.695751905441284, - "28": 2.2180662155151367, - "29": 1.878195881843567, - "30": 1.5109740495681763, - "31": 1.5007685422897339, - "32": 7.965774059295654, - "33": 6.4047369956970215, - "34": 3.1937880516052246, - "35": 2.0074331760406494, - "36": 7.5528483390808105, - "37": 2.627272844314575, - "38": 8.863873481750488, - "39": 2.131105661392212, - "40": 1.5022618770599365, - "41": 1.4835952520370483, - "42": 8.715837478637695, - "43": 2.574693441390991, - "44": 19.155643463134766, - "45": 36.39860534667969, - "46": 55.313697814941406, - "47": 321.8904113769531, - "48": 3299.11181640625, - "49": 794108.1875, - "50": NaN, - "51": NaN, - "52": NaN, - "53": NaN - }, - "loss": { - "0": 4.53324556350708, - "1": 4.532902717590332, - "2": 3.793055772781372, - "3": 4.178531169891357, - "4": 4.204231262207031, - "5": 5.521515369415283, - "6": 4.58737850189209, - "7": 4.408707618713379, - "8": 5.3844895362854, - "9": 8.882173538208008, - "10": 15.804492950439453, - "11": 5.226345539093018, - "12": 21.98281478881836, - "13": 13.894100189208984, - "14": 14.051280975341797, - "15": 11.401083946228027, - "16": 12.105506896972656, - "17": 21.230819702148438, - "18": 17.732337951660156, - "19": 14.40357780456543, - "20": 11.373653411865234, - "21": 8.966316223144531, - "22": 9.833341598510742, - "23": 12.675477981567383, - "24": 9.089855194091797, - "25": 6.582707405090332, - "26": 6.475600242614746, - "27": 10.64722728729248, - "28": 9.009147644042969, - "29": 6.964444160461426, - "30": 5.376404762268066, - "31": 4.074130535125732, - "32": 4.895949363708496, - "33": 9.36358642578125, - "34": 8.311046600341797, - "35": 6.414035797119141, - "36": 6.510993003845215, - "37": 7.441115379333496, - "38": 6.660740852355957, - "39": 8.212377548217773, - "40": 6.214600563049316, - "41": 4.595078468322754, - "42": 5.635906219482422, - "43": 5.839105129241943, - "44": 20.69137954711914, - "45": 27.671417236328125, - "46": 103.21652221679688, - "47": 367.15301513671875, - "48": 39523.78125, - "49": 400313.9375, - "50": 266901840.0, - "51": NaN, - "52": NaN, - "53": NaN - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "train_epoch_time": 4.789042949676514, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 1, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "loss": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "train_epoch_time": 4.78561806678772, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 2, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "loss": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "train_epoch_time": 4.785696983337402, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 3, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "loss": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "train_epoch_time": 4.7856833934783936, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 4, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "loss": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "train_epoch_time": 4.785582780838013, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 5, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "loss": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "train_epoch_time": 4.785930156707764, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 6, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "loss": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "train_epoch_time": 4.785890817642212, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 7, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "loss": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "train_epoch_time": 4.7862937450408936, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 8, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "loss": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "train_epoch_time": 4.78559136390686, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 9, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "loss": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "train_epoch_time": 4.786235570907593, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 10, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "loss": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "train_epoch_time": 4.78626823425293, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 11, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "loss": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "train_epoch_time": 4.786235809326172, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 12, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "loss": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "train_epoch_time": 4.786622524261475, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 13, - "grad_norm": NaN, - "learning_rate": 0.6666666666666667, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "loss": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "train_epoch_time": 4.786588907241821, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 14, - "grad_norm": NaN, - "learning_rate": 0.33333333333333337, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "loss": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "train_epoch_time": 4.785744667053223, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:38:16.152381", - "final_model_norm": NaN, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:36:35.471996", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 1, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": NaN, - "learning_rate": 1e-10, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "0": 22.837112426757812, - "1": 22.75118637084961, - "2": 6.240360736846924, - "3": 7.6079630851745605, - "4": 7.2784342765808105, - "5": 4.5539655685424805, - "6": 9.584453582763672, - "7": 5.157559871673584, - "8": 5.092408180236816, - "9": 39.324947357177734, - "10": 5.599187850952148, - "11": 48.273582458496094, - "12": 17.04164695739746, - "13": 13.871618270874023, - "14": 7.465798854827881, - "15": 7.114660263061523, - "16": 6.876247882843018, - "17": 5.5258564949035645, - "18": 17.15142822265625, - "19": 16.66469383239746, - "20": 10.560254096984863, - "21": 3.7484688758850098, - "22": 2.711054563522339, - "23": 1.9748886823654175, - "24": 2.641187906265259, - "25": 1.7857410907745361, - "26": 1.8030115365982056, - "27": 1.501802682876587, - "28": 1.1701369285583496, - "29": 1.241692066192627, - "30": 6.217986106872559, - "31": 2.257883071899414, - "32": 8.467720031738281, - "33": 3.28859543800354, - "34": 9.957716941833496, - "35": 21.710872650146484, - "36": 53.593353271484375, - "37": 354.18780517578125, - "38": 582.2900390625, - "39": 2131870.25, - "40": NaN, - "41": NaN, - "42": NaN, - "43": NaN, - "44": NaN, - "45": NaN, - "46": NaN, - "47": NaN, - "48": NaN, - "49": NaN, - "50": NaN, - "51": NaN, - "52": NaN, - "53": NaN - }, - "loss": { - "0": 4.5338897705078125, - "1": 4.527107238769531, - "2": 3.7900640964508057, - "3": 4.2330145835876465, - "4": 4.0051774978637695, - "5": 4.980250358581543, - "6": 4.304337501525879, - "7": 4.8918304443359375, - "8": 5.82696533203125, - "9": 6.8483099937438965, - "10": 4.372932434082031, - "11": 18.85643768310547, - "12": 20.965839385986328, - "13": 19.575660705566406, - "14": 18.41077423095703, - "15": 16.630413055419922, - "16": 13.304697036743164, - "17": 8.574707984924316, - "18": 23.446083068847656, - "19": 50.79539489746094, - "20": 8.92165470123291, - "21": 12.349089622497559, - "22": 9.883639335632324, - "23": 7.176352024078369, - "24": 5.888399600982666, - "25": 5.101390838623047, - "26": 3.943868637084961, - "27": 4.28470516204834, - "28": 3.5186357498168945, - "29": 3.702208995819092, - "30": 4.546236991882324, - "31": 5.643336296081543, - "32": 4.949770450592041, - "33": 7.9264984130859375, - "34": 6.880195617675781, - "35": 19.588285446166992, - "36": 91.00701904296875, - "37": 21.428569793701172, - "38": 2507.474365234375, - "39": 224428.03125, - "40": 857502.375, - "41": NaN, - "42": NaN, - "43": NaN, - "44": NaN, - "45": NaN, - "46": NaN, - "47": NaN, - "48": NaN, - "49": NaN, - "50": NaN, - "51": NaN, - "52": NaN, - "53": NaN - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "train_epoch_time": 4.79034948348999, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 1, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "loss": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "train_epoch_time": 4.78636622428894, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 2, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "loss": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "train_epoch_time": 4.787309646606445, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 3, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "loss": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "train_epoch_time": 4.786698818206787, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 4, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "loss": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "train_epoch_time": 4.787599802017212, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 5, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "loss": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "train_epoch_time": 4.788406610488892, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 6, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "loss": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "train_epoch_time": 4.789200305938721, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 7, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "loss": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "train_epoch_time": 4.789167881011963, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 8, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "loss": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "train_epoch_time": 4.789303779602051, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 9, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "loss": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "train_epoch_time": 4.789206266403198, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 10, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "loss": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "train_epoch_time": 4.789727687835693, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 11, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "loss": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "train_epoch_time": 4.789154767990112, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 12, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "loss": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "train_epoch_time": 4.790106773376465, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 13, - "grad_norm": NaN, - "learning_rate": 0.6666666666666667, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "loss": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "train_epoch_time": 4.789757966995239, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 14, - "grad_norm": NaN, - "learning_rate": 0.33333333333333337, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "loss": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "train_epoch_time": 4.788926839828491, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:39:57.039554", - "final_model_norm": NaN, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:38:16.310790", - "step_scheduler_on_epoch": false - } - }, - { - "config": { - "batch_size": 64, - "dataset": "shakespeare", - "dataset_kwargs": {}, - "loss_func": "sequence_cross_entropy", - "max_epoch": 15, - "model": "llama", - "model_kwargs": { - "dim": 384, - "expand": 4, - "mlp": "mlp", - "n_heads": 6, - "n_layers": 6, - "seq_len": 256, - "vocab_size": 92 - }, - "opt": { - "lr": 1.0, - "lr_schedule": "wsd", - "name": "sgd", - "stepwise_schedule": true, - "warmup_steps": 50 - }, - "run_id": 2, - "score_func": "sequence_cross_entropy_accuracy" - }, - "history": [ - { - "epoch": 0, - "grad_norm": NaN, - "learning_rate": 1e-10, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "0": 22.766481399536133, - "1": 23.4499454498291, - "2": 5.875208854675293, - "3": 7.603043079376221, - "4": 10.991902351379395, - "5": 4.112478733062744, - "6": 3.933785915374756, - "7": 3.6211907863616943, - "8": 10.141874313354492, - "9": 9.137262344360352, - "10": 71.74754333496094, - "11": 23.597883224487305, - "12": 4.8398027420043945, - "13": 17.30185317993164, - "14": 10.70186710357666, - "15": 6.08171272277832, - "16": 5.440594673156738, - "17": 13.690589904785156, - "18": 7.3728742599487305, - "19": 6.940519332885742, - "20": 4.886902809143066, - "21": 3.0086541175842285, - "22": 3.1613903045654297, - "23": 7.931731224060059, - "24": 2.958336591720581, - "25": 2.5366039276123047, - "26": 1.84986412525177, - "27": 1.9589768648147583, - "28": 14.245430946350098, - "29": 6.625199794769287, - "30": 33.479270935058594, - "31": 59.46356964111328, - "32": 113.09754180908203, - "33": 442.8728332519531, - "34": 4547.40234375, - "35": NaN, - "36": NaN, - "37": NaN, - "38": NaN, - "39": NaN, - "40": NaN, - "41": NaN, - "42": NaN, - "43": NaN, - "44": NaN, - "45": NaN, - "46": NaN, - "47": NaN, - "48": NaN, - "49": NaN, - "50": NaN, - "51": NaN, - "52": NaN, - "53": NaN - }, - "loss": { - "0": 4.531927108764648, - "1": 4.532190799713135, - "2": 3.771186351776123, - "3": 4.13394832611084, - "4": 4.27254581451416, - "5": 5.670802593231201, - "6": 4.482396602630615, - "7": 3.6988816261291504, - "8": 4.275219917297363, - "9": 5.822898864746094, - "10": 15.89161205291748, - "11": 17.373863220214844, - "12": 14.073184967041016, - "13": 14.631933212280273, - "14": 16.587650299072266, - "15": 15.770805358886719, - "16": 12.974701881408691, - "17": 13.431902885437012, - "18": 19.924354553222656, - "19": 15.68300724029541, - "20": 12.40634536743164, - "21": 8.940589904785156, - "22": 5.989598274230957, - "23": 6.098030090332031, - "24": 10.11153793334961, - "25": 7.325209140777588, - "26": 5.361544132232666, - "27": 3.7768332958221436, - "28": 10.208740234375, - "29": 8.026286125183105, - "30": 39.3283805847168, - "31": 122.84185791015625, - "32": 97.03656005859375, - "33": 1425.232666015625, - "34": 31620.880859375, - "35": 349572.59375, - "36": NaN, - "37": NaN, - "38": NaN, - "39": NaN, - "40": NaN, - "41": NaN, - "42": NaN, - "43": NaN, - "44": NaN, - "45": NaN, - "46": NaN, - "47": NaN, - "48": NaN, - "49": NaN, - "50": NaN, - "51": NaN, - "52": NaN, - "53": NaN - }, - "lr": { - "0": 1e-10, - "1": 0.020000000098, - "2": 0.040000000096, - "3": 0.060000000094, - "4": 0.08000000009199999, - "5": 0.10000000009, - "6": 0.12000000008799999, - "7": 0.140000000086, - "8": 0.160000000084, - "9": 0.180000000082, - "10": 0.20000000008000002, - "11": 0.220000000078, - "12": 0.240000000076, - "13": 0.260000000074, - "14": 0.280000000072, - "15": 0.30000000007, - "16": 0.320000000068, - "17": 0.34000000006599995, - "18": 0.360000000064, - "19": 0.380000000062, - "20": 0.40000000006, - "21": 0.420000000058, - "22": 0.440000000056, - "23": 0.46000000005400005, - "24": 0.480000000052, - "25": 0.5000000000499999, - "26": 0.520000000048, - "27": 0.540000000046, - "28": 0.560000000044, - "29": 0.5800000000419999, - "30": 0.60000000004, - "31": 0.620000000038, - "32": 0.640000000036, - "33": 0.660000000034, - "34": 0.6800000000319999, - "35": 0.7000000000300001, - "36": 0.720000000028, - "37": 0.740000000026, - "38": 0.760000000024, - "39": 0.780000000022, - "40": 0.80000000002, - "41": 0.820000000018, - "42": 0.840000000016, - "43": 0.860000000014, - "44": 0.880000000012, - "45": 0.9000000000099999, - "46": 0.9200000000080001, - "47": 0.940000000006, - "48": 0.960000000004, - "49": 0.9800000000019999, - "50": 1.0, - "51": 1.0, - "52": 1.0, - "53": 1.0 - } - }, - "train_epoch_time": 4.793226957321167, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 1, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "loss": { - "54": NaN, - "55": NaN, - "56": NaN, - "57": NaN, - "58": NaN, - "59": NaN, - "60": NaN, - "61": NaN, - "62": NaN, - "63": NaN, - "64": NaN, - "65": NaN, - "66": NaN, - "67": NaN, - "68": NaN, - "69": NaN, - "70": NaN, - "71": NaN, - "72": NaN, - "73": NaN, - "74": NaN, - "75": NaN, - "76": NaN, - "77": NaN, - "78": NaN, - "79": NaN, - "80": NaN, - "81": NaN, - "82": NaN, - "83": NaN, - "84": NaN, - "85": NaN, - "86": NaN, - "87": NaN, - "88": NaN, - "89": NaN, - "90": NaN, - "91": NaN, - "92": NaN, - "93": NaN, - "94": NaN, - "95": NaN, - "96": NaN, - "97": NaN, - "98": NaN, - "99": NaN, - "100": NaN, - "101": NaN, - "102": NaN, - "103": NaN, - "104": NaN, - "105": NaN, - "106": NaN, - "107": NaN - }, - "lr": { - "54": 1.0, - "55": 1.0, - "56": 1.0, - "57": 1.0, - "58": 1.0, - "59": 1.0, - "60": 1.0, - "61": 1.0, - "62": 1.0, - "63": 1.0, - "64": 1.0, - "65": 1.0, - "66": 1.0, - "67": 1.0, - "68": 1.0, - "69": 1.0, - "70": 1.0, - "71": 1.0, - "72": 1.0, - "73": 1.0, - "74": 1.0, - "75": 1.0, - "76": 1.0, - "77": 1.0, - "78": 1.0, - "79": 1.0, - "80": 1.0, - "81": 1.0, - "82": 1.0, - "83": 1.0, - "84": 1.0, - "85": 1.0, - "86": 1.0, - "87": 1.0, - "88": 1.0, - "89": 1.0, - "90": 1.0, - "91": 1.0, - "92": 1.0, - "93": 1.0, - "94": 1.0, - "95": 1.0, - "96": 1.0, - "97": 1.0, - "98": 1.0, - "99": 1.0, - "100": 1.0, - "101": 1.0, - "102": 1.0, - "103": 1.0, - "104": 1.0, - "105": 1.0, - "106": 1.0, - "107": 1.0 - } - }, - "train_epoch_time": 4.78930401802063, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 2, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "loss": { - "108": NaN, - "109": NaN, - "110": NaN, - "111": NaN, - "112": NaN, - "113": NaN, - "114": NaN, - "115": NaN, - "116": NaN, - "117": NaN, - "118": NaN, - "119": NaN, - "120": NaN, - "121": NaN, - "122": NaN, - "123": NaN, - "124": NaN, - "125": NaN, - "126": NaN, - "127": NaN, - "128": NaN, - "129": NaN, - "130": NaN, - "131": NaN, - "132": NaN, - "133": NaN, - "134": NaN, - "135": NaN, - "136": NaN, - "137": NaN, - "138": NaN, - "139": NaN, - "140": NaN, - "141": NaN, - "142": NaN, - "143": NaN, - "144": NaN, - "145": NaN, - "146": NaN, - "147": NaN, - "148": NaN, - "149": NaN, - "150": NaN, - "151": NaN, - "152": NaN, - "153": NaN, - "154": NaN, - "155": NaN, - "156": NaN, - "157": NaN, - "158": NaN, - "159": NaN, - "160": NaN, - "161": NaN - }, - "lr": { - "108": 1.0, - "109": 1.0, - "110": 1.0, - "111": 1.0, - "112": 1.0, - "113": 1.0, - "114": 1.0, - "115": 1.0, - "116": 1.0, - "117": 1.0, - "118": 1.0, - "119": 1.0, - "120": 1.0, - "121": 1.0, - "122": 1.0, - "123": 1.0, - "124": 1.0, - "125": 1.0, - "126": 1.0, - "127": 1.0, - "128": 1.0, - "129": 1.0, - "130": 1.0, - "131": 1.0, - "132": 1.0, - "133": 1.0, - "134": 1.0, - "135": 1.0, - "136": 1.0, - "137": 1.0, - "138": 1.0, - "139": 1.0, - "140": 1.0, - "141": 1.0, - "142": 1.0, - "143": 1.0, - "144": 1.0, - "145": 1.0, - "146": 1.0, - "147": 1.0, - "148": 1.0, - "149": 1.0, - "150": 1.0, - "151": 1.0, - "152": 1.0, - "153": 1.0, - "154": 1.0, - "155": 1.0, - "156": 1.0, - "157": 1.0, - "158": 1.0, - "159": 1.0, - "160": 1.0, - "161": 1.0 - } - }, - "train_epoch_time": 4.789414405822754, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 3, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "loss": { - "162": NaN, - "163": NaN, - "164": NaN, - "165": NaN, - "166": NaN, - "167": NaN, - "168": NaN, - "169": NaN, - "170": NaN, - "171": NaN, - "172": NaN, - "173": NaN, - "174": NaN, - "175": NaN, - "176": NaN, - "177": NaN, - "178": NaN, - "179": NaN, - "180": NaN, - "181": NaN, - "182": NaN, - "183": NaN, - "184": NaN, - "185": NaN, - "186": NaN, - "187": NaN, - "188": NaN, - "189": NaN, - "190": NaN, - "191": NaN, - "192": NaN, - "193": NaN, - "194": NaN, - "195": NaN, - "196": NaN, - "197": NaN, - "198": NaN, - "199": NaN, - "200": NaN, - "201": NaN, - "202": NaN, - "203": NaN, - "204": NaN, - "205": NaN, - "206": NaN, - "207": NaN, - "208": NaN, - "209": NaN, - "210": NaN, - "211": NaN, - "212": NaN, - "213": NaN, - "214": NaN, - "215": NaN - }, - "lr": { - "162": 1.0, - "163": 1.0, - "164": 1.0, - "165": 1.0, - "166": 1.0, - "167": 1.0, - "168": 1.0, - "169": 1.0, - "170": 1.0, - "171": 1.0, - "172": 1.0, - "173": 1.0, - "174": 1.0, - "175": 1.0, - "176": 1.0, - "177": 1.0, - "178": 1.0, - "179": 1.0, - "180": 1.0, - "181": 1.0, - "182": 1.0, - "183": 1.0, - "184": 1.0, - "185": 1.0, - "186": 1.0, - "187": 1.0, - "188": 1.0, - "189": 1.0, - "190": 1.0, - "191": 1.0, - "192": 1.0, - "193": 1.0, - "194": 1.0, - "195": 1.0, - "196": 1.0, - "197": 1.0, - "198": 1.0, - "199": 1.0, - "200": 1.0, - "201": 1.0, - "202": 1.0, - "203": 1.0, - "204": 1.0, - "205": 1.0, - "206": 1.0, - "207": 1.0, - "208": 1.0, - "209": 1.0, - "210": 1.0, - "211": 1.0, - "212": 1.0, - "213": 1.0, - "214": 1.0, - "215": 1.0 - } - }, - "train_epoch_time": 4.789056062698364, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 4, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "loss": { - "216": NaN, - "217": NaN, - "218": NaN, - "219": NaN, - "220": NaN, - "221": NaN, - "222": NaN, - "223": NaN, - "224": NaN, - "225": NaN, - "226": NaN, - "227": NaN, - "228": NaN, - "229": NaN, - "230": NaN, - "231": NaN, - "232": NaN, - "233": NaN, - "234": NaN, - "235": NaN, - "236": NaN, - "237": NaN, - "238": NaN, - "239": NaN, - "240": NaN, - "241": NaN, - "242": NaN, - "243": NaN, - "244": NaN, - "245": NaN, - "246": NaN, - "247": NaN, - "248": NaN, - "249": NaN, - "250": NaN, - "251": NaN, - "252": NaN, - "253": NaN, - "254": NaN, - "255": NaN, - "256": NaN, - "257": NaN, - "258": NaN, - "259": NaN, - "260": NaN, - "261": NaN, - "262": NaN, - "263": NaN, - "264": NaN, - "265": NaN, - "266": NaN, - "267": NaN, - "268": NaN, - "269": NaN - }, - "lr": { - "216": 1.0, - "217": 1.0, - "218": 1.0, - "219": 1.0, - "220": 1.0, - "221": 1.0, - "222": 1.0, - "223": 1.0, - "224": 1.0, - "225": 1.0, - "226": 1.0, - "227": 1.0, - "228": 1.0, - "229": 1.0, - "230": 1.0, - "231": 1.0, - "232": 1.0, - "233": 1.0, - "234": 1.0, - "235": 1.0, - "236": 1.0, - "237": 1.0, - "238": 1.0, - "239": 1.0, - "240": 1.0, - "241": 1.0, - "242": 1.0, - "243": 1.0, - "244": 1.0, - "245": 1.0, - "246": 1.0, - "247": 1.0, - "248": 1.0, - "249": 1.0, - "250": 1.0, - "251": 1.0, - "252": 1.0, - "253": 1.0, - "254": 1.0, - "255": 1.0, - "256": 1.0, - "257": 1.0, - "258": 1.0, - "259": 1.0, - "260": 1.0, - "261": 1.0, - "262": 1.0, - "263": 1.0, - "264": 1.0, - "265": 1.0, - "266": 1.0, - "267": 1.0, - "268": 1.0, - "269": 1.0 - } - }, - "train_epoch_time": 4.789112329483032, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 5, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "loss": { - "270": NaN, - "271": NaN, - "272": NaN, - "273": NaN, - "274": NaN, - "275": NaN, - "276": NaN, - "277": NaN, - "278": NaN, - "279": NaN, - "280": NaN, - "281": NaN, - "282": NaN, - "283": NaN, - "284": NaN, - "285": NaN, - "286": NaN, - "287": NaN, - "288": NaN, - "289": NaN, - "290": NaN, - "291": NaN, - "292": NaN, - "293": NaN, - "294": NaN, - "295": NaN, - "296": NaN, - "297": NaN, - "298": NaN, - "299": NaN, - "300": NaN, - "301": NaN, - "302": NaN, - "303": NaN, - "304": NaN, - "305": NaN, - "306": NaN, - "307": NaN, - "308": NaN, - "309": NaN, - "310": NaN, - "311": NaN, - "312": NaN, - "313": NaN, - "314": NaN, - "315": NaN, - "316": NaN, - "317": NaN, - "318": NaN, - "319": NaN, - "320": NaN, - "321": NaN, - "322": NaN, - "323": NaN - }, - "lr": { - "270": 1.0, - "271": 1.0, - "272": 1.0, - "273": 1.0, - "274": 1.0, - "275": 1.0, - "276": 1.0, - "277": 1.0, - "278": 1.0, - "279": 1.0, - "280": 1.0, - "281": 1.0, - "282": 1.0, - "283": 1.0, - "284": 1.0, - "285": 1.0, - "286": 1.0, - "287": 1.0, - "288": 1.0, - "289": 1.0, - "290": 1.0, - "291": 1.0, - "292": 1.0, - "293": 1.0, - "294": 1.0, - "295": 1.0, - "296": 1.0, - "297": 1.0, - "298": 1.0, - "299": 1.0, - "300": 1.0, - "301": 1.0, - "302": 1.0, - "303": 1.0, - "304": 1.0, - "305": 1.0, - "306": 1.0, - "307": 1.0, - "308": 1.0, - "309": 1.0, - "310": 1.0, - "311": 1.0, - "312": 1.0, - "313": 1.0, - "314": 1.0, - "315": 1.0, - "316": 1.0, - "317": 1.0, - "318": 1.0, - "319": 1.0, - "320": 1.0, - "321": 1.0, - "322": 1.0, - "323": 1.0 - } - }, - "train_epoch_time": 4.788971662521362, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 6, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "loss": { - "324": NaN, - "325": NaN, - "326": NaN, - "327": NaN, - "328": NaN, - "329": NaN, - "330": NaN, - "331": NaN, - "332": NaN, - "333": NaN, - "334": NaN, - "335": NaN, - "336": NaN, - "337": NaN, - "338": NaN, - "339": NaN, - "340": NaN, - "341": NaN, - "342": NaN, - "343": NaN, - "344": NaN, - "345": NaN, - "346": NaN, - "347": NaN, - "348": NaN, - "349": NaN, - "350": NaN, - "351": NaN, - "352": NaN, - "353": NaN, - "354": NaN, - "355": NaN, - "356": NaN, - "357": NaN, - "358": NaN, - "359": NaN, - "360": NaN, - "361": NaN, - "362": NaN, - "363": NaN, - "364": NaN, - "365": NaN, - "366": NaN, - "367": NaN, - "368": NaN, - "369": NaN, - "370": NaN, - "371": NaN, - "372": NaN, - "373": NaN, - "374": NaN, - "375": NaN, - "376": NaN, - "377": NaN - }, - "lr": { - "324": 1.0, - "325": 1.0, - "326": 1.0, - "327": 1.0, - "328": 1.0, - "329": 1.0, - "330": 1.0, - "331": 1.0, - "332": 1.0, - "333": 1.0, - "334": 1.0, - "335": 1.0, - "336": 1.0, - "337": 1.0, - "338": 1.0, - "339": 1.0, - "340": 1.0, - "341": 1.0, - "342": 1.0, - "343": 1.0, - "344": 1.0, - "345": 1.0, - "346": 1.0, - "347": 1.0, - "348": 1.0, - "349": 1.0, - "350": 1.0, - "351": 1.0, - "352": 1.0, - "353": 1.0, - "354": 1.0, - "355": 1.0, - "356": 1.0, - "357": 1.0, - "358": 1.0, - "359": 1.0, - "360": 1.0, - "361": 1.0, - "362": 1.0, - "363": 1.0, - "364": 1.0, - "365": 1.0, - "366": 1.0, - "367": 1.0, - "368": 1.0, - "369": 1.0, - "370": 1.0, - "371": 1.0, - "372": 1.0, - "373": 1.0, - "374": 1.0, - "375": 1.0, - "376": 1.0, - "377": 1.0 - } - }, - "train_epoch_time": 4.789371013641357, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 7, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "loss": { - "378": NaN, - "379": NaN, - "380": NaN, - "381": NaN, - "382": NaN, - "383": NaN, - "384": NaN, - "385": NaN, - "386": NaN, - "387": NaN, - "388": NaN, - "389": NaN, - "390": NaN, - "391": NaN, - "392": NaN, - "393": NaN, - "394": NaN, - "395": NaN, - "396": NaN, - "397": NaN, - "398": NaN, - "399": NaN, - "400": NaN, - "401": NaN, - "402": NaN, - "403": NaN, - "404": NaN, - "405": NaN, - "406": NaN, - "407": NaN, - "408": NaN, - "409": NaN, - "410": NaN, - "411": NaN, - "412": NaN, - "413": NaN, - "414": NaN, - "415": NaN, - "416": NaN, - "417": NaN, - "418": NaN, - "419": NaN, - "420": NaN, - "421": NaN, - "422": NaN, - "423": NaN, - "424": NaN, - "425": NaN, - "426": NaN, - "427": NaN, - "428": NaN, - "429": NaN, - "430": NaN, - "431": NaN - }, - "lr": { - "378": 1.0, - "379": 1.0, - "380": 1.0, - "381": 1.0, - "382": 1.0, - "383": 1.0, - "384": 1.0, - "385": 1.0, - "386": 1.0, - "387": 1.0, - "388": 1.0, - "389": 1.0, - "390": 1.0, - "391": 1.0, - "392": 1.0, - "393": 1.0, - "394": 1.0, - "395": 1.0, - "396": 1.0, - "397": 1.0, - "398": 1.0, - "399": 1.0, - "400": 1.0, - "401": 1.0, - "402": 1.0, - "403": 1.0, - "404": 1.0, - "405": 1.0, - "406": 1.0, - "407": 1.0, - "408": 1.0, - "409": 1.0, - "410": 1.0, - "411": 1.0, - "412": 1.0, - "413": 1.0, - "414": 1.0, - "415": 1.0, - "416": 1.0, - "417": 1.0, - "418": 1.0, - "419": 1.0, - "420": 1.0, - "421": 1.0, - "422": 1.0, - "423": 1.0, - "424": 1.0, - "425": 1.0, - "426": 1.0, - "427": 1.0, - "428": 1.0, - "429": 1.0, - "430": 1.0, - "431": 1.0 - } - }, - "train_epoch_time": 4.788316488265991, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 8, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "loss": { - "432": NaN, - "433": NaN, - "434": NaN, - "435": NaN, - "436": NaN, - "437": NaN, - "438": NaN, - "439": NaN, - "440": NaN, - "441": NaN, - "442": NaN, - "443": NaN, - "444": NaN, - "445": NaN, - "446": NaN, - "447": NaN, - "448": NaN, - "449": NaN, - "450": NaN, - "451": NaN, - "452": NaN, - "453": NaN, - "454": NaN, - "455": NaN, - "456": NaN, - "457": NaN, - "458": NaN, - "459": NaN, - "460": NaN, - "461": NaN, - "462": NaN, - "463": NaN, - "464": NaN, - "465": NaN, - "466": NaN, - "467": NaN, - "468": NaN, - "469": NaN, - "470": NaN, - "471": NaN, - "472": NaN, - "473": NaN, - "474": NaN, - "475": NaN, - "476": NaN, - "477": NaN, - "478": NaN, - "479": NaN, - "480": NaN, - "481": NaN, - "482": NaN, - "483": NaN, - "484": NaN, - "485": NaN - }, - "lr": { - "432": 1.0, - "433": 1.0, - "434": 1.0, - "435": 1.0, - "436": 1.0, - "437": 1.0, - "438": 1.0, - "439": 1.0, - "440": 1.0, - "441": 1.0, - "442": 1.0, - "443": 1.0, - "444": 1.0, - "445": 1.0, - "446": 1.0, - "447": 1.0, - "448": 1.0, - "449": 1.0, - "450": 1.0, - "451": 1.0, - "452": 1.0, - "453": 1.0, - "454": 1.0, - "455": 1.0, - "456": 1.0, - "457": 1.0, - "458": 1.0, - "459": 1.0, - "460": 1.0, - "461": 1.0, - "462": 1.0, - "463": 1.0, - "464": 1.0, - "465": 1.0, - "466": 1.0, - "467": 1.0, - "468": 1.0, - "469": 1.0, - "470": 1.0, - "471": 1.0, - "472": 1.0, - "473": 1.0, - "474": 1.0, - "475": 1.0, - "476": 1.0, - "477": 1.0, - "478": 1.0, - "479": 1.0, - "480": 1.0, - "481": 1.0, - "482": 1.0, - "483": 1.0, - "484": 1.0, - "485": 1.0 - } - }, - "train_epoch_time": 4.789828300476074, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 9, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "loss": { - "486": NaN, - "487": NaN, - "488": NaN, - "489": NaN, - "490": NaN, - "491": NaN, - "492": NaN, - "493": NaN, - "494": NaN, - "495": NaN, - "496": NaN, - "497": NaN, - "498": NaN, - "499": NaN, - "500": NaN, - "501": NaN, - "502": NaN, - "503": NaN, - "504": NaN, - "505": NaN, - "506": NaN, - "507": NaN, - "508": NaN, - "509": NaN, - "510": NaN, - "511": NaN, - "512": NaN, - "513": NaN, - "514": NaN, - "515": NaN, - "516": NaN, - "517": NaN, - "518": NaN, - "519": NaN, - "520": NaN, - "521": NaN, - "522": NaN, - "523": NaN, - "524": NaN, - "525": NaN, - "526": NaN, - "527": NaN, - "528": NaN, - "529": NaN, - "530": NaN, - "531": NaN, - "532": NaN, - "533": NaN, - "534": NaN, - "535": NaN, - "536": NaN, - "537": NaN, - "538": NaN, - "539": NaN - }, - "lr": { - "486": 1.0, - "487": 1.0, - "488": 1.0, - "489": 1.0, - "490": 1.0, - "491": 1.0, - "492": 1.0, - "493": 1.0, - "494": 1.0, - "495": 1.0, - "496": 1.0, - "497": 1.0, - "498": 1.0, - "499": 1.0, - "500": 1.0, - "501": 1.0, - "502": 1.0, - "503": 1.0, - "504": 1.0, - "505": 1.0, - "506": 1.0, - "507": 1.0, - "508": 1.0, - "509": 1.0, - "510": 1.0, - "511": 1.0, - "512": 1.0, - "513": 1.0, - "514": 1.0, - "515": 1.0, - "516": 1.0, - "517": 1.0, - "518": 1.0, - "519": 1.0, - "520": 1.0, - "521": 1.0, - "522": 1.0, - "523": 1.0, - "524": 1.0, - "525": 1.0, - "526": 1.0, - "527": 1.0, - "528": 1.0, - "529": 1.0, - "530": 1.0, - "531": 1.0, - "532": 1.0, - "533": 1.0, - "534": 1.0, - "535": 1.0, - "536": 1.0, - "537": 1.0, - "538": 1.0, - "539": 1.0 - } - }, - "train_epoch_time": 4.788756370544434, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 10, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "loss": { - "540": NaN, - "541": NaN, - "542": NaN, - "543": NaN, - "544": NaN, - "545": NaN, - "546": NaN, - "547": NaN, - "548": NaN, - "549": NaN, - "550": NaN, - "551": NaN, - "552": NaN, - "553": NaN, - "554": NaN, - "555": NaN, - "556": NaN, - "557": NaN, - "558": NaN, - "559": NaN, - "560": NaN, - "561": NaN, - "562": NaN, - "563": NaN, - "564": NaN, - "565": NaN, - "566": NaN, - "567": NaN, - "568": NaN, - "569": NaN, - "570": NaN, - "571": NaN, - "572": NaN, - "573": NaN, - "574": NaN, - "575": NaN, - "576": NaN, - "577": NaN, - "578": NaN, - "579": NaN, - "580": NaN, - "581": NaN, - "582": NaN, - "583": NaN, - "584": NaN, - "585": NaN, - "586": NaN, - "587": NaN, - "588": NaN, - "589": NaN, - "590": NaN, - "591": NaN, - "592": NaN, - "593": NaN - }, - "lr": { - "540": 1.0, - "541": 1.0, - "542": 1.0, - "543": 1.0, - "544": 1.0, - "545": 1.0, - "546": 1.0, - "547": 1.0, - "548": 1.0, - "549": 1.0, - "550": 1.0, - "551": 1.0, - "552": 1.0, - "553": 1.0, - "554": 1.0, - "555": 1.0, - "556": 1.0, - "557": 1.0, - "558": 1.0, - "559": 1.0, - "560": 1.0, - "561": 1.0, - "562": 1.0, - "563": 1.0, - "564": 1.0, - "565": 1.0, - "566": 1.0, - "567": 1.0, - "568": 1.0, - "569": 1.0, - "570": 1.0, - "571": 1.0, - "572": 1.0, - "573": 1.0, - "574": 1.0, - "575": 1.0, - "576": 1.0, - "577": 1.0, - "578": 1.0, - "579": 1.0, - "580": 1.0, - "581": 1.0, - "582": 1.0, - "583": 1.0, - "584": 1.0, - "585": 1.0, - "586": 1.0, - "587": 1.0, - "588": 1.0, - "589": 1.0, - "590": 1.0, - "591": 1.0, - "592": 1.0, - "593": 1.0 - } - }, - "train_epoch_time": 4.7892677783966064, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 11, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "loss": { - "594": NaN, - "595": NaN, - "596": NaN, - "597": NaN, - "598": NaN, - "599": NaN, - "600": NaN, - "601": NaN, - "602": NaN, - "603": NaN, - "604": NaN, - "605": NaN, - "606": NaN, - "607": NaN, - "608": NaN, - "609": NaN, - "610": NaN, - "611": NaN, - "612": NaN, - "613": NaN, - "614": NaN, - "615": NaN, - "616": NaN, - "617": NaN, - "618": NaN, - "619": NaN, - "620": NaN, - "621": NaN, - "622": NaN, - "623": NaN, - "624": NaN, - "625": NaN, - "626": NaN, - "627": NaN, - "628": NaN, - "629": NaN, - "630": NaN, - "631": NaN, - "632": NaN, - "633": NaN, - "634": NaN, - "635": NaN, - "636": NaN, - "637": NaN, - "638": NaN, - "639": NaN, - "640": NaN, - "641": NaN, - "642": NaN, - "643": NaN, - "644": NaN, - "645": NaN, - "646": NaN, - "647": NaN - }, - "lr": { - "594": 1.0, - "595": 1.0, - "596": 1.0, - "597": 1.0, - "598": 1.0, - "599": 1.0, - "600": 1.0, - "601": 1.0, - "602": 1.0, - "603": 1.0, - "604": 1.0, - "605": 1.0, - "606": 1.0, - "607": 1.0, - "608": 1.0, - "609": 1.0, - "610": 1.0, - "611": 1.0, - "612": 1.0, - "613": 1.0, - "614": 1.0, - "615": 1.0, - "616": 1.0, - "617": 1.0, - "618": 1.0, - "619": 1.0, - "620": 1.0, - "621": 1.0, - "622": 1.0, - "623": 1.0, - "624": 1.0, - "625": 1.0, - "626": 1.0, - "627": 1.0, - "628": 1.0, - "629": 1.0, - "630": 1.0, - "631": 1.0, - "632": 1.0, - "633": 1.0, - "634": 1.0, - "635": 1.0, - "636": 1.0, - "637": 1.0, - "638": 1.0, - "639": 1.0, - "640": 1.0, - "641": 1.0, - "642": 1.0, - "643": 1.0, - "644": 1.0, - "645": 1.0, - "646": 1.0, - "647": 1.0 - } - }, - "train_epoch_time": 4.789634943008423, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 12, - "grad_norm": NaN, - "learning_rate": 1.0, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "loss": { - "648": NaN, - "649": NaN, - "650": NaN, - "651": NaN, - "652": NaN, - "653": NaN, - "654": NaN, - "655": NaN, - "656": NaN, - "657": NaN, - "658": NaN, - "659": NaN, - "660": NaN, - "661": NaN, - "662": NaN, - "663": NaN, - "664": NaN, - "665": NaN, - "666": NaN, - "667": NaN, - "668": NaN, - "669": NaN, - "670": NaN, - "671": NaN, - "672": NaN, - "673": NaN, - "674": NaN, - "675": NaN, - "676": NaN, - "677": NaN, - "678": NaN, - "679": NaN, - "680": NaN, - "681": NaN, - "682": NaN, - "683": NaN, - "684": NaN, - "685": NaN, - "686": NaN, - "687": NaN, - "688": NaN, - "689": NaN, - "690": NaN, - "691": NaN, - "692": NaN, - "693": NaN, - "694": NaN, - "695": NaN, - "696": NaN, - "697": NaN, - "698": NaN, - "699": NaN, - "700": NaN, - "701": NaN - }, - "lr": { - "648": 1.0, - "649": 0.9938271604938271, - "650": 0.9876543209876543, - "651": 0.9814814814814815, - "652": 0.9753086419753086, - "653": 0.9691358024691358, - "654": 0.962962962962963, - "655": 0.9567901234567902, - "656": 0.9506172839506173, - "657": 0.9444444444444444, - "658": 0.9382716049382716, - "659": 0.9320987654320988, - "660": 0.9259259259259259, - "661": 0.9197530864197531, - "662": 0.9135802469135803, - "663": 0.9074074074074074, - "664": 0.9012345679012346, - "665": 0.8950617283950617, - "666": 0.8888888888888888, - "667": 0.8827160493827161, - "668": 0.8765432098765432, - "669": 0.8703703703703703, - "670": 0.8641975308641976, - "671": 0.8580246913580247, - "672": 0.8518518518518519, - "673": 0.845679012345679, - "674": 0.8395061728395061, - "675": 0.8333333333333334, - "676": 0.8271604938271605, - "677": 0.8209876543209876, - "678": 0.8148148148148149, - "679": 0.808641975308642, - "680": 0.8024691358024691, - "681": 0.7962962962962963, - "682": 0.7901234567901234, - "683": 0.7839506172839507, - "684": 0.7777777777777778, - "685": 0.7716049382716049, - "686": 0.7654320987654322, - "687": 0.7592592592592593, - "688": 0.7530864197530864, - "689": 0.7469135802469136, - "690": 0.7407407407407407, - "691": 0.7345679012345678, - "692": 0.7283950617283951, - "693": 0.7222222222222222, - "694": 0.7160493827160495, - "695": 0.7098765432098766, - "696": 0.7037037037037037, - "697": 0.6975308641975309, - "698": 0.691358024691358, - "699": 0.6851851851851851, - "700": 0.6790123456790124, - "701": 0.6728395061728395 - } - }, - "train_epoch_time": 4.789050579071045, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 13, - "grad_norm": NaN, - "learning_rate": 0.6666666666666667, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "loss": { - "702": NaN, - "703": NaN, - "704": NaN, - "705": NaN, - "706": NaN, - "707": NaN, - "708": NaN, - "709": NaN, - "710": NaN, - "711": NaN, - "712": NaN, - "713": NaN, - "714": NaN, - "715": NaN, - "716": NaN, - "717": NaN, - "718": NaN, - "719": NaN, - "720": NaN, - "721": NaN, - "722": NaN, - "723": NaN, - "724": NaN, - "725": NaN, - "726": NaN, - "727": NaN, - "728": NaN, - "729": NaN, - "730": NaN, - "731": NaN, - "732": NaN, - "733": NaN, - "734": NaN, - "735": NaN, - "736": NaN, - "737": NaN, - "738": NaN, - "739": NaN, - "740": NaN, - "741": NaN, - "742": NaN, - "743": NaN, - "744": NaN, - "745": NaN, - "746": NaN, - "747": NaN, - "748": NaN, - "749": NaN, - "750": NaN, - "751": NaN, - "752": NaN, - "753": NaN, - "754": NaN, - "755": NaN - }, - "lr": { - "702": 0.6666666666666667, - "703": 0.6604938271604939, - "704": 0.654320987654321, - "705": 0.6481481481481481, - "706": 0.6419753086419753, - "707": 0.6358024691358024, - "708": 0.6296296296296297, - "709": 0.6234567901234568, - "710": 0.617283950617284, - "711": 0.6111111111111112, - "712": 0.6049382716049383, - "713": 0.5987654320987654, - "714": 0.5925925925925926, - "715": 0.5864197530864197, - "716": 0.5802469135802469, - "717": 0.5740740740740741, - "718": 0.5679012345679013, - "719": 0.5617283950617284, - "720": 0.5555555555555556, - "721": 0.5493827160493827, - "722": 0.5432098765432098, - "723": 0.537037037037037, - "724": 0.5308641975308642, - "725": 0.5246913580246914, - "726": 0.5185185185185186, - "727": 0.5123456790123457, - "728": 0.5061728395061729, - "729": 0.5, - "730": 0.49382716049382713, - "731": 0.48765432098765427, - "732": 0.4814814814814815, - "733": 0.47530864197530864, - "734": 0.4691358024691358, - "735": 0.4629629629629629, - "736": 0.45679012345679015, - "737": 0.4506172839506173, - "738": 0.4444444444444444, - "739": 0.43827160493827155, - "740": 0.4320987654320988, - "741": 0.42592592592592593, - "742": 0.41975308641975306, - "743": 0.4135802469135802, - "744": 0.40740740740740744, - "745": 0.4012345679012346, - "746": 0.3950617283950617, - "747": 0.38888888888888884, - "748": 0.3827160493827161, - "749": 0.3765432098765432, - "750": 0.37037037037037035, - "751": 0.3641975308641975, - "752": 0.3580246913580247, - "753": 0.35185185185185186, - "754": 0.345679012345679, - "755": 0.3395061728395061 - } - }, - "train_epoch_time": 4.789714097976685, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - }, - { - "epoch": 14, - "grad_norm": NaN, - "learning_rate": 0.33333333333333337, - "model_norm": NaN, - "step_logs": { - "grad_norm": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "loss": { - "756": NaN, - "757": NaN, - "758": NaN, - "759": NaN, - "760": NaN, - "761": NaN, - "762": NaN, - "763": NaN, - "764": NaN, - "765": NaN, - "766": NaN, - "767": NaN, - "768": NaN, - "769": NaN, - "770": NaN, - "771": NaN, - "772": NaN, - "773": NaN, - "774": NaN, - "775": NaN, - "776": NaN, - "777": NaN, - "778": NaN, - "779": NaN, - "780": NaN, - "781": NaN, - "782": NaN, - "783": NaN, - "784": NaN, - "785": NaN, - "786": NaN, - "787": NaN, - "788": NaN, - "789": NaN, - "790": NaN, - "791": NaN, - "792": NaN, - "793": NaN, - "794": NaN, - "795": NaN, - "796": NaN, - "797": NaN, - "798": NaN, - "799": NaN, - "800": NaN, - "801": NaN, - "802": NaN, - "803": NaN, - "804": NaN, - "805": NaN, - "806": NaN, - "807": NaN, - "808": NaN, - "809": NaN - }, - "lr": { - "756": 0.33333333333333337, - "757": 0.3271604938271605, - "758": 0.32098765432098764, - "759": 0.31481481481481477, - "760": 0.308641975308642, - "761": 0.30246913580246915, - "762": 0.2962962962962963, - "763": 0.2901234567901234, - "764": 0.28395061728395066, - "765": 0.2777777777777778, - "766": 0.2716049382716049, - "767": 0.26543209876543206, - "768": 0.2592592592592593, - "769": 0.25308641975308643, - "770": 0.24691358024691357, - "771": 0.2407407407407407, - "772": 0.23456790123456794, - "773": 0.22839506172839508, - "774": 0.2222222222222222, - "775": 0.21604938271604934, - "776": 0.2098765432098766, - "777": 0.20370370370370372, - "778": 0.19753086419753085, - "779": 0.191358024691358, - "780": 0.18518518518518523, - "781": 0.17901234567901236, - "782": 0.1728395061728395, - "783": 0.16666666666666663, - "784": 0.16049382716049387, - "785": 0.154320987654321, - "786": 0.14814814814814814, - "787": 0.14197530864197527, - "788": 0.13580246913580252, - "789": 0.12962962962962965, - "790": 0.12345679012345678, - "791": 0.11728395061728392, - "792": 0.11111111111111116, - "793": 0.1049382716049383, - "794": 0.09876543209876543, - "795": 0.09259259259259256, - "796": 0.0864197530864198, - "797": 0.08024691358024694, - "798": 0.07407407407407407, - "799": 0.0679012345679012, - "800": 0.06172839506172845, - "801": 0.05555555555555558, - "802": 0.04938271604938271, - "803": 0.043209876543209846, - "804": 0.03703703703703709, - "805": 0.030864197530864224, - "806": 0.024691358024691357, - "807": 0.01851851851851849, - "808": 0.012345679012345734, - "809": 0.006172839506172867 - } - }, - "train_epoch_time": 4.789852619171143, - "train_loss": NaN, - "train_score": 0.03529972203043817, - "val_loss": NaN, - "val_score": 0.038120694663781936 - } - ], - "summary": { - "data_parallel": "false", - "end_time": "2025-12-01 18:41:38.043501", - "final_model_norm": NaN, - "init_model_norm": 87.41546630859375, - "input_dim": [ - 256 - ], - "num_batches_per_epoch": 54, - "num_workers": 0, - "output_dim": [ - 256 - ], - "start_time": "2025-12-01 18:39:57.297166", - "step_scheduler_on_epoch": false - } - } -] \ No newline at end of file