diff --git a/src/cnlpt/CnlpModelForClassification.py b/src/cnlpt/CnlpModelForClassification.py
index 30fdde88..30d0caf1 100644
--- a/src/cnlpt/CnlpModelForClassification.py
+++ b/src/cnlpt/CnlpModelForClassification.py
@@ -287,7 +287,7 @@ def __init__(
         self.encoder = encoder_model.from_pretrained(config.encoder_name)
         # part of the motivation for leaving this
         # logic alone for character level models is that
-        # at the time of writing,  CANINE and Flair are the only game in town.
+        # at the time of writing,  CANINE and Flair are the only game in town. 
         # CANINE's hashable embeddings for unicode codepoints allows for
         # additional parameterization, which rn doesn't seem so relevant
         if not config.character_level:
@@ -329,12 +329,12 @@ def __init__(
                 head_size=config.rel_attention_head_dims,
             )
             if config.relations[task_name]:
-                hidden_size = config.num_rel_attention_heads
-                if config.use_prior_tasks:
-                    hidden_size += total_prev_task_labels
+                # hidden_size = config.num_rel_attention_heads
+                # if config.use_prior_tasks:
+                #     hidden_size += total_prev_task_labels
 
                 self.classifiers[task_name] = ClassificationHead(
-                    config, task_num_labels, hidden_size=hidden_size
+                    config, task_num_labels,
                 )
             else:
                 self.classifiers[task_name] = ClassificationHead(
@@ -491,6 +491,30 @@ def compute_loss(
             )
             state["loss"] += task_weight * task_loss
 
+    def remove_task_classifiers(self, tasks: Union[list[str], None] = None):
+        """Drop the heads, task names and class weights for ``tasks`` (for every task when None)."""
+        if tasks is None:
+            self.classifiers = nn.ModuleDict()
+            self.tasks, self.class_weights = [], {}
+        else:
+            for task in tasks:
+                self.classifiers.pop(task)
+                self.tasks.remove(task)
+                self.class_weights.pop(task, None)  # add_task_classifier() stores no weight entry
+
+    def add_task_classifier(self, task_name: str, label_dictionary: dict[str, list]):
+        """Attach a new ClassificationHead for ``task_name`` with ``len(label_dictionary)`` outputs."""
+        if task_name not in self.tasks:  # re-adding a task replaces its head, never duplicates it
+            self.tasks.append(task_name)
+        self.classifiers[task_name] = ClassificationHead(self.config, len(label_dictionary))
+        self.label_dictionary[task_name] = label_dictionary
+    
+    def set_class_weights(self, class_weights: Union[list[float], None] = None):
+        """Store ``class_weights``; when None, map every key of ``label_dictionary`` to None."""
+        if class_weights is None:
+            class_weights = dict.fromkeys(self.label_dictionary)
+        self.class_weights = class_weights
+
     def forward(
         self,
         input_ids=None,
@@ -531,7 +555,6 @@ def forward(
 
         Returns: (`transformers.SequenceClassifierOutput`) the output of the model
         """
-
         kwargs = generalize_encoder_forward_kwargs(
             self.encoder,
             attention_mask=attention_mask,
diff --git a/src/cnlpt/cnlp_args.py b/src/cnlpt/cnlp_args.py
index d3d45c4a..d07d0c59 100644
--- a/src/cnlpt/cnlp_args.py
+++ b/src/cnlpt/cnlp_args.py
@@ -298,18 +298,18 @@ class DaptArguments:
             "help": "Pretrained tokenizer name or path if not the same as model_name"
         },
     )
-    output_dir: Union[str, None] = field(
-        default=None, metadata={"help": "Directory path to write trained model to."}
-    )
-    overwrite_output_dir: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "Overwrite the content of the output directory. "
-                "Use this to continue training if output_dir points to a checkpoint directory."
-            )
-        },
-    )
+    # output_dir: Union[str, None] = field(
+    #     default=None, metadata={"help": "Directory path to write trained model to."}
+    # )
+    # overwrite_output_dir: bool = field(
+    #     default=False,
+    #     metadata={
+    #         "help": (
+    #             "Overwrite the content of the output directory. "
+    #             "Use this to continue training if output_dir points to a checkpoint directory."
+    #         )
+    #     },
+    # )
     data_dir: Union[str, None] = field(
         default=None, metadata={"help": "The data dir for domain-adaptive pretraining."}
     )
@@ -333,12 +333,12 @@ class DaptArguments:
         default=0.2,
         metadata={"help": "The test split proportion for domain-adaptive pretraining."},
     )
-    seed: int = field(
-        default=42,
-        metadata={
-            "help": "The random seed to use for a train/test split for domain-adaptive pretraining (requires --dapt-encoder)."
-        },
-    )
+    # seed: int = field(
+    #     default=42,
+    #     metadata={
+    #         "help": "The random seed to use for a train/test split for domain-adaptive pretraining (requires --dapt-encoder)."
+    #     },
+    # )
     no_eval: bool = field(
         default=False,
         metadata={"help": "Don't split into train and test; just pretrain."},
diff --git a/src/cnlpt/cnlp_data.py b/src/cnlpt/cnlp_data.py
index ba9090e8..12960788 100644
--- a/src/cnlpt/cnlp_data.py
+++ b/src/cnlpt/cnlp_data.py
@@ -1155,10 +1155,9 @@ def __init__(
             batched=True,
             remove_columns=list(remove_columns),
         )
-        dataset = dataset.map(
-            functools.partial(group_texts, self.args.chunk_size),
-            batched=True,
-        )
+
+        dataset = dataset.remove_columns("word_ids")
+      
 
         if isinstance(dataset, (DatasetDict, IterableDatasetDict)) or args.no_eval:
             self.dataset = dataset
diff --git a/src/cnlpt/cnlp_processors.py b/src/cnlpt/cnlp_processors.py
index db496505..997a120d 100644
--- a/src/cnlpt/cnlp_processors.py
+++ b/src/cnlpt/cnlp_processors.py
@@ -171,7 +171,7 @@ def __init__(self, data_dir: str, tasks: set[str] = None, max_train_items=-1):
             else:
                 sep = "\t"
 
-            self.dataset = load_dataset("csv", sep=sep, data_files=data_files)
+            self.dataset = load_dataset("csv", sep=sep, data_files=data_files, keep_default_na=False)
 
             ## find out what tasks are available to this dataset, and see the overlap with what the
             ## user specified at the cli, remove those tasks so we don't also get them from other datasets
diff --git a/src/cnlpt/dapt.py b/src/cnlpt/dapt.py
index 4463b9b8..09c9afaa 100644
--- a/src/cnlpt/dapt.py
+++ b/src/cnlpt/dapt.py
@@ -8,6 +8,7 @@
 from typing import Any, Union
 
 from transformers import (
+    AutoConfig,
     AutoModelForMaskedLM,
     AutoTokenizer,
     HfArgumentParser,
@@ -16,12 +17,68 @@
     set_seed,
 )
 
-from .cnlp_args import DaptArguments
+from torch.nn import CrossEntropyLoss
+from transformers.modeling_outputs import MaskedLMOutput
+from transformers.modeling_utils import PreTrainedModel
+
+from .CnlpModelForClassification import CnlpConfig, freeze_encoder_weights, generalize_encoder_forward_kwargs
+from .cnlp_args import DaptArguments, CnlpTrainingArguments
 from .cnlp_data import DaptDataset
 
 logger = logging.getLogger(__name__)
 
 
+class DaptModel(PreTrainedModel):
+    """Masked-LM encoder wrapper used for domain-adaptive pretraining (DAPT)."""
+
+    base_model_prefix = "cnlpt"
+    config_class = CnlpConfig
+
+    def __init__(self, config: config_class, freeze: float = -1.0):
+        """Load the masked-LM checkpoint named by ``config._name_or_path``.
+
+        :param config: config naming the checkpoint and the ``vocab_size`` to resize to
+        :param freeze: forwarded to ``freeze_encoder_weights`` when greater than 0
+        """
+        super().__init__(config)
+        encoder_config = AutoConfig.from_pretrained(config._name_or_path)
+        encoder_config.vocab_size = config.vocab_size
+        config.encoder_config = encoder_config.to_dict()
+        # Load once: building a throwaway model with from_config first only doubled the work.
+        self.encoder = AutoModelForMaskedLM.from_pretrained(config._name_or_path)
+        self.encoder.resize_token_embeddings(encoder_config.vocab_size)
+
+        if freeze > 0:
+            # base_model follows the checkpoint's own prefix instead of hard-coding ``.bert``.
+            freeze_encoder_weights(self.encoder.base_model.encoder, freeze)
+
+    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
+        """Run the masked-LM encoder and return a ``MaskedLMOutput``.
+
+        ``loss`` is the cross-entropy over the flattened logits, or None without ``labels``.
+        """
+        kwargs = generalize_encoder_forward_kwargs(
+            self.encoder,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            output_hidden_states=True,
+            return_dict=True,
+        )
+        outputs = self.encoder(input_ids, **kwargs)
+        logits = outputs.logits
+
+        loss = None  # stays None when no labels are supplied (previously an unbound local)
+        if labels is not None:
+            # Flatten on the logits' own last dim rather than trusting config.vocab_size to match.
+            loss = CrossEntropyLoss()(logits.view(-1, logits.size(-1)), labels.view(-1))
+        return MaskedLMOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+
 def main(
     json_file: Union[str, None] = None, json_obj: Union[dict[str, Any], None] = None
 ):
@@ -39,30 +96,31 @@ def main(
     :rtype: typing.Dict[str, typing.Dict[str, typing.Any]]
     :return: the evaluation results (will be empty if ``--do_eval`` not passed)
     """
-    parser = HfArgumentParser((DaptArguments,))
+    parser = HfArgumentParser((DaptArguments, CnlpTrainingArguments))
     dapt_args: DaptArguments
+    training_args: CnlpTrainingArguments
 
     if json_file is not None and json_obj is not None:
         raise ValueError("cannot specify json_file and json_obj")
 
     if json_file is not None:
-        (dapt_args,) = parser.parse_json_file(json_file=json_file)
+        (dapt_args, training_args) = parser.parse_json_file(json_file=json_file)
     elif json_obj is not None:
-        (dapt_args,) = parser.parse_dict(json_obj)
+        (dapt_args, training_args) = parser.parse_dict(json_obj)
     elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
         # If we pass only one argument to the script and it's the path to a json file,
         # let's parse it to get our arguments.
-        (dapt_args,) = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+        (dapt_args, training_args) = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
     else:
-        (dapt_args,) = parser.parse_args_into_dataclasses()
+        (dapt_args, training_args) = parser.parse_args_into_dataclasses()
 
     if (
-        os.path.exists(dapt_args.output_dir)
-        and os.listdir(dapt_args.output_dir)
-        and not dapt_args.overwrite_output_dir
+        os.path.exists(training_args.output_dir)
+        and os.listdir(training_args.output_dir)
+        and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({dapt_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
         )
 
     # Setup logging
@@ -85,9 +143,10 @@ def main(
     # logger.info("Model parameters %s" % model_args)
 
     logger.info(f"Domain adaptation parameters {dapt_args}")
+    logger.info(f"Training arguments {training_args}")
 
     # Set seed
-    set_seed(dapt_args.seed)
+    set_seed(training_args.seed)
 
     # Load tokenizer: Need this first for loading the datasets
     tokenizer = AutoTokenizer.from_pretrained(
@@ -101,13 +160,15 @@ def main(
         # additional_special_tokens=['<e>', '</e>', '<a1>', '</a1>', '<a2>', '</a2>', '<cr>', '<neg>']
     )
 
-    model = AutoModelForMaskedLM.from_pretrained(dapt_args.encoder_name)
+    # model = AutoModelForMaskedLM.from_pretrained(dapt_args.encoder_name)
+    config = AutoConfig.from_pretrained(dapt_args.encoder_name)
+    model = DaptModel(config, freeze=training_args.freeze)
 
     dataset = DaptDataset(dapt_args, tokenizer=tokenizer)
 
     trainer = Trainer(
         model=model,
-        args=TrainingArguments(output_dir=dapt_args.output_dir),
+        args=training_args,
         train_dataset=dataset.train,
         eval_dataset=dataset.test if not dapt_args.no_eval else None,
         data_collator=dataset.data_collator,
diff --git a/src/cnlpt/train_system.py b/src/cnlpt/train_system.py
index bdb335be..d75489dd 100644
--- a/src/cnlpt/train_system.py
+++ b/src/cnlpt/train_system.py
@@ -300,7 +300,7 @@ def main(
     if data_args.weight_classes:
         from collections import Counter
 
-        class_weights = []
+        class_weights = {}
         for task in task_names:
             # get labels in the right order ([0, 1])
             if isinstance(
@@ -309,17 +309,21 @@ def main(
                 dataset.tasks_to_labels[task] = dataset.tasks_to_labels[task][1:] + [
                     dataset.tasks_to_labels[task][0]
                 ]
-            labels = dataset.processed_dataset["train"][task]
+            if tagger[task]:
+                labels = [token_label for sent in dataset.processed_dataset["train"][task] for token_label in sent.split()]
+            else:
+                labels = dataset.processed_dataset["train"][task]
             weights = []
             label_counts = Counter(labels)
             for label in dataset.tasks_to_labels[task]:
-                weights.append(len(labels) / (num_labels[task] * label_counts[label]))
+                count = max(label_counts[label], 1)
+                weights.append(len(labels) / (num_labels[task] * count))
                 # class weights are determined by severity of class imbalance
             if len(task_names) > 1:
-                class_weights.append(weights)
+                class_weights[task] = torch.tensor(weights).to(training_args.device)
             else:
-                class_weights = weights  # if we just have the one class, simplify the tensor or pytorch will be mad
-        class_weights = torch.tensor(class_weights).to(training_args.device)
+                class_weights = torch.tensor(weights).to(training_args.device)  # if we just have the one class, simplify the tensor or pytorch will be mad
+        # class_weights = torch.tensor(class_weights).to(training_args.device)
         # sm = torch.nn.Softmax(dim=class_weights.ndim - 1)
         # class_weights = sm(class_weights)
 
@@ -446,6 +450,7 @@ def main(
 
         # TODO check when download any pretrained language model to local disk, if
         # the following condition "is_hub_model(encoder_name)" works or not.
+        # ^ is_hub_model and is_external_encoder both return False, as long as "model_type": "cnlpt" is in config.json
         if not is_external_encoder(encoder_name):
             # we are loading one of our own trained models as a starting point.
             #
@@ -459,7 +464,6 @@ def main(
             # the model file to be loaded down below the normal way. since that temp file
             # doesn't have a stored classifier it will use the randomly-inited classifier head
             # with the size of the supplied config (for the new task).
-            # TODO This setting 1) is not tested yet.
             # 2) if training_args.do_train is false:
             # we evaluate or make predictions of our trained models.
             # Both two setting require the registeration of CnlpConfig, and use
@@ -468,6 +472,11 @@ def main(
             # Load the cnlp configuration using AutoConfig, this will not override
             # the arguments from trained cnlp models. While using CnlpConfig will override
             # the model_type and model_name of the encoder.
+            if model_args.keep_existing_classifiers == model_args.ignore_existing_classifiers:  # XNOR
+                raise ValueError(
+                    "For continued training of a cnlpt model, one of --keep_existing_classifiers or --ignore_existing_classifiers flags should be selected."
+                )
+            
             config = AutoConfig.from_pretrained(
                 (
                     model_args.config_name
@@ -477,41 +486,57 @@ def main(
                 cache_dir=model_args.cache_dir,
                 # in this case we're looking at a fine-tuned model (?)
                 character_level=data_args.character_level,
+                layer=model_args.layer,
             )
-
             if training_args.do_train:
                 # Setting 1) only load weights from the encoder
-                raise NotImplementedError(
-                    "This functionality has not been restored yet"
-                )
+                if model_args.ignore_existing_classifiers:
+                    config.finetuning_task = (
+                        data_args.task_name
+                        if data_args.task_name is not None
+                        else dataset.tasks
+                    )
+                elif model_args.keep_existing_classifiers:
+                    # keep_existing_classifiers: task/relations/tagger setup must match the loaded config
+                    if (
+                        config.finetuning_task != data_args.task_name
+                        or config.relations != relations
+                        or config.tagger != tagger
+                    ):
+                        raise ValueError(  # adjacent literals join verbatim, hence the trailing spaces
+                            "When --keep_existing_classifiers is selected, please ensure "
+                            "that you set the settings the same as those used in the "
+                            "previous training run."
+                        )
+
                 model = CnlpModelForClassification(
-                    model_path=model_args.encoder_name,
                     config=config,
-                    cache_dir=model_args.cache_dir,
-                    tagger=tagger,
-                    relations=relations,
-                    class_weights=dataset.class_weights,
+                    # class_weights=dataset.class_weights,
+                    class_weights=class_weights,
                     final_task_weight=training_args.final_task_weight,
-                    use_prior_tasks=model_args.use_prior_tasks,
-                    argument_regularization=model_args.arg_reg,
+                    freeze=training_args.freeze,
                 )
-                delattr(model, "classifiers")
-                delattr(model, "feature_extractors")
+                if model_args.ignore_existing_classifiers:
+                    model.remove_task_classifiers()
+                    for task in data_args.task_name:
+                        model.add_task_classifier(task, dataset.get_labels()[task])
+                model.set_class_weights(dataset.class_weights)
+
                 if training_args.do_train:
                     tempmodel = tempfile.NamedTemporaryFile(dir=model_args.cache_dir)
                     torch.save(model.state_dict(), tempmodel)
                     model_name = tempmodel.name
-            else:
+            else:  # load existing head
                 # setting 2) evaluate or make predictions
                 model = CnlpModelForClassification.from_pretrained(
                     model_args.encoder_name,
                     config=config,
-                    class_weights=dataset.class_weights,
+                    class_weights=class_weights,
                     final_task_weight=training_args.final_task_weight,
                     freeze=training_args.freeze,
                     bias_fit=training_args.bias_fit,
                 )
-
+            model.tasks = data_args.task_name
         else:
             # This only works when model_args.encoder_name is one of the
             # model card from https://huggingface.co/models
@@ -541,7 +566,7 @@ def main(
             config.vocab_size = len(tokenizer)
             model = CnlpModelForClassification(
                 config=config,
-                class_weights=dataset.class_weights,
+                class_weights=class_weights,
                 final_task_weight=training_args.final_task_weight,
                 freeze=training_args.freeze,
                 bias_fit=training_args.bias_fit,
@@ -656,15 +681,22 @@ def compute_metrics_fn(p: EvalPrediction):
                         raise RuntimeError(
                             f"Unrecognized label type: {type(training_args.model_selection_label)}"
                         )
-                else:  # same default as in 0.6.0
+                elif dataset.output_modes[task] == relex:
                     task_scores.append(
                         metrics[task_name].get(
                             "one_score", np.mean(metrics[task_name].get("f1"))
                         )
                     )
+                else:
+                    task_scores.append(
+                        metrics[task_name].get(
+                            "one_score", np.mean(metrics[task_name].get("token_f1"))
+                        )
+                    )
                 # task_scores.append(processor.get_one_score(metrics.get(task_name, metrics.get(task_name.split('-')[0], None))))
 
             one_score = sum(task_scores) / len(task_scores)
+            metrics["one_score"] = one_score
 
             if model is not None:
                 if not hasattr(model, "best_score") or one_score > model.best_score:
@@ -675,7 +707,7 @@ def compute_metrics_fn(p: EvalPrediction):
                     model.best_eval_results = metrics
                     if trainer.is_world_process_zero():
                         if training_args.do_train:
-                            trainer.save_model()
+                            trainer.save_model()  # NOTE: a RobertaConfig is loaded here. why?
                             tokenizer.save_pretrained(training_args.output_dir)
                             if model_name == "cnn" or model_name == "lstm":
                                 with open(
@@ -690,7 +722,7 @@ def compute_metrics_fn(p: EvalPrediction):
                                     )
                                     config_dict["task_names"] = task_names
                                     json.dump(config_dict, f)
-                        for task_ind, task_name in enumerate(metrics):
+                        for task_ind, task_name in enumerate(task_names):
                             with open(output_eval_file, "a") as writer:
                                 logger.info(
                                     f"***** Eval results for task {task_name} *****"
@@ -720,7 +752,8 @@ def compute_metrics_fn(p: EvalPrediction):
         return compute_metrics_fn
 
     # Initialize our Trainer
-    training_args.load_best_model_at_end = True
+    # training_args.load_best_model_at_end = True
+    # TODO the argument in CnlpTrainingArguments is `model_selection_score`. reconcile this with `metric_for_best_model`?
     training_args.metric_for_best_model = "one_score"
     trainer = Trainer(
         model=model,
@@ -884,7 +917,7 @@ def compute_metrics_fn(p: EvalPrediction):
 
                 out_table = process_prediction(
                     task_names=dataset.tasks,
-                    error_analysis=False,
+                    error_analysis=training_args.error_analysis,
                     output_prob=training_args.output_prob,
                     character_level=data_args.character_level,
                     task_to_label_packet=task_to_label_packet,
@@ -910,4 +943,4 @@ def _mp_fn(index):
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file